#!/usr/bin/python2
##################################################################################
# An Oligonucleotide Finder written by Andrea Cabibbo                            #
# Feel free to use/modify/redistribute the code                                  #
# If you use significant parts of this code please preserve                      #
# this header and the site footer.                                               #
# If you find bugs or have suggestions, please contact                           #
# the author at andrea.cabibbo*mailsymbol*uniroma2 dot it                        #
##################################################################################
"""CGI script that locates an oligonucleotide inside a DNA sequence.

The helper functions are pure string utilities and can be imported on their
own; all CGI handling lives in ``main()``.

The code is written to run unchanged on Python 2 (the interpreter named in
the shebang) and Python 3: every ``print(...)`` call passes exactly ONE
argument, which behaves identically under both.
"""
import sys
import os
sys.path.insert(0, os.getcwd())
# sys.path.insert(0, "/var/www/vhosts/cellbiol.com/cgi-bin/py")
import re
import string  # no longer used below; kept so existing module-level names stay available
from copy import deepcopy  # no longer used below; kept for the same reason

# Characters accepted by cleaner(): the IUPAC ambiguous DNA alphabet.
IUPAC_DNA = 'GATCRYWSMKHBVDN'

# For each oligo letter, the regex character class of sequence letters that
# are compatible with it (the letter itself, the bases it stands for, and
# every ambiguity code that includes it).
_IUPAC_CLASSES = {
    'G': '[GRSKVDBXN]',
    'A': '[AMRWVHDXN]',
    'T': '[TWYKHDBXN]',
    # The original class was '[CMSVHBXN]'; 'Y' (C or T) was missing although
    # every other base lists all of its two-base codes.
    'C': '[CMSYVHBXN]',
    'M': '[MACVHXN]',
    'R': '[RAGVDXN]',
    'W': '[WATHDXN]',
    'S': '[SCGVBXN]',
    'Y': '[YCTHBXN]',
    'K': '[KGTDBXN]',
    'V': '[VACGXNMRS]',
    'H': '[HACTXNMWY]',
    'D': '[DAGTXNRWK]',
    'B': '[BCGTXNSYK]',
    'X': '[XNMRWSYKVHDBGATC]',
    'N': '[NXMRWSYKVHDBGATC]',
}


def print_file(filename):
    """Write every line of *filename* to standard output.

    Each line is passed to ``print`` as-is, so the newline already present in
    the file is followed by the one ``print`` adds (same as the original).
    """
    with open(filename) as handle:
        for line in handle:
            print(line)


def cleaner(seq):
    """Return *seq* with every character outside ``IUPAC_DNA`` removed.

    *seq* may be a string or any iterable of single characters.  The check is
    case-sensitive: lower-case letters are dropped, so callers should
    upper-case the input first (``main()`` does).
    """
    return ''.join(char for char in seq if char in IUPAC_DNA)


def _position_suffix(position, step, step2='none'):
    """Return the text to emit after the base at 1-based *position*.

    A multiple of *step* ends a line (position number plus newline).  When
    *step2* is given, a multiple of *step2* that is not also a multiple of
    *step* gets the in-line group separator.  Everything else gets nothing.
    """
    if position % step == 0:
        # NOTE(review): an HTML line break appears to have been stripped from
        # this literal; only the newline visible in the source is emitted.
        return ' %s\n' % position
    if step2 != 'none' and position % step2 == 0:
        return ' + '
    return ''


def _match_boundaries(matches_array):
    """Return ``(starts, ends)``: sets of first and last index of each match.

    Each match is ``[start, end, text]`` with *end* exclusive, as produced by
    ``match_oligo``.
    """
    starts = set()
    ends = set()
    for match in matches_array:
        starts.add(match[0])
        ends.add(match[1] - 1)
    return starts, ends


def seq_format(seq, step=60, step2='none'):
    """Break *seq* into numbered lines of *step* bases.

    After every *step*-th base the 1-based position and a newline are
    inserted.  If *step2* is not ``'none'``, ``' + '`` is inserted after every
    *step2*-th base that does not already end a line.
    """
    return ''.join(
        base + _position_suffix(position, step, step2)
        for position, base in enumerate(seq, 1)
    )


def re_const(oligo):
    """Compile *oligo* into a case-insensitive regex honouring IUPAC codes.

    Each recognised letter becomes the character class from
    ``_IUPAC_CLASSES``; any other character is ignored.  An oligo without a
    single recognised letter therefore compiles to the empty pattern.
    """
    pattern = ''.join(_IUPAC_CLASSES.get(char, '') for char in oligo.upper())
    return re.compile(pattern, re.IGNORECASE)


def match_oligo(seq, oligo, mismatches=0):
    """Return ``[start, end, matched_text]`` for every match of *oligo* in *seq*.

    *start* is 0-based and *end* is exclusive.  Matches come from
    ``finditer`` and are therefore non-overlapping.  *mismatches* is accepted
    for interface compatibility but is not used.

    An oligo with no recognised letters yields ``[]`` instead of an empty
    match at every position.
    """
    compiled = re_const(oligo)
    if not compiled.pattern:
        return []
    return [[hit.start(), hit.end(), hit.group()]
            for hit in compiled.finditer(seq)]


def tagger(seq, matches_array, tag1='', tag2=''):
    """Return *seq* with *tag1* before and *tag2* after every match.

    *matches_array* is a list of ``[start, end, text]`` entries as returned by
    ``match_oligo``.  A one-base match receives both tags.
    """
    starts, ends = _match_boundaries(matches_array)
    pieces = []
    for index, base in enumerate(seq):
        if index in starts:
            pieces.append(tag1)
        pieces.append(base)
        if index in ends:
            pieces.append(tag2)
    return ''.join(pieces)


def seq_fat(seq, step, matches_array, step2='none', tag1='', tag2=''):
    """Format And Tag: ``seq_format`` and ``tagger`` in a single pass.

    Every match in *matches_array* is wrapped in *tag1*/*tag2*, and the line
    numbering / grouping of ``seq_format`` (*step*, *step2*) is applied.  The
    closing tag is written before the end-of-line suffix.

    NOTE(review): the tag defaults are empty strings, so nothing is
    highlighted unless the caller passes markup - the default markup appears
    to have been stripped from the source.
    """
    starts, ends = _match_boundaries(matches_array)
    pieces = []
    for position, base in enumerate(seq, 1):
        index = position - 1
        if index in starts:
            pieces.append(tag1)
        pieces.append(base)
        if index in ends:
            pieces.append(tag2)
        pieces.append(_position_suffix(position, step, step2))
    return ''.join(pieces)


def main():
    """CGI entry point: read the ``seq`` and ``oligo`` fields and print the report."""
    # Imported here so the helpers above can be imported without pulling in
    # the CGI machinery or installing the cgitb traceback hook.
    import cgi
    import cgitb
    cgitb.enable()

    print('content-type: text/html\n\n')
    print_file('html/header.txt')
    form = cgi.FieldStorage()
    # FieldStorage drops blank fields by default, so form['oligo'] would raise
    # KeyError for an empty box; getfirst() returns the fallback instead.
    oligo = form.getfirst('oligo', '').upper()
    if oligo == '':
        print('The oligo is missing, please try again')
    else:
        myseq = cleaner(form.getfirst('seq', '').upper())
        matches = match_oligo(myseq, oligo)
        # NOTE(review): the literals below appear to have lost their HTML
        # markup; only the text visible in the source is emitted.
        print('Your cleaned input sequence:\n' + seq_format(myseq) + '\n\n')
        print('Your sequence with matches highlighted in red and underlined\n\n'
              + seq_fat(myseq, 60, matches))
        if matches:
            print('\n\nThe following matches were found:\n\n')
            print('Match Start Match End Sequence')
            for start, end, text in matches:
                # Report 1-based, inclusive coordinates.
                print('%s %s %s' % (start + 1, end, text))
        else:
            print('\n\nSorry, no matches were found')
    print_file('html/footer.txt')


if __name__ == '__main__':
    main()