#!/usr/bin/python2
##################################################################################
# An Oligonucleotide Finder written by Andrea Cabibbo # ###### #
# Feel free to use/modify/redistribute the code # # # #
# If you use significant parts of this code please preserve # # # #
# this header and the site footer. ####### # #
# If you find bugs or have suggestions, please contact # # # #
# the author at andrea.cabibbo*mailsymbol*uniroma2 dot it # # ###### #
##################################################################################
import cgitb; cgitb.enable()
import sys
import os
sys.path.insert(0, os.getcwd())
# sys.path.insert(0, "/var/www/vhosts/cellbiol.com/cgi-bin/py")
import cgi
import re
import string
from copy import deepcopy
def print_file(filename):
    """Print every line of the text file *filename* to standard output.

    Each line is printed with its own trailing newline still attached, so
    the output contains an extra blank line after every line of the file
    (this matches the historical behaviour of the script).

    The file is read completely and closed before anything is printed; the
    ``with`` block guarantees the handle is released even if reading fails.
    """
    with open(filename) as handle:
        lines = handle.readlines()
    for line in lines:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(line)
def cleaner(seq):
    """Return *seq* reduced to the accepted upper-case DNA symbols.

    seq -- a string or a list of single-character strings.

    Every element that is not one of ``GATCRYWSMKHBVDN`` (labelled "IUPAC
    ambiguous_dna_values" in the original code) is dropped: digits,
    whitespace, punctuation and lower-case letters all disappear.  The
    comparison is case-sensitive, so callers upper-case the input first.

    The result is always a plain string.
    """
    allowed = 'GATCRYWSMKHBVDN'
    # The former digit/slash pre-check was a no-op (its branch only did
    # `pass`, and `'\\' or '/'` only ever tested the backslash), so a
    # single membership filter gives the same output.
    return ''.join([char for char in seq if char in allowed])
def seq_format(seq, step=60, step2='none'):
    """Return *seq* with position markers inserted at regular intervals.

    seq   -- string (or list of one-character strings) to format.
    step  -- after every `step`-th character (1-based) the marker
             ' <position>' followed by a line break is inserted.
    step2 -- optional minor interval; after every `step2`-th character
             that is not also a `step`-th character, ' + ' is inserted.
             The string 'none' (the default) disables minor markers.

    Returns the formatted text as a single string.
    """
    # NOTE(review): in this copy of the file the marker literal was split
    # across two physical lines; it is reproduced here as ' %s\n'.  If the
    # original carried HTML markup at that point it has been lost -- confirm
    # against the deployed script.
    pieces = []
    for pos, base in enumerate(seq, 1):
        pieces.append(base)
        # Plain modulo replaces the old test that inspected the text after
        # the '.' in str(float quotient).
        if pos % step == 0:
            pieces.append(' %s\n' % pos)
        elif step2 != 'none' and pos % step2 == 0:
            pieces.append(' + ')
    return ''.join(pieces)
# For each symbol allowed in an oligo, the regex character class of the
# sequence symbols accepted at that position.
_OLIGO_SYMBOL_CLASSES = {
    'G': '[GRSKVDBXN]',
    'A': '[AMRWVHDXN]',
    'T': '[TWYKHDBXN]',
    'C': '[CMSVHBXN]',
    'M': '[MACVHXN]',
    'R': '[RAGVDXN]',
    'W': '[WATHDXN]',
    'S': '[SCGVBXN]',
    'Y': '[YCTHBXN]',
    'K': '[KGTDBXN]',
    'V': '[VACGXNMRS]',
    'H': '[HACTXNMWY]',
    'D': '[DAGTXNRWK]',
    'B': '[BCGTXNSYK]',
    'X': '[XNMRWSYKVHDBGATC]',
    'N': '[NXMRWSYKVHDBGATC]',
}


def re_const(oligo):
    """Build a compiled, case-insensitive regex that matches *oligo*.

    The oligo is upper-cased and each recognised symbol is replaced by its
    character class from ``_OLIGO_SYMBOL_CLASSES``.  Symbols that are not in
    the table (digits, spaces, punctuation, ...) are silently skipped, so an
    oligo made only of such characters produces an empty pattern.

    Returns the compiled pattern object (``re.IGNORECASE`` is set).
    """
    classes = [
        _OLIGO_SYMBOL_CLASSES[symbol]
        for symbol in oligo.upper()
        if symbol in _OLIGO_SYMBOL_CLASSES
    ]
    return re.compile(''.join(classes), re.IGNORECASE)
def match_oligo(seq, oligo, mismatches=0):
    """Find every non-overlapping occurrence of *oligo* in *seq*.

    seq        -- sequence string to search.
    oligo      -- oligonucleotide; converted to a regex by re_const().
    mismatches -- accepted for interface compatibility but not used: no
                  mismatch-tolerant search is implemented.

    Returns a list of ``[start, end, matched_text]`` lists, where start and
    end are 0-based offsets as reported by the regex match (end exclusive).
    An oligo that contains no recognised symbol yields an empty list.
    """
    re_oligo = re_const(oligo)
    if not re_oligo.pattern:
        # An empty pattern would "match" with zero length at every offset of
        # seq, which would be reported as bogus hits.
        return []
    return [
        [match.start(), match.end(), match.group()]
        for match in re_oligo.finditer(seq)
    ]
"""
def tagger(seq,matches_array,tag1='',tag2=''):
ma=matches_array
workseq=list(seq)
outseq=[]
for match in ma:
print match[0],match[1],8*' ',match[2],'
'
for i in range(0,len(workseq)):
if i ==match[0]:
outseq.append(tag1)
outseq.append(workseq[i])
elif i ==match[1]-1:
outseq.append(workseq[i])
outseq.append(tag2)
else:
outseq.append(workseq[i])
outseq=string.join(outseq,'')
return outseq
"""
def tagger(seq, matches_array, tag1='', tag2=''):
    """Return *seq* with every match wrapped in *tag1* ... *tag2*.

    seq           -- string (or list of one-character strings).
    matches_array -- iterable of ``[start, end, ...]`` entries with 0-based
                     start and exclusive end, as produced by match_oligo().
    tag1, tag2    -- text inserted before the first and after the last
                     character of each match.

    A match that is a single character long receives both tags (previously
    only the opening tag was emitted, leaving it unclosed).  If a position
    is the start of one match and the end of another, both tags are
    emitted around that character.
    """
    # Pre-compute the tagged offsets once instead of rescanning the whole
    # match list for every character.
    starts = set(match[0] for match in matches_array)
    ends = set(match[1] - 1 for match in matches_array)

    pieces = []
    for index, base in enumerate(seq):
        if index in starts:
            pieces.append(tag1)
        pieces.append(base)
        if index in ends:
            pieces.append(tag2)
    return ''.join(pieces)
def seq_fat(seq, step, matches_array, step2='none', tag1='', tag2=''):  # Format And Tag
    """Format *seq* with position markers and wrap matches in tags.

    Combines what seq_format() and tagger() do in a single pass.

    seq           -- string (or list of one-character strings).
    step          -- after every `step`-th character (1-based) the marker
                     ' <position>' followed by a line break is inserted.
    matches_array -- iterable of ``[start, end, ...]`` entries with 0-based
                     start and exclusive end, as produced by match_oligo().
    step2         -- optional minor interval: ' + ' is inserted after every
                     `step2`-th character that is not also a `step`-th one,
                     mirroring seq_format().  'none' (default) disables it.
                     (Previously any other value made the function return
                     an empty string.)
    tag1, tag2    -- text inserted before the first and after the last
                     character of each match.

    At a position that carries both a tag and a marker the order is
    tag1, character, tag2, marker.  One-character matches receive both
    tags (previously the closing tag was never emitted for them).
    """
    # NOTE(review): in this copy of the file the marker literal was split
    # across two physical lines; it is reproduced here as ' %s\n'.  If the
    # original carried HTML markup at that point it has been lost -- confirm
    # against the deployed script.
    starts = set(match[0] for match in matches_array)
    ends = set(match[1] - 1 for match in matches_array)

    pieces = []
    for pos, base in enumerate(seq, 1):
        index = pos - 1
        if index in starts:
            pieces.append(tag1)
        pieces.append(base)
        if index in ends:
            pieces.append(tag2)
        # Plain modulo replaces the old test that inspected the text after
        # the '.' in str(float quotient).
        if pos % step == 0:
            pieces.append(' %s\n' % pos)
        elif step2 != 'none' and pos % step2 == 0:
            pieces.append(' + ')
    return ''.join(pieces)
# CGI entry point.  Visible flow: emit the content-type header, print
# html/header.txt, read the 'oligo' and 'seq' form fields, upper-case both,
# filter the sequence through cleaner(), then print the formatted sequence
# (seq_format), the tagged sequence (seq_fat with step 60) and a report of
# the matches, and finally print html/footer.txt.
# NOTE(review): from the "Your cleaned input sequence" print onwards the
# string literals span physical lines and several statements share a line,
# so this section does not parse as written.  Presumably HTML markup and
# line structure were lost from this copy -- restore it from the deployed
# script; the lines are left untouched here rather than guessed at.
# NOTE(review): form['oligo'] raises KeyError when the field is absent.
# cgi.FieldStorage omits blank fields unless keep_blank_values is set, so
# the =='' test below presumably never sees an empty value -- confirm.
# NOTE(review): match_oligo(myseq,oligo) is evaluated more than once below;
# the result could be computed once and reused.
if __name__=='__main__':
print 'content-type: text/html\n\n'
print_file('html/header.txt')
form = cgi.FieldStorage()
#print string.join(form.keys(),', ')
if form['oligo'].value=='':
#or (not form.has_key('seq')):
print 'The oligo is missing, please try again'
else:
in_seq=form['seq'].value
in_seq=in_seq.upper()
oligo=form['oligo'].value
oligo=oligo.upper()
in_seq=list(in_seq)
myseq=cleaner(in_seq)
print 'Your cleaned input sequence:
','',seq_format(myseq),'
' #print 'Tagger only here...
','',tagger(myseq,match_oligo(myseq,'gaattc')),'','
' print 'Your sequence with matches highlighted in red and underlined
', '',seq_fat(myseq,60,match_oligo(myseq,oligo)),'' if match_oligo(myseq,oligo)!=[]: print '
The following matches were found:
' print '
Match Start | Match End | Sequence |
%s | %s | %s |
Sorry, no matches were found' print_file('html/footer.txt')