#!/usr/bin/python2
##################################################################################
# A DNA/Protein sequence cleaner written by Andrea Cabibbo
# Feel free to use/modify/redistribute the code
# If you use significant parts of this code please preserve
# this header.
# If you find bugs or have suggestions, please contact
# the author at andrea.cabibbo@uniroma2.it
##################################################################################
import cgitb; cgitb.enable()
import sys
import os
sys.path.insert(0, os.getcwd())
#sys.path.insert(0, "/var/www/vhosts/cellbiol.com/cgi-bin/cleaner")
import cgi
import re
import string
from copy import deepcopy


def print_file(filename):
    """Print every line of the text file *filename* to standard output.

    Each line is printed with its own trailing newline still attached, so
    the output carries one extra newline per line of the file.
    """
    # The context manager closes the handle even if reading fails.
    with open(filename) as handle:
        lines = handle.readlines()
    for line in lines:
        # Single-argument print(...) parses identically as a Python 2
        # print statement, which is what this script runs under.
        print(line)


def print_table(content, width='90%', border='1', cellspacing='1',
                cellpadding='5', bordercolor='#669999'):
    """Print *content* wrapped in the table template.

    NOTE(review): width, border, cellspacing, cellpadding and bordercolor
    are accepted but not referenced by the format string below; the table
    markup that presumably consumed them is not present in this copy of
    the file -- confirm against the deployed script.
    """
    print('\n%s |\n' % content)
def cleaner(seq, alphabet='dna_una'):
    """Return *seq* with every character outside the chosen alphabet removed.

    seq      -- the raw sequence text (digits, whitespace and any other
                symbol not in the alphabet are dropped).
    alphabet -- key selecting the working alphabet: 'dna_una', 'dna_amb',
                'rna_una', 'rna_amb', 'prot' or 'prot_ext'.

    The comparison is case-sensitive and the alphabets are upper case, so
    lower-case input must be upper-cased by the caller first.
    Raises KeyError if *alphabet* is not one of the keys above.
    """
    abcd = {'dna_una': 'GATC',
            'dna_amb': 'GATCRYWSMKHBVDN',
            'rna_una': 'GAUC',
            'rna_amb': 'GAUCRYWSMKHBVDN',
            'prot': 'ACDEFGHIKLMNPQRSTVWY',
            'prot_ext': 'ACDEFGHIKLMNPQRSTVWYBXZ'}
    wa = frozenset(abcd[alphabet])  # wa = Working Alphabet
    # Digits and whitespace are not in any alphabet, so the single
    # membership test below removes them together with everything else.
    return ''.join([char for char in seq if char in wa])
def seq_format(seq, step=60, step2='none'):
    """Break *seq* into numbered lines, optionally marking sub-groups.

    seq   -- the sequence to format.
    step  -- number of residues per line; after every *step*-th residue
             a space, the running position and a newline are inserted.
    step2 -- 'none' (default) for no sub-grouping, otherwise a number:
             after every *step2*-th residue that does not also end a
             line, ' + ' is inserted.

    Returns the formatted string.  Raises ZeroDivisionError if *step*
    (or a numeric *step2*) is zero.
    """
    outseq = []
    for position, residue in enumerate(seq, 1):
        outseq.append(residue)
        # Plain modulo replaces the former test that split str(float) on
        # '.', which failed with IndexError once the quotient was rendered
        # in scientific notation (for example 1/100000 -> '1e-05').
        if position % step == 0:
            outseq.append(' %s\n' % position)
        elif step2 != 'none' and position % step2 == 0:
            outseq.append(' + ')
    return ''.join(outseq)
def reverse(seq):
    """Return the characters of *seq* in reverse order, as a string."""
    # list() keeps any iterable of characters acceptable, as before; the
    # previous local named 'os' (which shadowed the os module) is gone.
    return ''.join(reversed(list(seq)))
# Complement of each upper-case DNA symbol, including the IUPAC ambiguity
# codes that cleaner() lets through for the 'dna_amb' alphabet.
_COMPLEMENT_UPPER = {'G': 'C', 'A': 'T', 'T': 'A', 'C': 'G',
                     'R': 'Y', 'Y': 'R', 'K': 'M', 'M': 'K',
                     'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D',
                     'S': 'S', 'W': 'W', 'N': 'N'}
# Same table extended with lower-case entries so case is preserved.
_COMPLEMENT_TABLE = dict(_COMPLEMENT_UPPER)
_COMPLEMENT_TABLE.update(
    (base.lower(), comp.lower()) for base, comp in _COMPLEMENT_UPPER.items())


def complement(seq):
    """Return the DNA complement of *seq*, symbol by symbol (not reversed).

    Handles G, A, T, C and the IUPAC ambiguity codes R, Y, K, M, B, V, D,
    H, S, W, N in either case; the case of each symbol is preserved.
    Raises KeyError for any other character (including 'U').
    """
    return ''.join([_COMPLEMENT_TABLE[nucl] for nucl in seq])
def main():
    """Serve one CGI request: read the form, clean the sequence, print HTML.

    Form fields read: 'seq' (the raw sequence, optionally FASTA),
    'trans_opt' ('rev_comp', 'rev' or 'comp'), 'numbers' (residues per
    line, '0' for none) and 'case' ('low' for lower-case output).
    Missing fields fall back to "no transformation / no numbering /
    unchanged case" instead of raising KeyError.
    """
    print('content-type: text/html\n\n')
    print_file('html/header.txt')
    form = cgi.FieldStorage()
    if not form.keys():
        print('WARNING: No sequence and parameters received on this side. Please try again')
    elif form.getfirst('seq', '') == '':
        # FieldStorage drops blank fields, so an empty sequence shows up
        # as a missing key; getfirst() covers both cases.
        print('WARNING: No sequence received on this side. Please try again')
    else:
        in_seq = form.getfirst('seq', '')

        # Checking for FASTA format: '>' lines carry the title, all
        # other lines are sequence data.
        header_re = re.compile('^>(.{1,150})$')
        seq_title = 'unknown sequence'
        body_lines = []
        # The True argument keeps the newlines at the end of the lines,
        # which is useful for reassembling the text as it was.
        for line in in_seq.splitlines(True):
            match = header_re.search(line)
            if match:
                seq_title = match.group(1)
            else:
                body_lines.append(line)
        out_seq = cleaner(''.join(body_lines).upper())

        # RESPONDING TO OPTIONS
        trans_opt = form.getfirst('trans_opt', '')
        # Default label so 'tf' is always bound, whatever value arrives.
        tf = 'None'
        if trans_opt == 'rev_comp':
            out_seq = complement(reverse(out_seq))
            tf = 'Reverse and Complement'
        elif trans_opt == 'rev':
            out_seq = reverse(out_seq)
            tf = 'Reverse'
        elif trans_opt == 'comp':
            out_seq = complement(out_seq)
            tf = 'Complement'

        # Measure the sequence before line numbers and separators are
        # mixed into it, so the reported length counts residues only.
        seq_length = len(out_seq)

        # FORMATTING
        try:
            step_size = int(form.getfirst('numbers', '0'))
        except ValueError:
            # A non-numeric value is treated as "no numbering".
            step_size = 0
        if step_size:
            out_seq = seq_format(out_seq, step_size)

        # PRINTING THE OUTPUT SEQUENCE
        if form.getfirst('case', '') == 'low':
            out_seq = out_seq.lower()
        print('Sequence Length:  %s \n' % seq_length)
        # The title comes straight from user input, so it is HTML-escaped
        # before being echoed into the page.
        print('Selected Transformation: %s %s\n'
              % (tf, cgi.escape(seq_title, True)))
        if step_size == 0:
            # NOTE(review): print_textarea is not defined in the part of
            # the file visible here -- confirm it exists before deploying.
            print_textarea(out_seq)
        else:
            print_table('' + out_seq + '')
    # The footer pairs with the header printed above, so it is emitted on
    # every path, including the warning ones.
    print_file('html/footer.txt')


if __name__ == '__main__':
    main()