'''
Code description:
Calculate Cys-richness of a protein with criteria set as:
>=4 'C's over the length of protein AND >=5% total cysteine content
Function:
Take input sequence => Count number of 'C's & length => Calculate percentage
Output 'Cys-rich' if both criteria are met, otherwise 'No'
'''
from Bio import SeqIO
def Cys_rich(record_seq, min_count=4, min_percent=5.0):
    """Classify a protein sequence as cysteine-rich or not.

    A sequence is reported as cysteine-rich when it contains at least
    ``min_count`` 'C' residues AND those residues make up at least
    ``min_percent`` percent of the sequence length. Counting is
    case-sensitive: only uppercase 'C' is counted.

    Args:
        record_seq: Any object supporting ``.count('C')`` and ``len()``,
            e.g. a plain string or the ``record.seq`` passed by the caller.
        min_count: Minimum number of 'C' residues required (default 4).
        min_percent: Minimum cysteine content, as a percentage of the
            sequence length (default 5.0).

    Returns:
        The string 'Cys-rich' if both criteria are met, otherwise 'No'.
        An empty sequence is always 'No'.
    """
    seq_len = len(record_seq)
    # An empty sequence has no defined percentage; the original division
    # would raise ZeroDivisionError here.
    if seq_len == 0:
        return 'No'

    cys_count = record_seq.count('C')
    meets_count = cys_count >= min_count
    # Cross-multiplied form of (cys_count / seq_len * 100 >= min_percent):
    # avoids the division and its rounding.
    meets_percent = 100.0 * cys_count >= min_percent * seq_len

    return 'Cys-rich' if meets_count and meets_percent else 'No'
# Keep only the records of the input FASTA file whose sequence is
# classified as 'Cys-rich' by Cys_rich().
CysRichSeq = [
    record
    for record in SeqIO.parse('filename.fasta', 'fasta')
    if Cys_rich(record.seq) == 'Cys-rich'
]

# Write the selected records to a new FASTA file.
SeqIO.write(CysRichSeq, 'Cys-rich_sequences.fasta', 'fasta')

# Parenthesised single-argument print behaves the same on Python 2 and 3;
# the original bare print statement is a SyntaxError on Python 3.
print('Cys-rich sequences written to file..')
Comments
Post a Comment