Calculate Cys-Richness for a protein

'''
Code description: 
Calculate the Cys-richness of a protein. A protein is considered Cys-rich when it has:
 >=4 'C' residues over the length of the protein AND >=5% total cysteine content
Function: 
Take input sequence => Count the number of 'C's & the sequence length => Calculate the percentage
Output 'Cys-rich' if both criteria are met, otherwise 'No'
'''

from Bio import SeqIO

def Cys_rich(record_seq):
    """Classify a protein sequence as cysteine-rich or not.

    A sequence is reported as Cys-rich when it contains at least 4 'C'
    residues AND those residues make up at least 5% of its length.
    Counting is case-sensitive: only uppercase 'C' is counted.

    Args:
        record_seq: The protein sequence. Any object that supports
            ``count('C')`` and ``len()`` works (e.g. a ``str`` or the
            ``seq`` attribute of a parsed FASTA record).

    Returns:
        The string 'Cys-rich' if both criteria are met, otherwise 'No'.
    """
    seq_len = len(record_seq)
    # An empty sequence cannot be Cys-rich; return early so the percentage
    # calculation below never divides by zero.
    if seq_len == 0:
        return 'No'

    C_count = record_seq.count('C')
    # float() keeps this a true division on Python 2 as well.
    Cys_perc = float(C_count) / float(seq_len) * 100
    if C_count >= 4 and Cys_perc >= 5.0:
        return 'Cys-rich'
    return 'No'

# Records from the input FASTA file that pass the Cys-rich filter.
CysRichSeq = []

# Keep every record whose sequence is classified as 'Cys-rich'.
for record in SeqIO.parse('filename.fasta', 'fasta'):
    if Cys_rich(record.seq) == 'Cys-rich':
        CysRichSeq.append(record)

# Write the selected records to a new FASTA file.
SeqIO.write(CysRichSeq, 'Cys-rich_sequences.fasta', 'fasta')
# Parenthesised form works as a function call on Python 3 and prints the
# same text on Python 2 (the bare `print '...'` statement is a SyntaxError
# on Python 3).
print('Cys-rich sequences written to file..')

Comments

Popular posts from this blog

Condense fasta header

Map multiple annotations using pandas

Fasta Header Replacer