python 简单脚本练习3

计算蛋白质的分子量（MW）、等电点（pI）和消光系数（EC）

# -*- coding: UTF-8 -*-
__author__='zsc'

import sys

def ExPASY(P):
    """Return a one-line report of MW, pI and EC for a protein sequence.

    The sequence is upper-cased and the 20 standard one-letter residue codes
    are counted; every other character is ignored.

    * MW  -- sum of the residue masses plus one water (18.01524), reported
      in kD and rounded to two decimals.
    * pI  -- the pH at which the net-charge function ``f`` changes sign,
      located by bisection on [3.2, 12.0] and rounded to two decimals.
    * EC  -- (1490 * Tyr + 5500 * Trp + 125 * Cys pairs) divided by the
      summed residue mass, rounded to two decimals.

    Args:
        P: protein sequence as a string of one-letter amino-acid codes.

    Returns:
        A string of the form ``"MW=<mw>kD\\tpI=<pi>\\tEC(...)=<ec>\\n"``.
        When the sequence contains no standard residue the EC field is 0.0
        instead of raising ZeroDivisionError.
    """
    AA = "ACDEFGHIKLMNPQRSTVWY"
    AA_residue = {'A': 71.0788, 'C': 103.1388, 'D': 115.0886, 'E': 129.1155,
                  'F': 147.1766, 'G': 57.0519, 'H': 137.1411, 'I': 113.1594,
                  'K': 128.1741, 'L': 113.1594, 'M': 131.1926, 'N': 114.1038,
                  'P': 97.1167, 'Q': 128.1307, 'R': 156.1875, 'S': 87.0782,
                  'T': 101.1051, 'V': 99.1326, 'W': 186.2132, 'Y': 163.176}
    # Exponents used by the charge model for each ionisable side chain.
    pI_e = {'C': 9.0, 'D': 4.0, 'E': 4.5, 'H': 6.4, 'K': 10.4, 'R': 12.0,
            'Y': 10.0}
    COOH = "CDEY"   # residues that contribute negative charge
    NH2 = "HKR"     # residues that contribute positive charge

    # Accept lower-case input; str.count below is case-sensitive.
    P = P.upper()
    number = {k: P.count(k) for k in AA}

    # Summed residue mass (the water of the free termini is added on output).
    a = sum(number[k] * AA_residue[k] for k in AA)

    # Cystines are whole pairs of Cys, hence floor division: an unpaired Cys
    # must not contribute half a cystine.
    absorbance = (number['Y'] * 1490 + number['W'] * 5500
                  + (number['C'] // 2) * 125)
    # NOTE(review): the denominator is the residue mass without the terminal
    # water, whereas the reported MW includes it -- confirm this is intended.
    E = absorbance / a if a else 0.0

    def f(x):
        """Return (negative charge - positive charge) of the chain at pH x."""
        b = 0
        c = 0
        for m in COOH:
            b += (number[m] * (10 ** x)) / (10 ** x + 10 ** pI_e[m])
        for n in NH2:
            c += (number[n] * 10 ** pI_e[n]) / (10 ** x + 10 ** pI_e[n])
        # The 3.2 and 8.2 terms are presumably the C-terminal carboxyl and
        # the N-terminal amino group; they are always counted once.
        return b + 10 ** x / (10 ** x + 10 ** 3.2) - 10 ** 8.2 / (10 ** x + 10 ** 8.2) - c

    # Bisection: f > 0 means the chain is net negative, so the pI lies lower.
    r = 3.2
    s = 12.0
    x = (r + s) / 2
    for _ in range(11):
        charge = f(x)
        if charge > 0:
            s = x
        elif charge < 0:
            r = x
        x = (r + s) / 2

    str_out = "MW=" + str(round((a + 18.01524) / 1000, 2)) + "kD\tpI=" + str(round(x, 2)) + "\t"
    str_out += "EC(消光系数)=" + str(round(E, 2)) + "\n"
    return str_out


def _write_record(f_out, header, seq_parts):
    """Write one ``<id>\\t<ExPASY report>`` line for a completed record."""
    # header[1:] drops the leading '>' (an empty header yields an empty id).
    f_out.write(header[1:] + "\t" + ExPASY("".join(seq_parts)))


def calc(in_file, out_file):
    """Run ExPASY on every record of a FASTA-style file.

    Lines starting with '>' are record headers; all following non-blank
    lines up to the next header (or end of file) are joined into a single
    sequence, so sequences wrapped over several lines are handled as one
    record. Blank lines are skipped. A header with no sequence lines
    produces no output.

    Args:
        in_file: path of the input file to read.
        out_file: path of the output file; it is created or overwritten and
            receives one ``<id>\\t<report>`` line per record.
    """
    header = ''
    seq_parts = []
    with open(in_file, 'r') as f_in:
        with open(out_file, 'w') as f_out:
            for line in f_in:
                text = line.strip()
                if not text:
                    continue
                if text.startswith('>'):
                    # A new header closes the record collected so far.
                    if seq_parts:
                        _write_record(f_out, header, seq_parts)
                        seq_parts = []
                    header = text
                else:
                    seq_parts.append(text)
            # Flush the last record, which has no following header.
            if seq_parts:
                _write_record(f_out, header, seq_parts)


if __name__ == "__main__":
    # Expect two positional arguments: the input file and the output file.
    # Exit with a usage message instead of an IndexError traceback when
    # they are missing; any extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit("Usage: %s <input_file> <output_file>" % sys.argv[0])
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    calc(input_file, output_file)

你可能感兴趣的:(python 简单脚本练习3)