viterbi

import sys

import numpy as np
f=open(sys.argv[2],"r")
f2=open(sys.argv[1],"r")
f1=open("out.txt","w")
import math
#   隐状态



def compute(obs, states, start_p, trans_p, emit_p):
    max_p = np.zeros((len(obs), len(states)))
    path = np.zeros((len(states), len(obs)))
    for i in range(len(states)):
        max_p[0][i] = math.log(start_p[i]) + emit_p[i][obs[0]]
        path[i][0] = i
    for t in range(1, len(obs)):
        #print(t)
        newpath = np.zeros((len(states), len(obs)))
        for y in range(len(states)):
            prob = -100000000
            for y0 in range(len(states)):
                nprob = max_p[t-1][y0] + trans_p[y0][y] + emit_p[y][obs[t]]
                if nprob > prob:
                    prob = nprob
                    state = y0
            max_p[t][y] = prob
            for m in range(t):
                newpath[y][m] = path[state][m]
            newpath[y][t] = y
        path = newpath
    max_prob = -100000000
    path_state = 0
    for y in range(len(states)):
        if max_p[len(obs)-1][y] > max_prob:
            max_prob = max_p[len(obs)-1][y]
            path_state = y
    #print(max_prob)
    return path[path_state]
state_s = []
obser=[]#fasta file
alp=[]#alphabet


#get hmm file
lines=f2.readlines()
wordlist=lines[0].split()#将每一行的数字分开放在列表中
#for a in wordlist:#遍历每一行的数字
    #print (a)
statenum=int(wordlist[0])
hidden_state = []
for k in range(statenum):
    state_s.append(k)
    hidden_state.append(chr(ord('A')+k))
letternum=wordlist[1]
#print((wordlist[2]))
for i in range(len(wordlist[2])):
    alp.append((wordlist[2][i]).lower())
    #print(alp[i])
wordlist=lines[1].split()#将每一行的数字分开放在列表中
start_probability =[]
for a in wordlist:#遍历每一行的数字
    #print (a)
    start_probability.append(float(a))
#print(statenum)
#print(letternum)

transititon_probability=np.zeros((int(statenum),int(statenum)))
emission_probability=np.zeros((int(statenum),int(letternum)))
#print(emission_probability[2][2])
for nn in range(int(statenum)):
    wordlist=lines[nn+2].split()#将每一行的数字分开放在列表中
    #print(len(wordlist))
    for k in range(int(statenum)):
        transititon_probability[nn][k]=math.log(float(wordlist[k]))
        #print(transititon_probability[nn][k])
    for k in range(int(letternum)):
        emission_probability[nn][k]=math.log(float(wordlist[k+statenum]))
        #print("emi")
        #print(emission_probability[nn][k])
    #print("read%d"%(nn))




for line in f:#get fasta file
    for a in line:
        if(a==">"):
            break
        for k in range(len(alp)):
            if(a.lower()==alp[k]):
                obser.append(k)
#transititon_probability = np.array([[math.log(0.999), math.log(0.001)], [math.log(0.01), math.log(0.99)]])#transition table
#emission_probability = np.array([[math.log(0.35), math.log(0.15), math.log(0.15),math.log(0.35)], [math.log(0.15), math.log(0.35), math.log(0.35),math.log(0.15)]])#emission table
result = compute(obser, state_s, start_probability, transititon_probability, emission_probability)
coun=0
sta=-1
pos=1
#print(result)
for k in range(len(result)):
    if (sta != -1 and sta != int(result[k])):
        print("%7d%7d state %s" % (pos, k, hidden_state[sta]))
        f1.write("%7d%7d state %s\n" % (pos, k, hidden_state[sta]))
        if (sta == 1):
            coun += 1
        pos = k+1
    sta = int(result[k])
print("%7d%7d state %s" % (pos, k+1, hidden_state[sta]))
f1.write("%7d%7d state %s\n" % (pos, k+1, hidden_state[sta]))

  

你可能感兴趣的:(viterbi)