理解前后向算法,而且我前面的隐马尔可夫里的前后向算法结果是相同的
理解如何从特征*权重对应x,y的概率,可以与前面的判别模型对比
# -*- coding:utf8 -*-
"""
说明:CRF 梯度下降(不算完整的)
作者:nkenen
时间:2020年5月10日
"""
import numpy as np
class CRF():
def __init__(self,path):
self.traindata = []
self.Y_set = set()
self.X_set = set()
self.px = {}#x的经验分布
self.pxy = {}#p(x,y)联合概率
self.totalcount = 0.0
self.loaddata(path)
self.f = self.createfi()
self.wi = np.array([[0.0]*len(self.f)])
assert self.wi.shape[1] == len(self.f)
def loaddata(self,path):
with open(path,encoding='utf8') as f:
for line in f:
if line == [] or line == '\n':
return
line = line.strip()
linelist = line.split()
line_l = []
line_l.append([None,'start'])
for l in linelist:
xy = l.split('/')
line_l.append(xy)
if xy[0] not in self.px:
self.px[xy[0]] = 0.0
self.px[xy[0]]+=1
if xy[0] not in self.pxy and xy[1] not in self.pxy:
self.pxy[xy[0],xy[1]] = 0.0
self.pxy[xy[0],xy[1]]+=1
self.totalcount += 1
self.X_set.add(xy[0])
self.Y_set.add(xy[1])
line_l.append([None,'end'])
self.traindata.append(line_l)
#最后预测序列时需要定位yi,转为list
self.X_set = list(self.X_set)
self.Y_set = list(self.Y_set)
"""
print(self.Y_set)
print(self.X_set)
print(self.px)
print(self.pxy)
print(self.totalcount)
"""
pass
#创建特征
def createfi(self):
feature1 = [lambda y_pre,y,x,i,_yp='start',_yi=yi:1 if y_pre==_yp and y==_yi and i==1 else 0 for yi in self.Y_set]
feature2 = [lambda y_pre,y,x,i,_yp=yp,_yi=yi:1 if y_pre==_yp and y==_yi and i>1 else 0 for yp in self.Y_set for yi in self.Y_set]
feature3 = [lambda y_pre,y,x,i,_yp=yp,_yi='end':1 if y_pre==_yp and y==_yi and i==(len(x)-1) else 0 for yp in self.Y_set]
feature4 = [lambda y_pre,y,x,i,_yi=yi,_xi=xi:1 if y==_yi and x[i]==_xi else 0 for xi in self.X_set for yi in self.Y_set]
return feature1+feature2+feature3+feature4
#计算fi,我一直是从0开始,range可以限制,我忘记了
def computfi(self,x_v,y_v,fi):
fi_sum = 0.0
for i,yi in enumerate(y_v):
if i == 0:
continue
fi_sum += fi(y_v[i-1],yi,x_v,i)
return fi_sum
#计算g(w)的公式,感觉公式我有可能理解错误,有错误请联系我
#计算ep(fi),书中p231,11.51
def computep(self,x_v,y_v,fi):
def computpxy(x_v,y_v):
pxy = 1.0
for i,yi in enumerate(y_v):
if i == 0:
continue
if yi == 'end':
pxy *= 1.0
else:
pxy *=self.pxy[x_v[i],yi]/self.totalcount
return pxy
return computpxy(x_v, y_v)*self.computfi(x_v, y_v, fi)
#前向算法,本次没有使用
def forward(self,x_v,M_M):
A_M = []
for i in range(len(x_v)):
if i ==0 :
ai = np.array([1.0]*len(self.Y_set))
else:
ai = A_M[i-1].dot(M_M[i-1])
A_M.append(ai)
return A_M
#后向算法,本次没有使用
def backward(self,x_v,M_M):
B_M = [None]*len(x_v)
x_len = len(x_v)
for i in range(x_len-1,-1,-1):
if i ==len(x_v)-1 :
bi = np.array([1.0]*len(self.Y_set))
else:
bi = M_M[i-1].dot(B_M[i+1].T)
B_M[i] = bi
return B_M
#矩阵Mi中的W
def comptW(self,y_pre,y,x_v,i):
f_v = []
for fi in self.f:
f_v.append(fi(y_pre,y,x_v,i))
return np.exp(self.wi.dot(np.array([f_v]).T)[0][0])
#计算ewp时,我使用了11.49公式的上半部分,和11.24的下半部分
#因为是特定的y组合序列,若有错误请指正
def computewp(self,x_v,y_v,fi):
def computM(x_v):
M_M = []
for i,_ in enumerate(x_v):
if i ==0 :
continue
if i==1:
mi = np.array([[self.comptW('start', yi, x_v, i) if j==0 else 0.0 for k,yi in enumerate(self.Y_set)]for j,yp in enumerate(self.Y_set)])
elif i == len(x_v)-1:
mi = np.array([[1 if k==0 else 0 for k,yi in enumerate(self.Y_set)]for j,yp in enumerate(self.Y_set)])
else:
mi = np.array([[self.comptW(yp, yi, x_v, i) for k,yi in enumerate(self.Y_set)]for j,yp in enumerate(self.Y_set)])
M_M.append(mi)
return M_M
"""
crf不完整体现:我并没有将所有可能路径算出,我假定了start=None
因为我是在做习题11.4时才理解,M可以进行选择,由start和end的值去选择,其实用书中例题一样也可以,
在计算pw(y|x)时没有加上前后向矩阵是错误,由于电脑计算慢,这边就不加上了,由于没有验证请自己重新编写
"""
def computpwxy(x_v,y_v):
#先获得所有的Mi矩阵
M_M = computM(x_v)
#用矩阵求Z比较容易
Z = None
for i,M in enumerate(M_M):
if i == 0:
Z= M
Z = Z.dot(M)
#这时候Z[0][0]为所有的路径概率和
pwxy = 1.0
for i,yi in enumerate(y_v):
if i ==0:
continue
pwxy *= self.comptW(y_v[i-1], yi, x_v, i)
return pwxy/Z[0][0]
#这是X序列的xi出现概率积,这边我不清楚理解的对不对
def computpxv(x_v):
pxv = 1.0
for i,xi in enumerate(x_v):
if i == 0 or i ==len(x_v)-1:
continue
pxv *= self.px[xi]/self.totalcount
return pxv
return computpxv(x_v)*computpwxy(x_v, y_v)*self.computfi(x_v, y_v, fi)
#判断不多的梯度下降,因为我电脑实在太垃圾了,计算太久了还是简化终止判断
def traing(self,max_iter=100):
for _ in range(max_iter):
gw = []
for fi in self.f:
gwi = 0.0
for xy_v in self.traindata:
x_v = [xy[0] for xy in xy_v]
y_v = [xy[1] for xy in xy_v]
gwi += self.computewp(x_v, y_v, fi) - self.computep(x_v, y_v, fi)
gw.append(gwi)
gw = np.array([gw])
fanshu = np.linalg.norm(gw,ord=2)
print(fanshu)
#在这只判断gw的范数是否收敛,我没有收敛成功过
if fanshu<=1e-10:
break
self.wi -= gw
print(self.wi)
#维特比预测
def predict(self,x_test):
def computFi(y_pre,yi,x_v,i):
Fi = []
for fi in self.f:
Fi.append(fi(y_pre ,yi,x_v,i))
return np.array([Fi]).T
delta = []
psi = []
for i in range(len(x_test)):
deltai = []
psii = []
if i == 0:
continue
#由start开始
elif i==1:
for yj in self.Y_set:
deltai.append(self.wi.dot(computFi('start', yj, x_test, i))[0][0])
#由end结束
elif i==len(x_test)-1:
max_prob = 0.0
max_yj = None
for j,yj in enumerate(self.Y_set):
prob = delta[i-2][j]+self.wi.dot(computFi(yj, 'end', x_test, i))[0][0]
max_yj = yj if prob>max_prob else max_yj
max_prob = prob if prob>max_prob else max_prob
deltai.append(max_prob)
psii.append(max_yj)
else:
for yl in self.Y_set:
max_prob = 0.0
max_yj = None
for j,yj in enumerate(self.Y_set):
prob = delta[i-2][j]+self.wi.dot(computFi(yj, yl, x_test, i))[0][0]
max_yj = yj if prob>max_prob else max_yj
max_prob = prob if prob>max_prob else max_prob
deltai.append(max_prob)
psii.append(max_yj)
psi.append(psii)
delta.append(deltai)
print(psi)
path = []
j = 0
#回溯
for i in range(len(x_test)-1,-1,-1):
if i <=1:
break
y_end = psi[i-1][j]
j = self.Y_set.index(y_end)
path.insert(0,y_end)
print(path)
def main():
crf = CRF("data.txt")
crf.traing(3)
#开始值结束值为None
crf.predict([None,'我', '十分', '高兴', '地', '致以', '诚挚', '的', '问候', '!',None])
if __name__ == "__main__":
main()