统计学习第11章 习题

习题11.1

P(Y) = \frac{\Psi _{c}(Y_{1},Y_{2},Y_{3})*\Psi _{c}(Y_{2},Y_{3},Y_{4})}{Z}

 

习题11.2

理解前后向算法,而且我前面的隐马尔可夫里的前后向算法结果是相同的

 

习题11.3

理解如何从特征*权重对应x,y的概率,可以与前面的判别模型对比

# -*- coding:utf8 -*-  

"""
说明:CRF 梯度下降(不算完整的)

作者:nkenen 
	
时间:2020年5月10日 

"""
import numpy as np


class CRF():
    def __init__(self,path):
        self.traindata = []
        self.Y_set = set()
        self.X_set = set()
        self.px = {}#x的经验分布
        self.pxy = {}#p(x,y)联合概率
        self.totalcount = 0.0
        self.loaddata(path)
        self.f = self.createfi()
        self.wi = np.array([[0.0]*len(self.f)])
        assert self.wi.shape[1] == len(self.f)
       
    def loaddata(self,path):
        with open(path,encoding='utf8') as f:
            for line in f:
                if line == [] or line == '\n':
                    return
                line = line.strip()
                linelist = line.split()
                line_l = []
                line_l.append([None,'start'])
                for l in linelist:
                    xy = l.split('/')
                    line_l.append(xy)
                    if xy[0] not in self.px:
                        self.px[xy[0]] = 0.0
                    self.px[xy[0]]+=1
                    if xy[0] not in self.pxy and xy[1] not in self.pxy:
                        self.pxy[xy[0],xy[1]] = 0.0
                    self.pxy[xy[0],xy[1]]+=1                    
                    self.totalcount += 1
                    self.X_set.add(xy[0])
                    self.Y_set.add(xy[1])
                line_l.append([None,'end'])
                self.traindata.append(line_l)
        #最后预测序列时需要定位yi,转为list
        self.X_set = list(self.X_set)
        self.Y_set = list(self.Y_set)
        """
        print(self.Y_set)
        print(self.X_set)
        print(self.px)
        print(self.pxy)
        print(self.totalcount)        
        """
        pass
    #创建特征
    def createfi(self):
        feature1 = [lambda y_pre,y,x,i,_yp='start',_yi=yi:1 if y_pre==_yp and y==_yi and i==1 else 0 for yi in self.Y_set]
        feature2 = [lambda y_pre,y,x,i,_yp=yp,_yi=yi:1 if y_pre==_yp and y==_yi and i>1 else 0 for yp in self.Y_set for yi in self.Y_set]
        feature3 = [lambda y_pre,y,x,i,_yp=yp,_yi='end':1 if y_pre==_yp and y==_yi and i==(len(x)-1) else 0 for yp in self.Y_set]
        feature4 = [lambda y_pre,y,x,i,_yi=yi,_xi=xi:1 if y==_yi and x[i]==_xi else 0 for xi in self.X_set for yi in self.Y_set]
        
        return feature1+feature2+feature3+feature4
    #计算fi,我一直是从0开始,range可以限制,我忘记了
    def computfi(self,x_v,y_v,fi):
        fi_sum = 0.0
        for i,yi in enumerate(y_v):
            if i == 0:
                continue  
            fi_sum += fi(y_v[i-1],yi,x_v,i)
        return fi_sum 
    #计算g(w)的公式,感觉公式我有可能理解错误,有错误请联系我
    #计算ep(fi),书中p231,11.51
    def computep(self,x_v,y_v,fi):
        def computpxy(x_v,y_v):
            pxy = 1.0
            for i,yi in enumerate(y_v):
                if i == 0:
                    continue
                if yi == 'end':
                    pxy *= 1.0
                else:
                    pxy *=self.pxy[x_v[i],yi]/self.totalcount
            return pxy    
        
        return computpxy(x_v, y_v)*self.computfi(x_v, y_v, fi) 
    #前向算法,本次没有使用
    def forward(self,x_v,M_M):
        A_M = []
        for i in range(len(x_v)):
            if i ==0 :
                ai = np.array([1.0]*len(self.Y_set))
            else:
                ai = A_M[i-1].dot(M_M[i-1])
            A_M.append(ai)

        return A_M
    #后向算法,本次没有使用
    def backward(self,x_v,M_M):
        B_M = [None]*len(x_v)
        x_len = len(x_v)
        for i in range(x_len-1,-1,-1):
            if i ==len(x_v)-1 :
                bi = np.array([1.0]*len(self.Y_set))
            else:
                bi = M_M[i-1].dot(B_M[i+1].T)
            B_M[i] = bi

        return B_M  
    #矩阵Mi中的W
    def comptW(self,y_pre,y,x_v,i):    
        f_v = [] 
        for fi in self.f:
            f_v.append(fi(y_pre,y,x_v,i))

        return np.exp(self.wi.dot(np.array([f_v]).T)[0][0])    
    #计算ewp时,我使用了11.49公式的上半部分,和11.24的下半部分
    #因为是特定的y组合序列,若有错误请指正
    def computewp(self,x_v,y_v,fi):
        def computM(x_v):
            M_M = []
            for i,_ in enumerate(x_v):
                if i ==0 :
                    continue
                if i==1:
                    mi = np.array([[self.comptW('start', yi, x_v, i) if j==0 else 0.0 for k,yi in enumerate(self.Y_set)]for j,yp in enumerate(self.Y_set)])
                elif i == len(x_v)-1:
                    mi = np.array([[1 if k==0 else 0 for k,yi in enumerate(self.Y_set)]for j,yp in enumerate(self.Y_set)])
                else:
                    mi = np.array([[self.comptW(yp, yi, x_v, i) for k,yi in enumerate(self.Y_set)]for j,yp in enumerate(self.Y_set)])
                M_M.append(mi)
        
            return M_M
        """
        crf不完整体现:我并没有将所有可能路径算出,我假定了start=None
        因为我是在做习题11.4时才理解,M可以进行选择,由start和end的值去选择,其实用书中例题一样也可以,
        在计算pw(y|x)时没有加上前后向矩阵是错误,由于电脑计算慢,这边就不加上了,由于没有验证请自己重新编写
        """
        def computpwxy(x_v,y_v):
            #先获得所有的Mi矩阵
            M_M = computM(x_v)
            #用矩阵求Z比较容易
            Z = None
            for i,M in enumerate(M_M):
                if i == 0:
                    Z= M
                Z = Z.dot(M)
            #这时候Z[0][0]为所有的路径概率和
            pwxy = 1.0
            for i,yi in enumerate(y_v):
                if i ==0:
                    continue
                pwxy *= self.comptW(y_v[i-1], yi, x_v, i)
            return pwxy/Z[0][0]            
        #这是X序列的xi出现概率积,这边我不清楚理解的对不对
        def computpxv(x_v):
            pxv = 1.0
            for i,xi in enumerate(x_v):
                if i == 0 or i ==len(x_v)-1:
                    continue
                pxv *= self.px[xi]/self.totalcount
            return pxv
       
        
        return computpxv(x_v)*computpwxy(x_v, y_v)*self.computfi(x_v, y_v, fi)  
    
    #判断不多的梯度下降,因为我电脑实在太垃圾了,计算太久了还是简化终止判断 
    def traing(self,max_iter=100):       
        for _ in range(max_iter):
            gw = []
            for fi in self.f:   
                gwi = 0.0
                for xy_v in self.traindata:
                    x_v = [xy[0] for xy in xy_v]
                    y_v = [xy[1] for xy in xy_v]
                    
                    gwi += self.computewp(x_v, y_v, fi) - self.computep(x_v, y_v, fi)
                gw.append(gwi)   
            gw = np.array([gw])
            fanshu = np.linalg.norm(gw,ord=2)
            print(fanshu)
            #在这只判断gw的范数是否收敛,我没有收敛成功过
            if fanshu<=1e-10:
                break
            self.wi -= gw
            print(self.wi)
    #维特比预测   
    def predict(self,x_test):
        def computFi(y_pre,yi,x_v,i):
            Fi = []
            for fi in self.f:
                Fi.append(fi(y_pre ,yi,x_v,i))
            return np.array([Fi]).T
        delta = []
        psi = []
        for i in range(len(x_test)):
            deltai = []
            psii = []
            if i == 0:
                continue
            #由start开始
            elif i==1:
                for yj in self.Y_set:
                    deltai.append(self.wi.dot(computFi('start', yj, x_test, i))[0][0])
            #由end结束
            elif i==len(x_test)-1:
                max_prob = 0.0
                max_yj = None
                for j,yj in enumerate(self.Y_set): 
                    prob = delta[i-2][j]+self.wi.dot(computFi(yj, 'end', x_test, i))[0][0]
                    max_yj = yj if prob>max_prob else max_yj
                    max_prob = prob if prob>max_prob else max_prob 
                deltai.append(max_prob)
                psii.append(max_yj)                
            else:
                for yl in self.Y_set:
                    max_prob = 0.0
                    max_yj = None 
                    for j,yj in enumerate(self.Y_set):
                        prob = delta[i-2][j]+self.wi.dot(computFi(yj, yl, x_test, i))[0][0]
                        max_yj = yj if prob>max_prob else max_yj
                        max_prob = prob if prob>max_prob else max_prob
                    deltai.append(max_prob)
                    psii.append(max_yj)
            psi.append(psii)
            delta.append(deltai)
            
        print(psi)    
        path = []
        j = 0
        #回溯
        for i in range(len(x_test)-1,-1,-1):
            if i <=1:                
                break
            y_end = psi[i-1][j]
            j = self.Y_set.index(y_end)
            path.insert(0,y_end)
        print(path)     
               
                    
    

def main():
    crf = CRF("data.txt")
    crf.traing(3)
    #开始值结束值为None
    crf.predict([None,'我', '十分', '高兴', '地', '致以', '诚挚', '的', '问候', '!',None])
    


if __name__ == "__main__":
    main()

习题11.4

统计学习第11章 习题_第1张图片

 

 

 

 

你可能感兴趣的:(自然语言处理)