统计学习方法 (Statistical Learning Methods), Chapter 6 Exercises

6.1 Verify that the logistic distribution belongs to the exponential family: confirmed (a sketch of the argument follows).
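The original answer gives no derivation, so here is a minimal sketch, assuming the exercise refers to the book's binomial logistic regression model. An exponential-family distribution has the form

P(y; \eta) = b(y)\,\exp\big(\eta^{\top} T(y) - a(\eta)\big).

With \pi = P(Y=1 \mid x) = \dfrac{e^{w \cdot x}}{1 + e^{w \cdot x}}, the conditional distribution of the model can be rewritten as

P(Y=y \mid x) = \pi^{y}(1-\pi)^{1-y}
             = \exp\!\Big(y \log\frac{\pi}{1-\pi} + \log(1-\pi)\Big),

so \eta = \log\dfrac{\pi}{1-\pi} = w \cdot x, \quad T(y) = y, \quad a(\eta) = \log(1 + e^{\eta}), \quad b(y) = 1,

which is exactly the exponential-family form.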

6.2 Gradient descent for the logistic regression model
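The code below performs gradient ascent on the log-likelihood (equivalently, gradient descent on its negative). With the bias absorbed into w by appending a constant-1 feature, the quantities implemented by Lw and gradient are

L(w) = \sum_{i=1}^{N}\Big[y_i\,(w \cdot x_i) - \log\big(1 + e^{w \cdot x_i}\big)\Big],
\qquad
\nabla L(w) = \sum_{i=1}^{N} x_i\big(y_i - \sigma(w \cdot x_i)\big),
\quad \sigma(z) = \frac{1}{1 + e^{-z}}.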

# -*- coding: utf-8 -*-


import numpy as np
import matplotlib.pyplot as plt

            
class Logistic:
    def __init__(self, X, Y):
        # append a constant-1 column so the bias is absorbed into w
        self.X = np.c_[X, np.ones(X.shape[0])]
        self.Y = Y
        self.w = np.zeros(self.X.shape[1])

    def prob(self, w, xin):
        # sigmoid; returns (P(y=0|x), P(y=1|x))
        prb = 1.0 / (1 + np.exp(-np.dot(w, xin)))
        return 1 - prb, prb
    
    def training(self, e, rate):
        # log-likelihood L(w) = sum_i [ y_i (w·x_i) - log(1 + exp(w·x_i)) ]
        def Lw(X, Y, w):
            total = 0.0
            for i, yi in enumerate(Y):
                # logaddexp(0, z) = log(1 + e^z) without overflow for large z
                total += yi * np.dot(w, X[i]) - np.logaddexp(0.0, np.dot(w, X[i]))
            return total

        # gradient of L(w): sum_i x_i (y_i - sigma(w·x_i))
        def gradient(X, Y, w):
            total = np.zeros(X.shape[1])
            for i, xi in enumerate(X):
                _, prb = self.prob(w, xi)
                total += xi * (Y[i] - prb)
            return total
        
        # Appendix A: gradient method
        # step 1: initialization
        X = self.X
        Y = self.Y
        w = self.w
        prelw = 0
        gw = 0
        gwf = float("inf")  # forces entry into the first iteration
        # stop iterating once the gradient norm falls below e
        while gwf > e:  # a maximum iteration count could also be added here
            w += rate * gw  # ascent step: move along the gradient of L(w)
            # evaluate L(w)
            nowlw = Lw(X, Y, w)
            print(w)
            # also stop when the likelihood changes by less than e
            if abs(prelw - nowlw) < e:
                break
            else:
                prelw = nowlw
                # compute the gradient
                gw = gradient(X, Y, w)
                # gradient norm
                gwf = np.linalg.norm(gw, ord=2)

        print("success")
        self.w = w
        print(w)
                
            

def main():
    np.random.seed(12)
    num_observations = 5000
    # synthetic dataset: two correlated Gaussian blobs (adapted from a common example)
    x1 = np.random.multivariate_normal([0, 0], [[1, .75], [.75, 1]], num_observations)
    x2 = np.random.multivariate_normal([1, 4], [[1, .75], [.75, 1]], num_observations)

    X = np.vstack((x1, x2)).astype(np.float32)
    Y = np.hstack((np.zeros(num_observations),
                   np.ones(num_observations)))

    lr = Logistic(X, Y)
    lr.training(0.01, 0.001)
    yout = []
    # training-set accuracy of the fitted model
    for xin in lr.X:
        prob0, prob1 = lr.prob(lr.w, xin)
        yout.append(0 if prob0 > prob1 else 1)
    acc = sum([1 if yout[i] == Y[i] else 0 for i in range(len(Y))]) / len(Y)

    print(acc)

    plt.figure(figsize=(12, 8))
    plt.scatter(X[:, 0], X[:, 1], c=Y, alpha=.4)

    plt.show()


if __name__ == '__main__':
    main()
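The per-sample Python loops in Lw and gradient are slow on the 10,000-point dataset. A minimal vectorized sketch of the same two quantities (the function names here are mine, not part of the original class):

import numpy as np

def log_likelihood(X, Y, w):
    # L(w) = sum_i [ y_i (w·x_i) - log(1 + exp(w·x_i)) ], with no Python loop
    z = X @ w
    return np.sum(Y * z - np.logaddexp(0.0, z))

def log_likelihood_grad(X, Y, w):
    # gradient: sum_i x_i (y_i - sigma(w·x_i)), as a single matrix product
    p = 1.0 / (1.0 + np.exp(-(X @ w)))
    return X.T @ (Y - p)

Dropping these in for Lw and gradient leaves the iteration logic in training unchanged.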

6.3 Write out the DFP algorithm for maximum entropy model learning. (The posted code initializes a maximum entropy model for B/M/E/S tagging and trains it with IIS; a DFP sketch follows after the code.)

# -*- coding: utf-8 -*-
import numpy as np


class MaxEntropy:
    def __init__(self, path):
        self.M = 100  # constant M used in the closed-form update of formula 6.34
        self.labels = ['B', 'M', 'E', 'S']
        self.LoadData(path)
        self.Initallparamt()
    
    # load the training file
    def LoadData(self, path):
        self.traindata = []
        with open(path, encoding='utf8') as f:
            for line in f:
                line = line.strip()
                if not line:  # skip blank lines
                    continue
                for l in line.split():
                    # each token has the form 'character/tag', e.g. '年/E'
                    self.traindata.append(l.split('/'))
    # initialize parameters
    def Initallparamt(self):
        N = len(self.traindata)
        # feature functions fi(x, y): one indicator per training pair (duplicates included)
        self.f = [lambda x, y, _x=xy[0], _y=xy[1]: 1 if x == _x and y == _y else 0
                  for xy in self.traindata]
        # sanity check
        assert N == len(self.f)
        # initialize the weights w
        self.w = np.zeros(N)
        # empirical joint distribution p~(x, y)
        pxy_dic = {}
        # model distribution p(y|x)
        pyx_dic = {}
        # empirical marginal distribution p~(x)
        px_dic = {}
        # count occurrences for p~(x, y) and p~(x)
        for xy in self.traindata:
            if (xy[0], xy[1]) not in pxy_dic:
                pxy_dic[(xy[0], xy[1])] = 0.0
                pyx_dic[(xy[0], xy[1])] = 0.0  # placeholder; filled in during training
            pxy_dic[(xy[0], xy[1])] += 1.0
            if xy[0] not in px_dic:
                px_dic[xy[0]] = 0.0
            px_dic[xy[0]] += 1.0
        # normalize counts into probabilities; the two dicts have different
        # lengths, so each needs its own loop
        for x in px_dic:
            px_dic[x] /= N
        for xy in pxy_dic:
            pxy_dic[xy] /= N
        self.px_dic = px_dic    # p~(x)
        self.pxy_dic = pxy_dic  # p~(x, y)
        self.pyx_dic = pyx_dic  # p(y|x)
        
    # Zw(x), formula 6.23 in the book: normalizer summed over all labels y
    def ComputZw(self, x):
        zw = 0.0
        for y in self.labels:
            zw += self.Computwf(x, y)
        return zw

    # numerator of formula 6.22: exp(sum_i w_i * f_i(x, y))
    def Computwf(self, x, y):
        sumwf = 0.0
        for i, wi in enumerate(self.w):
            sumwf += wi * self.f[i](x, y)
        return np.exp(sumwf)

    # model distribution Pw(y|x), formula 6.22
    def ComputPwyx(self, x, y):
        return self.Computwf(x, y) / self.ComputZw(x)

    # expectation of fi under the empirical joint distribution p~(x, y)
    def ComputEp_(self, fi):
        ep_ = 0.0
        for xy in self.pxy_dic.keys():
            ep_ += self.pxy_dic[xy] * fi(xy[0], xy[1])
        return ep_
               
    # IIS (Algorithm 6.1); with M fixed, each delta_i has the closed form of formula 6.34
    def trainIIS(self, max_iter=10):
        # expectation of fi under the empirical p~(x) and the model P(y|x)
        def ComputEp(fi):
            ep = 0.0
            for xy in self.pyx_dic.keys():
                ep += self.px_dic[xy[0]] * self.pyx_dic[xy] * fi(xy[0], xy[1])
            return ep

        # initialize delta
        delta = np.zeros(len(self.w))
        f = self.f
        for _ in range(max_iter):
            # refresh the model distribution Pw(y|x) under the current weights
            for xy in self.pyx_dic.keys():
                self.pyx_dic[xy] = self.ComputPwyx(xy[0], xy[1])
            # delta_i = log(Ep~(fi) / Ep(fi)) / M   (formula 6.34)
            for i, fi in enumerate(f):
                delta[i] = np.log(self.ComputEp_(fi) / ComputEp(fi)) / self.M
            self.w += delta
            # no convergence test on w here; one could stop when ||w_pre - w|| is small

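The exercise itself asks for DFP (Appendix B of the book), whereas the class above trains with IIS. Below is a minimal sketch of the generic DFP quasi-Newton iteration on an abstract objective; for the maximum entropy model one would take f(w) = \sum_x \tilde{p}(x) \log Z_w(x) - \sum_{x,y} \tilde{p}(x,y) \sum_i w_i f_i(x,y) and its gradient, built from px_dic, pxy_dic, and ComputZw above (that wiring is my assumption, not shown in the original post):

# -*- coding: utf-8 -*-
import numpy as np


def dfp(f, grad, w0, eps=1e-5, max_iter=100):
    # generic DFP: minimize f from w0, keeping G as an inverse-Hessian approximation
    w = np.asarray(w0, dtype=float)
    G = np.eye(len(w))               # G0 = identity
    g = grad(w)
    for _ in range(max_iter):
        if np.linalg.norm(g) < eps:  # stop when the gradient is small enough
            break
        p = -G @ g                   # quasi-Newton search direction
        lam = 1.0                    # crude backtracking line search (placeholder)
        while f(w + lam * p) > f(w) and lam > 1e-10:
            lam *= 0.5
        w_new = w + lam * p
        g_new = grad(w_new)
        delta = w_new - w
        gamma = g_new - g
        # DFP update of the inverse-Hessian approximation:
        # G <- G + delta deltaT/(deltaT gamma) - G gamma gammaT G/(gammaT G gamma)
        Gg = G @ gamma
        G = G + np.outer(delta, delta) / (delta @ gamma) - np.outer(Gg, Gg) / (gamma @ Gg)
        w, g = w_new, g_new
    return w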
The training data in 'danci.txt' (each token is character/tag; B, M, and E mark the beginning, middle, and end of a multi-character word, and S marks a single-character word):

1/B 9/M 8/M 6/M 年/E ,/S 
十/B 亿/E 中/B 华/E 儿/B 女/E 踏/B 上/E 新/S 的/S 征/B 程/E 。/S 
过/B 去/E 的/S 一/S 年/S ,/S 
是/S 全/B 国/E 各/B 族/E 人/B 民/E 在/S 中/B 国/E 共/B 产/M 党/E 领/B 导/E 下/S ,/S 
在/S 建/B 设/E 有/S 中/B 国/E 特/B 色/E 的/S 社/B 会/M 主/M 义/E 道/B 路/E 上/S ,/S 
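A minimal usage sketch (assuming 'danci.txt' sits next to the script; the original post never instantiates the class). Since the implementation keeps one feature per training token, it is only practical on toy data like this:

me = MaxEntropy('danci.txt')
me.trainIIS(max_iter=10)
print(me.w[:10])  # first few learned weights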

 
