PyTorch Deep Learning: Introduction and Practice, Part 3: Recurrent Neural Networks

1. Common recurrent neural networks
RNN, LSTM, GRU

  • RNN
    torch.nn.RNN()
    In a plain RNN, the weights/gradients grow or shrink exponentially as the number of recurrent steps increases (the exploding/vanishing gradient problem), so it struggles to capture long-range dependencies and converges poorly during training.

  • LSTM
    Introduces a gating mechanism that gives the network a stronger memory.
    The LSTM processes information in three stages:

  • Forget stage: selectively forget part of the information passed in from the previous step.

  • Selective memory (input) stage: decide which parts of the current input to keep.

  • Output stage: decide what to emit as the current output/hidden state.

  • GRU (Gated Recurrent Unit)
    An LSTM has many parameters, which makes it relatively hard to train.
    The GRU combines the forget gate and the input gate into a single update gate, reducing the number of gates and parameters; a minimal shape/parameter comparison of the three modules follows this list.
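
As a rough, illustrative comparison (a minimal sketch with arbitrarily chosen sizes, not part of the original example), the three modules share the same call signature, and their parameter counts reflect the number of gates:

import torch
import torch.nn as nn

x = torch.randn(8, 20, 32)                       # (batch, seq_len, input_dim) with batch_first layout
rnn = nn.RNN(32, 64, batch_first=True)
lstm = nn.LSTM(32, 64, batch_first=True)
gru = nn.GRU(32, 64, batch_first=True)
out, h_n = rnn(x)                                # out: (8, 20, 64), h_n: (1, 8, 64)
out, (h_n, c_n) = lstm(x)                        # the LSTM also returns a cell state c_n
out, h_n = gru(x)
count = lambda m: sum(p.numel() for p in m.parameters())
print(count(rnn), count(gru), count(lstm))       # GRU has ~3x, LSTM ~4x the plain RNN's weights
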
2. Handwritten digit classification with an RNN

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torch.utils.data as Data
from torchvision import transforms
import hiddenlayer as hl

train_data=torchvision.datasets.MNIST(
    root="./data/MNIST",train=True,transform=transforms.ToTensor(),download=False
)
train_loader=Data.DataLoader(
    dataset=train_data,batch_size=64,shuffle=True,num_workers=2
)
test_data=torchvision.datasets.MNIST(
    root="./data/MNIST",train=False,transforms=transforms.ToTensor(),download=False
)
test_loader=Data.DataLoader(
    dataset=test_data,batch_size=64,shuffle=True,num_workers=2
)

class RNNimc(nn.Module):
    def __init__(self,input_dim,hidden_dim,layer_dim,output_dim):
        super(RNNimc,self).__init__()
        self.hidden_dim=hidden_dim
        self.layer_dim=layer_dim
        # batch_first=True: input shape is (batch, seq_len, input_dim)
        self.rnn=nn.RNN(input_dim,hidden_dim,layer_dim,batch_first=True,nonlinearity='relu')
        self.fc1=nn.Linear(hidden_dim,output_dim)
    def forward(self,x):
        out,h_n=self.rnn(x,None)
        out=self.fc1(out[:,-1,:])
        return out
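
As a quick sanity check (a hypothetical batch, not part of the original tutorial), each 28x28 MNIST image is fed to the RNN as a sequence of 28 rows, each row being a 28-dimensional input vector:

# hypothetical smoke test: 4 fake "images" treated as 28-step sequences of 28 features
dummy = torch.randn(4, 28, 28)
net = RNNimc(input_dim=28, hidden_dim=128, layer_dim=1, output_dim=10)
print(net(dummy).shape)   # torch.Size([4, 10]), one score per digit class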

input_dim=28
hidden_dim=128
layer_dim=1
output_dim=10
MyRNNimc=RNNimc(input_dim,hidden_dim,layer_dim,output_dim)
print(MyRNNimc)

hl_graph=hl.build_graph(MyRNNimc,torch.zeros([1,28,28]))
hl_graph.theme=hl.graph.THEMES["blue"].copy()
hl_graph


optimizer=torch.optim.RMSprop(MyRNNimc.parameters(),lr=0.0003)
criterion=nn.CrossEntropyLoss()
train_loss_all=[]
train_acc_all=[]
test_loss_all=[]
test_acc_all=[]
num_epochs=30
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch,num_epochs-1))
    MyRNNimc.train()
    corrects=0
    train_loss=0.0
    train_num=0
    for step,(b_x,b_y) in enumerate(train_loader):
        # each 28x28 image is treated as a sequence of 28 rows with 28 features
        xdata=b_x.view(-1,28,28)
        output=MyRNNimc(xdata)
        pre_lab=torch.argmax(output,1)
        loss=criterion(output,b_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss+=loss.item()*b_x.size(0)
        corrects+=torch.sum(pre_lab==b_y.data)
        train_num+=b_x.size(0)
    train_loss_all.append(train_loss/train_num)
    train_acc_all.append(corrects.double().item()/train_num)
    print('{} Train Loss: {:.4f} Train Acc: {:.4f}'.format(epoch,train_loss_all[-1],train_acc_all[-1]))
    MyRNNimc.eval()
    corrects=0
    test_loss=0.0
    test_num=0

    for step,(b_x,b_y) in enumerate(test_loader):
        xdata=b_x.view(-1,28,28)
        output=MyRNNimc(xdata)
        pre_lab=torch.argmax(output,1)
        loss=criterion(output,b_y)
        test_loss+=loss.item()*b_x.size(0)
        corrects+=torch.sum(pre_lab==b_y.data)
        test_num+=b_x.size(0)
    test_loss_all.append(test_loss/test_num)
    test_acc_all.append(corrects.double().item() / test_num)
    print('{} Test Loss: {:.4f} Test Acc: {:.4f}'.format(epoch, test_loss_all[-1], test_acc_all[-1]))
    
plt.figure(figsize=(14,5))
plt.subplot(1,2,1)
plt.plot(train_loss_all,"ro-",label="Train loss")
plt.plot(test_loss_all,"bs-",label="Val loss")
plt.legend()
plt.xlabel("epoch")
plt.ylabel("Loss")
plt.subplot(1,2,2)
plt.plot(train_acc_all,"ro-",label="Train acc")
plt.plot(test_acc_all,"bs-",label="Val acc")
plt.xlabel("epoch")
plt.ylabel("acc")
plt.legend()
plt.show()

3. Chinese news classification with an LSTM
Build a classifier that categorizes Chinese news texts.
The news dataset is a subset of THUCNews; it contains 10 text categories, each with 6,500 documents.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.font_manager import FontProperties
fonts=FontProperties(fname="/Library/Fonts/华文细黑.ttf")
import re
import string
import time
import copy
from sklearn.metrics import accuracy_score,confusion_matrix
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import jieba
import torch.utils.data as Data
from torchtext import data
from torchtext.vocab import Vectors

train_df=pd.read_csv("data/chap7/cnews/cnews.train.txt",sep="\t",
                     header=None,names=["label","text"])
val_df=pd.read_csv("data/chap7/cnews/cnews.val.txt",sep="\t",
                     header=None,names=["label","text"])
test_df=pd.read_csv("data/chap7/cnews/cnews.test.txt",sep="\t",
                     header=None,names=["label","text"])
stop_words=pd.read_csv("data/chap7/cnews/中文停用词库.txt",sep="\t",
                     header=None,names=["text"])
def chinese_pre(text_data):
    # lower-case, strip digits, segment with jieba, then remove stop words
    text_data=text_data.lower()
    text_data=re.sub(r"\d+","",text_data)
    text_data=list(jieba.cut(text_data,cut_all=False))
    text_data=[word.strip() for word in text_data if word not in stop_words.text.values]
    text_data=" ".join(text_data)
    return text_data
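
For example (a made-up sentence, only to illustrate what the preprocessing returns; the exact tokens depend on jieba's segmentation and the stop-word list):

# hypothetical call: digits are stripped and the remaining tokens are joined with spaces
print(chinese_pre("2023年北京房价持续上涨"))   # e.g. "北京 房价 持续 上涨"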

train_df["cutword"]=train_df.text.apply(chinese_pre)
val_df["cutword"]=val_df.text.apply(chinese_pre)
test_df["cutword"]=test_df.text.apply(chinese_pre)
train_df.cutword.head()

labelMap={"体育":0,"娱乐":1,"家居":2,"房产":3,"教育":4,"时尚":5,"时政":6,"游戏":7,"科技":8,"财经":9}
train_df["labelcode"]=train_df["label"].map(labelMap)
val_df["labelcode"]=val_df["label"].map(labelMap)
test_df["labelcode"]=test_df["label"].map(labelMap)

train_df[["labelcode","cutword"]].to_csv("data/chap7,cnews_train2.csv",index=False)
val_df[["labelcode","cutword"]].to_csv("data/chap7,cnews_val2.csv",index=False)
test_df[["labelcode","cutword"]].to_csv("data/chap7,cnews_test2.csv",index=False)

mytokenize=lambda x:x.split()
TEXT=data.Field(sequential=True,tokenize=mytokenize,include_lengths=True,use_vocab=True,batch_first=True,fix_length=400)
LABEL=data.Field(sequential=False,use_vocab=False,pad_token=None,unk_token=None)

text_data_fields=[
    ("labelcode",LABEL),
    ("cutword",TEXT)
]

traindata,valdata,testdata=data.TabularDataset.splits(
    path="data/chap7",format="csv",
    train="cnews_train2.csv",fields=text_data_fielsd,
    validation="cnews_val2.csv",
    test="cnews_test2.csv",skip_header=True
)
len(traindata),len(valdata),len(testdata)

TEXT.build_vocab(traindata,max_size=20000,vectors=None)
LABEL.build_vocab(traindata)

word_fre=TEXT.vocab.freqs.most_common(n=50)
word_fre=pd.DataFrame(data=word_fre,columns=["word","fre"])
word_fre.plot(x="word",y="fre",kind="bar",legend=False,figsize=(12,7))
plt.xticks(rotation=90,fontproperties=fonts,size=10)
plt.show()

BATCH_SIZE=64
train_iter=data.BucketIterator(traindata,batch_size=BATCH_SIZE)
val_iter=data.BucketIterator(valdata,batch_size=BATCH_SIZE)
test_iter=data.BucketIterator(testdata,batch_size=BATCH_SIZE)

class LSTMNet(nn.Module):
    def __init__(self,vocab_size,embedding_dim,hidden_dim,layer_dim,output_dim):
        super(LSTMNet, self).__init__()
        self.hidden_dim=hidden_dim
        self.layer_dim=layer_dim
        self.embedding=nn.Embedding(vocab_size,embedding_dim)
        self.lstm=nn.LSTM(embedding_dim,hidden_dim,layer_dim,batch_first=True)
        self.fc1=nn.Linear(hidden_dim,output_dim)
    def forward(self,x):
        embeds=self.embedding(x)
        r_out,(h_n,h_c)=self.lstm(embeds,None)
        out=self.fc1(r_out[:,-1,:])
        return out
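
As with the RNN model, a quick check on a hypothetical batch of token ids (values invented for illustration) confirms the expected shapes:

# hypothetical batch: 2 sequences of 400 word indices (matching fix_length above)
fake_ids = torch.randint(0, 100, (2, 400))
toy = LSTMNet(vocab_size=100, embedding_dim=100, hidden_dim=128, layer_dim=1, output_dim=10)
print(toy(fake_ids).shape)   # torch.Size([2, 10]), one score per news category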

vocab_size=len(TEXT.vocab)
embedding_dim=100
hidden_dim=128
layer_dim=1
output_dim=10
lstmmodel=LSTMNet(vocab_size,embedding_dim,hidden_dim,layer_dim,output_dim)
lstmmodel

def train_model2(model,traindataloader,valdataloader,criterion,optimizer,num_epochs=25,):
    train_loss_all=[]
    train_acc_all=[]
    val_loss_all=[]
    val_acc_all=[]
    since=time.time()
    for epoch in range(num_epochs):
        print('-'*10)
        print('Epoch {}/{}'.format(epoch,num_epochs-1))
        train_loss=0.0
        train_corrects=0
        train_num=0
        val_loss=0.0
        val_corrects=0
        val_num=0
        model.train()
        for step,batch in enumerate(traindataloader):
            textdata,target=batch.cutword[0],batch.labelcode.view(-1)
            out=model(textdata)
            pre_lab=torch.argmax(out,1)
            loss=criterion(out,target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()*len(target)
            train_corrects += torch.sum(pre_lab==target.data)
            train_num += len(target)
        train_loss_all.append(train_loss/train_num)
        train_acc_all.append(train_corrects.double().item()/train_num)
        print('{} Train Loss: {:.4f} Train Acc: {:.4f}'.format(epoch,train_loss_all[-1],train_acc_all[-1]))
        model.eval()
        for step,batch in enumerate(valdataloader):
            textdata,target=batch.cutword[0],batch.labelcode.view(-1)
            out=model(textdata)
            pre_lab=torch.argmax(out,1)
            loss=criterion(out,target)
            val_loss+=loss.item()*len(target)
            val_corrects+=torch.sum(pre_lab==target.data)
            val_num+=len(target)
        val_loss_all.append(val_loss/val_num)
        val_acc_all.append(val_corrects.double().item()/val_num)
        print('{} Val Loss: {:.4f} Val Acc: {:.4f}'.format(epoch, val_loss_all[-1], val_acc_all[-1]))
    train_process=pd.DataFrame(
        data={"epoch":range(num_epochs),
              "train_loss_all":train_loss_all,
              "train_acc_all":train_acc_all,
              "val_loss_all":val_loss_all,
              "val_acc_all":val_acc_all}
    )
    return model,train_process

optimizer=torch.optim.Adam(lstmmodel.parameters(),lr=0.0003)
loss_func=nn.CrossEntropyLoss()
lstmmodel,train_process=train_model2(lstmmodel,train_iter,val_iter,loss_func,optimizer,num_epochs=20)


plt.figure(figsize=(18,6))
plt.subplot(1,2,1)
plt.plot(train_process.epoch,train_process.train_loss_all,"r.-",label="Train loss")
plt.plot(train_process.epoch,train_process.val_loss_all,"bs-",label="Val loss")
plt.legend()
plt.xlabel("epoch number",size=13)
plt.ylabel("Loss value",size=13)
plt.subplot(1,2,2)
plt.plot(train_process.epoch,train_process.train_acc_all,"r.-",label="Train acc")
plt.plot(train_process.epoch,train_process.val_acc_all,"bs-",label="Val acc")
plt.xlabel("epoch number",size=13)
plt.ylabel("acc",size=13)
plt.legend()
plt.show()

lstmmodel.eval()
test_y_all=torch.LongTensor()
pre_lab_all=torch.LongTensor()

for step,batch in enumerate(test_iter):
    textdata,target=batch.cutword[0],batch.labelcode.view(-1)
    out=lstmmodel(textdata)
    pre_lab=torch.argmax(out,1)
    test_y_all=torch.cat((test_y_all,target))
    pre_lab_all=torch.cat((pre_lab_all,pre_lab))
acc=accuracy_score(test_y_all,pre_lab_all)
print("在测试数据集上的预测精度为:"acc)
class_label=["体育","娱乐","家居","房产","教育","时尚","时政","游戏","科技","财经"]
conf_mat=confusion_matrix(test_y_all,pre_lab_all)
df_cm=pd.DataFrame(conf_mat,index=class_label,columns=class_label)
heatmap=sns.heatmap(df_cm,annot=True,fmt="d",cmap="YlGnBu")
heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(),rotation=0,ha='right',fontproperties=fonts)
heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(),rotation=45,ha='right',fontproperties=fonts)
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()

Visualizing the distribution of the word vectors

from sklearn.manifold import TSNE
lstmmodel=torch.load("data/chap7/lstmmodel.pkl")
word2vec=lstmmodel.embedding.weight
words=TEXT.vocab.itos
tsne=TSNE(n_components=2,random_state=123)
word2vec_tsne=tsne.fit_transform(word2vec.data.numpy())
plt.figure(figsize=(10,8))
plt.scatter(word2vec_tsne[:,0],word2vec_tsne[:,1],s=4)
plt.title("所有词向量的分布情况",fontproperties=fpnts,size=15)
plt.show()

vis_word=["中国","市场","公司","美国","记者","学生","游戏","北京","投资","电影","银行","工作","留学","大学","经济","产品","设计","方面","玩家","学校","房价","专家","楼市"]
vis_word_index=[words.index(ii) for ii in vis_word]
plt.figure(figsize=(10,8))
for ii,index in enumerate(vis_word_index):
    plt.scatter(word2vec_tsne[index,0],word2vec_tsne[index,1])
    plt.text(word2vec_tsne[index,0],word2vec_tsne[index,1],vis_word[ii],fontproperties=fonts)
    plt.title("词向量的分布情况",fontproperties=fonts,size=15)
    plt.show()
    

4. Sentiment classification with a GRU network
Build a GRU network to classify the IMDB movie-review data. The files imdb_train.csv and imdb_test.csv were introduced and preprocessed earlier.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import time
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torchtext import data
from torchtext.vocab import Vectors

mytokenize=lambda x:x.split()
TEXT=data.Field(sequential=True,tokenize=mytokenize,include_lengths=True,use_vocab=True,batch_first=True,fix_length=200)
LABEL=data.Field(sequential=False,use_vocab=False,pad_token=None,unk_token=None)

train_test_fields=[
    ("label",LABEL),
    ("text",TEXT)
]
traindata,testdata=data.TabularDataset.splits(
    path="./data/chap6",format="csv",
    train="imdb_train.csv",fields=train_test_fields,
    test="imdb_test.csv",skip_header=True
)

vec=Vectors("glove.6B.100d.txt","./data")
TEXT.build_vocab(traindata,max_size=20000,vectors=vec)
LABEL.build_vocab(traindata)
BATCH_SIZE=32
train_iter=data.BucketIterator(traindata,batch_size=BATCH_SIZE)
test_iter=data.BucketIterator(testdata,batch_size=BATCH_SIZE)

class GRUNet(nn.Module):
    def __init__(self,vocab_size,embedding_dim,hidden_dim,layer_dim,output_dim):
        super(GRUNet,self).__init__()
        self.hidden_dim=hidden_dim
        self.layer_dim=layer_dim
        self.embedding=nn.Embedding(vocab_size,embedding_dim)
        self.gru=nn.GRU(embedding_dim,hidden_dim,layer_dim,batch_first=True)
        self.fc1=nn.Sequential(
            nn.Linear(hidden_dim,hidden_dim),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
            nn.Linear(hidden_dim,output_dim)
        )
    def forward(self,x):
        embeds=self.embedding(x)
        r_out,h_n=self.gru(embeds,None)
        out=self.fc1(r_out[:,-1,:])
        return out

vocab_size=len(TEXT.vocab)
embedding_dim=vec.dim
hidden_dim=128
layer_dim=1
output_dim=2
grumodel=GRUNet(vocab_size,embedding_dim,hidden_dim,layer_dim,output_dim)
grumodel

grumodel.embedding.weight.data.copy_(TEXT.vocab.vectors)
UNK_IDX=TEXT.vocab.stoi[TEXT.unk_token]
PAD_IDX=TEXT.vocab.stoi[TEXT.pad_token]
grumodel.embedding.weight.data[UNK_IDX]=torch.zeros(vec.dim)
grumodel.embedding.weight.data[PAD_IDX]=torch.zeros(vec.dim)

def train_model(model,traindataloader,testdataloader,criterion,optimizer,num_epochs=25):
    train_loss_all=[]
    train_acc_all=[]
    test_loss_all=[]
    test_acc_all=[]
    learn_rate=[]
    since=time.time()
    scheduler=optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.1)
    for epoch in range(num_epochs):
        learn_rate.append(scheduler.get_lr()[0])
        print('-'*10)
        print('Epoch {}/{}, Lr:{}'.format(epoch,num_epochs-1,learn_rate[-1]))
        train_loss=0.0
        train_corrects=0
        train_num=0
        test_loss=0.0
        test_corrects=0
        test_num=0
        model.train()
        for step,batch in enumerate(traindataloader):
            textdata,target=batch.text[0],batch.label
            out=model(textdata)
            pre_lab=torch.argmax(out,1)
            loss=criterion(out,target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()*len(target)
            train_corrects += torch.sum(pre_lab==target.data)
            train_num += len(target)
        train_loss_all.append(train_loss/train_num)
        train_acc_all.append(train_corrects.double().item()/train_num)
        print('{} Train Loss : {:.4f} Train Acc :{:.4f}'.format(epoch,train_loss_all[-1],train_acc_all[-1]))
        scheduler.step()
        model.eval()
        
        for step,batch in enumerate(testdataloader):
            textdata,target=batch.text[0],batch.label
            out=model(textdata)
            pre_lab=torch.argmax(out,1)
            loss=criterion(out,target)
            test_loss+=loss.item()*len(target)
            test_corrects+=torch.sum(pre_lab==target.data)
            test_num+=len(target)
        test_loss_all.append(test_loss/test_num)
        test_acc_all.append(test_corrects.double().item()/test_num)
        print('{} Test Loss : {:.4f} Test Acc :{:.4f}'.format(epoch,test_loss_all[-1],test_acc_all[-1]))
        
    train_process=pd.DataFrame(
        data={"epoch":range(num_epochs),
              "train_loss_all":train_loss_all,
              "train_acc_all":train_acc_all,
              "test_loss_all":test_loss_all,
              "test_acc_all":test_acc_all,
              "learn_rate":learn_rate}
    )
    return model,train_process
optimizer=optim.RMSprop(grumodel.parameters(),lr=0.003)
loss_func=nn.CrossEntropyLoss()
grumodel,train_process=train_model(grumodel,train_iter,test_iter,loss_func,optimizer,num_epochs=10)

plt.figure(figsize=(18,6))
plt.subplot(1,2,1)
plt.plot(train_process.epoch,train_process.train_loss_all,"r.-",label="Train loss")
plt.plot(train_process.epoch,train_process.test_loss_all,"bs-",label="Test loss")
plt.legend()
plt.xlabel("Epoch number",size=13)
plt.ylabel("Loss value",size=13)
plt.subplot(1,2,2)
plt.plot(train_process.epoch,train_process.train_acc_all,"r.-",label="Train acc")
plt.plot(train_process.epoch,train_process.test_acc_all,"bs-",label="Test acc")
plt.xlabel("Epoch number",size=13)
plt.ylabel("Acc",size=13)
plt.legend()
plt.show()

grumodel.eval()
test_y_all=torch.LongTensor()
pre_lab_all=torch.LongTensor()
for step,batch in enumerate(test_iter):
    textdata,target=batch.text[0],batch.label.view(-1)
    out=grumodel(textdata)
    pre_lab=torch.argmax(out,1)
    test_y_all=torch.cat((test_y_all,target))
    pre_lab_all=torch.cat((pre_lab_all,pre_lab))
acc=accuracy_score(test_y_all,pre_lab_all)
print("测试数据集上的预测精度为: ",acc)
