RNN, LSTM, and GRU in Practice: A Simple Text Generation Task
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
There are 1 GPU(s) available.
We will use the GPU: GeForce GTX 1070
Set the random seeds and hyperparameters
import torchtext
from torchtext.vocab import Vectors
import numpy as np
import random

# fix the random seeds for reproducibility
random.seed(53113)
np.random.seed(53113)
torch.manual_seed(53113)

BATCH_SIZE = 32          # sequences per batch
EMBEDDING_SIZE = 650     # word embedding dimension
MAX_VOCAB_SIZE = 50000   # keep only the 50,000 most frequent words
Load the dataset
TEXT = torchtext.data.Field(lower=True)
train, val, test = torchtext.datasets.LanguageModelingDataset.splits(
    path=".",
    train="train.txt",
    validation="dev.txt",
    test="test.txt",
    text_field=TEXT)
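The three files are assumed to be plain, whitespace-tokenized text. As a quick sanity check (a small sketch using the legacy torchtext API above), each split holds a single example whose .text attribute is the full token stream of that file:

# Sanity check: each LanguageModelingDataset split is one example
# whose .text attribute holds the whole token stream of the file.
print(len(train))          # 1
print(train[0].text[:10])  # the first ten tokens of train.txt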
Build the vocabulary
TEXT.build_vocab(train, max_size=MAX_VOCAB_SIZE)
print("vocabulary size: {}".format(len(TEXT.vocab)))
vocabulary size: 50002
print(TEXT.vocab.itos[0:50])
['<unk>', '<pad>', 'the', 'of', 'and', 'one', 'in', 'a', 'to', 'zero', 'nine', 'two', 'is', 'as', 'eight', 'for', 's', 'five', 'three', 'was', 'by', 'that', 'four', 'six', 'seven', 'with', 'on', 'are', 'it', 'from', 'or', 'his', 'an', 'be', 'this', 'he', 'at', 'which', 'not', 'also', 'have', 'were', 'has', 'but', 'other', 'their', 'its', 'first', 'they', 'had']
print(list(TEXT.vocab.stoi.items())[0:50])
[('<unk>', 0), ('<pad>', 1), ('the', 2), ('of', 3), ('and', 4), ('one', 5), ('in', 6), ('a', 7), ('to', 8), ('zero', 9), ('nine', 10), ('two', 11), ('is', 12), ('as', 13), ('eight', 14), ('for', 15), ('s', 16), ('five', 17), ('three', 18), ('was', 19), ('by', 20), ('that', 21), ('four', 22), ('six', 23), ('seven', 24), ('with', 25), ('on', 26), ('are', 27), ('it', 28), ('from', 29), ('or', 30), ('his', 31), ('an', 32), ('be', 33), ('this', 34), ('he', 35), ('at', 36), ('which', 37), ('not', 38), ('also', 39), ('have', 40), ('were', 41), ('has', 42), ('but', 43), ('other', 44), ('their', 45), ('its', 46), ('first', 47), ('they', 48), ('had', 49)]
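itos and stoi are inverse mappings: itos turns an index back into its token, and stoi turns a token into its index (words outside the vocabulary fall back to the <unk> index). A minimal round trip:

# stoi and itos are inverse mappings between tokens and integer ids
idx = TEXT.vocab.stoi["the"]
print(idx, TEXT.vocab.itos[idx])   # 2 the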
Create the iterators
train_iter, val_iter, test_iter = torchtext.data.BPTTIterator.splits(
    (train, val, test),
    batch_size=BATCH_SIZE,
    bptt_len=50,
    repeat=False,
    shuffle=True)
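Each batch produced by BPTTIterator carries a text tensor of shape (bptt_len, batch_size) and a target tensor of the same shape in which every position holds the next word, so the model is trained on next-word prediction over 50-token windows. A quick peek at one batch (a small sketch using the iterator defined above):

# Peek at one batch: target is simply text shifted forward by one token
batch = next(iter(train_iter))
print(batch.text.shape, batch.target.shape)   # torch.Size([50, 32]) torch.Size([50, 32])
print(" ".join(TEXT.vocab.itos[i] for i in batch.text[:8, 0].tolist()))
print(" ".join(TEXT.vocab.itos[i] for i in batch.target[:8, 0].tolist()))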
Build the language model
import torch
import torch.nn as nn

class My_Model(nn.Module):
    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5):
        super(My_Model, self).__init__()
        self.drop = nn.Dropout(dropout)
        # embedding layer: token id -> dense vector of size ninp
        self.encoder = nn.Embedding(ntoken, ninp)
        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers
        # select nn.RNN / nn.LSTM / nn.GRU by name
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
        print(getattr(nn, rnn_type))
        # the decoder projects the hidden state back onto the vocabulary
        self.decoder = nn.Linear(nhid, ntoken)
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden):
        # input: (seq_len, batch_size) of token ids
        emb = self.encoder(input)
        emb = self.drop(emb)
        rnn_layer, hidden = self.rnn(emb, hidden)
        rnn_layer = self.drop(rnn_layer)
        # flatten (seq_len, batch, nhid) -> (seq_len * batch, nhid) for the linear layer
        output = self.decoder(rnn_layer.view(rnn_layer.size(0) * rnn_layer.size(1), rnn_layer.size(2)))
        output = output.view(rnn_layer.size(0), rnn_layer.size(1), output.size(1))
        return output, hidden

    def init_hidden(self, bsz, requires_grad=True):
        # create zero-filled hidden states on the same device/dtype as the parameters
        weight = next(self.parameters())
        if self.rnn_type == 'LSTM':
            # an LSTM needs both a hidden state and a cell state
            return (weight.new_zeros((self.nlayers, bsz, self.nhid), requires_grad=requires_grad),
                    weight.new_zeros((self.nlayers, bsz, self.nhid), requires_grad=requires_grad))
        else:
            return weight.new_zeros((self.nlayers, bsz, self.nhid), requires_grad=requires_grad)
nhid_size = 1000
VOCAB_SIZE = len(TEXT.vocab)   # 50002: the 50,000 most frequent words plus <unk> and <pad>
model = My_Model("LSTM", VOCAB_SIZE, EMBEDDING_SIZE, nhid_size, 2, dropout=0.5)
model = model.cuda()
loss_fn = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5)   # halves the learning rate on each step()
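Because the recurrent layer is chosen by name via getattr(nn, rnn_type), swapping the LSTM for a GRU (or a vanilla RNN) only means changing the first argument; init_hidden already handles the fact that a GRU keeps a single hidden tensor rather than the LSTM's (h, c) pair. A sketch of the GRU variant, not used in the run below:

# The same architecture with a GRU cell instead of an LSTM
gru_model = My_Model("GRU", VOCAB_SIZE, EMBEDDING_SIZE, nhid_size, 2, dropout=0.5).cuda()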
Truncate the computation graph to reduce memory pressure
def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        # an LSTM hidden state is an (h, c) tuple; detach each element
        return tuple(repackage_hidden(v) for v in h)
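A brief illustration of what the helper returns (a sketch using the LSTM model defined above): the values are unchanged, but the new tensors carry no graph history, so the next backward pass stops at the current BPTT window instead of unrolling through every earlier batch.

# The detached copy has the same values but no graph history
h = model.init_hidden(BATCH_SIZE)   # for the LSTM this is an (h, c) tuple
h = repackage_hidden(h)
print(type(h), h[0].grad_fn)        # <class 'tuple'> None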
Evaluate the model
def evaluate(model, data):
    model.eval()
    total_loss = 0.
    total_count = 0.
    it = iter(data)
    with torch.no_grad():
        hidden = model.init_hidden(BATCH_SIZE, requires_grad=False)
        for i, batch in enumerate(it):
            data, target = batch.text, batch.target
            data, target = data.cuda(), target.cuda()
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
            loss = loss_fn(output.view(-1, VOCAB_SIZE), target.view(-1))
            # weight each batch by the number of tokens it contains
            total_count += np.multiply(*data.size())
            total_loss += loss.item() * np.multiply(*data.size())
    loss = total_loss / total_count
    model.train()
    return loss
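The returned value is the average cross-entropy per token; language models are usually reported as perplexity, which is just the exponential of that loss. A quick conversion (a sketch using the function above):

# Perplexity is exp of the per-token cross-entropy loss
val_loss = evaluate(model, val_iter)
print("val loss %.3f, perplexity %.2f" % (val_loss, np.exp(val_loss)))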
Train the model
import copy

GRAD_CLIP = 1.
NUM_EPOCHS = 2
val_losses = []

for epoch in range(NUM_EPOCHS):
    model.train()
    it = iter(train_iter)
    hidden = model.init_hidden(BATCH_SIZE)
    for i, batch in enumerate(it):
        data, target = batch.text, batch.target
        data, target = data.cuda(), target.cuda()
        # detach the hidden state so gradients stop at this BPTT window
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = loss_fn(output.view(-1, VOCAB_SIZE), target.view(-1))
        loss.backward()
        # clip gradients to guard against exploding gradients in the RNN
        torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)
        optimizer.step()
        if i % 1000 == 0:
            print("epoch", epoch, "iter", i, "loss", loss.item())
        if i % 10000 == 0:
            val_loss = evaluate(model, val_iter)
            if len(val_losses) == 0 or val_loss < min(val_losses):
                # new best validation loss: keep this checkpoint
                print("best model, val loss: ", val_loss)
                torch.save(model, "best_model.pkl")
            else:
                # no improvement: step the LR scheduler and re-initialize the optimizer
                scheduler.step()
                optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
            val_losses.append(val_loss)
epoch 0 iter 0 loss 10.730563163757324
best model, val loss: 10.478901235690277
D:\Anaconda\envs\jianbo\lib\site-packages\torch\serialization.py:402: UserWarning: Couldn't retrieve source code for container of type My_Model. It won't be checked for correctness upon loading.
"type " + obj.__name__ + ". It won't be checked "
epoch 0 iter 1000 loss 6.0242919921875
epoch 0 iter 2000 loss 6.029582500457764
epoch 0 iter 3000 loss 5.8461594581604
epoch 0 iter 4000 loss 5.5147223472595215
epoch 0 iter 5000 loss 5.937921047210693
epoch 0 iter 6000 loss 5.6236090660095215
epoch 0 iter 7000 loss 5.482613563537598
epoch 0 iter 8000 loss 5.344069004058838
epoch 0 iter 9000 loss 5.418025970458984
epoch 1 iter 0 loss 5.486691474914551
best model, val loss: 5.002634433592716
epoch 1 iter 1000 loss 5.0923237800598145
epoch 1 iter 2000 loss 5.381066799163818
epoch 1 iter 3000 loss 5.237982273101807
epoch 1 iter 4000 loss 4.973425388336182
epoch 1 iter 5000 loss 5.4851861000061035
epoch 1 iter 6000 loss 5.201869010925293
epoch 1 iter 7000 loss 5.1173810958862305
epoch 1 iter 8000 loss 5.007303237915039
epoch 1 iter 9000 loss 5.120178699493408
Save the trained model
torch.save(model, "final_model.pkl")
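torch.save(model, ...) pickles the entire My_Model object, which is what triggers the UserWarning in the training log above and ties the checkpoint to this exact class definition. A more portable alternative, sketched here with a placeholder file name, is to save only the parameters:

# Alternative: save only the parameters (state_dict) rather than the whole object
torch.save(model.state_dict(), "final_model_state.pth")
# ...and later restore them into a freshly constructed My_Model:
# model = My_Model("LSTM", VOCAB_SIZE, EMBEDDING_SIZE, nhid_size, 2, dropout=0.5).cuda()
# model.load_state_dict(torch.load("final_model_state.pth"))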
Load the best model
nhid_size = 1000
best_model = My_Model("LSTM", VOCAB_SIZE, EMBEDDING_SIZE, nhid_size, 2, dropout=0.5)
best_model = best_model.cuda()
# best_model.pkl was saved with torch.save(model, ...), so torch.load returns the full model object
PATH = './best_model.pkl'
best_model = torch.load(PATH)
Generate some sample text with the trained model
hidden = best_model.init_hidden(1)
# start from a random word id as the first input token
input = torch.randint(VOCAB_SIZE, (1, 1), dtype=torch.long).to(device)
words = []
for i in range(100):
    output, hidden = best_model(input, hidden)
    # exp of the logits gives (unnormalized) word probabilities to sample from
    word_weights = output.squeeze().exp().cpu()
    word_idx = torch.multinomial(word_weights, 1)[0]
    # feed the sampled word back in as the next input
    input.fill_(word_idx)
    word = TEXT.vocab.itos[word_idx]
    words.append(word)
print(" ".join(words))
who become born epicurus and looking for them as a is to print using hypocrisy that married his corresponding a buffer of his bicycle and put her came that into the drink the abuse of manganese s into the liver and prayers the second it is his own nowhere of the earth recognizes his origin but has primarily been used by arthur gardner largely written for this reason he differs from his eight sequel to the catherine copula which appears to be that of it encoding beethoven s demonstration the last ship desires to invent wittenberg was an
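The sample reads like loosely connected Wikipedia-style phrases, which is roughly what two epochs of a word-level LSTM can produce. A common extension not used above is a sampling temperature: dividing the output logits by a value below 1 before exponentiating sharpens the distribution and makes the text more conservative, while values above 1 make it more random. A sketch of the same loop with a temperature knob (the temperature variable is introduced here and is not part of the original code):

# Same sampling loop as above, with an added temperature parameter
temperature = 0.8   # < 1.0 -> more conservative, > 1.0 -> more random
hidden = best_model.init_hidden(1)
input = torch.randint(VOCAB_SIZE, (1, 1), dtype=torch.long).to(device)
words = []
for i in range(100):
    output, hidden = best_model(input, hidden)
    word_weights = output.squeeze().div(temperature).exp().cpu()
    word_idx = torch.multinomial(word_weights, 1)[0]
    input.fill_(word_idx)
    words.append(TEXT.vocab.itos[word_idx])
print(" ".join(words))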