【参考:《PyTorch深度学习实践》完结合集_哔哩哔哩_bilibili】
【参考 分类专栏:PyTorch 深度学习实践_错错莫的博客-CSDN博客】
全【参考 分类专栏:PyTorch_kodoshinichi的博客-CSDN博客】
全,巨详细【PyTorch_笔记 分类: pytorch | BirandaのBlog】
全【参考 分类专栏:pytorch深度学习实践_王泽的随笔的博客-CSDN博客】
多看官方API文档的参数,理解并熟记
PyTorch 深度学习实践 第9讲
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
# prepare dataset
batch_size = 64

# ToTensor converts the PIL image to a float tensor; Normalize then
# standardizes it with the given mean (0.1307) and standard deviation (0.3081).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: reshuffled every epoch.
train_dataset = datasets.MNIST(
    root='../dataset/mnist/',
    train=True,
    download=True,
    transform=transform,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Test split: fixed order.
test_dataset = datasets.MNIST(
    root='../dataset/mnist/',
    train=False,
    download=True,
    transform=transform,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
# design model using class
class Net(torch.nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 128 -> 64 -> 10.

    The input is flattened to 784 features per sample, passed through four
    ReLU-activated linear layers, and projected to 10 raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.l1 = torch.nn.Linear(784, 512)
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, 10)."""
        # -1 lets view() infer the mini-batch size from the element count.
        x = x.view(-1, 784)
        for layer in (self.l1, self.l2, self.l3, self.l4):
            x = F.relu(layer(x))
        # No activation on the last layer: the loss function receives logits.
        return self.l5(x)
model = Net()

# construct loss and optimizer
# CrossEntropyLoss is fed the raw model outputs; SGD uses momentum 0.5.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), momentum=0.5, lr=0.01)
# training cycle forward, backward, update
def train(epoch):
    """Run one training epoch over ``train_loader``.

    For every mini-batch: clear gradients, run the forward pass, compute the
    loss, backpropagate and update the parameters. The mean loss of the last
    300 mini-batches is printed every 300 mini-batches.

    Args:
        epoch: Zero-based epoch index; only used for the progress message.
    """
    model.train()  # training mode
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):
        optimizer.zero_grad()

        # outputs: (batch, 10) raw scores; target: (batch,) class indices.
        # The loss takes the raw outputs directly, without a softmax.
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0  # restart the window for the next 300 batches
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy in percent."""
    model.eval()  # evaluation mode
    correct = 0  # number of correctly classified samples
    total = 0  # number of samples seen
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            # Index of the largest score in each row (dim=1) is the prediction.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)  # size(0) is the number of samples in the batch
            # Element-wise comparison, then count the matches.
            correct += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * correct / total))
if __name__ == '__main__':
    # Ten epochs; each epoch is one training pass followed by one evaluation.
    for epoch in range(10):
        train(epoch)
        test()
output:
[1, 300] loss: 2.177
[1, 600] loss: 0.824
[1, 900] loss: 0.432
accuracy on test set: 89 %
[2, 300] loss: 0.320
[2, 600] loss: 0.269
[2, 900] loss: 0.229
accuracy on test set: 94 %
[3, 300] loss: 0.189
[3, 600] loss: 0.170
[3, 900] loss: 0.151
accuracy on test set: 95 %
[4, 300] loss: 0.127
[4, 600] loss: 0.118
[4, 900] loss: 0.121
accuracy on test set: 96 %
...
[9, 300] loss: 0.037
[9, 600] loss: 0.042
[9, 900] loss: 0.037
accuracy on test set: 97 %
[10, 300] loss: 0.030
[10, 600] loss: 0.033
[10, 900] loss: 0.034
accuracy on test set: 97 %
【参考:【PyTorch】PyTorch深度学习实践|视频学习笔记|P12-13|RNN_kodoshinichi的博客-CSDN博客】
权值共享,参数较少
【参考:Pytorch学习笔记013——Simplele_RNN | BirandaのBlog】这篇写得巨详细
理解:$h_t$ 是隐藏层输出,所以每个RNN Cell都相当于一个隐藏层(其本质上是一个线性层,可以对输入数据的维度进行变换输出)
中文文档:https://pytorch-cn.readthedocs.io/zh/stable/package_references/torch-nn/#recurrent-layers
技巧:看图理解参数
比如
RNN的输入: (input, h_0)
input (seq_len, batch, input_size):
h_0 (num_layers * num_directions, batch, hidden_size): 保存着初始隐状态的tensor
RNN的输出: (output, h_n)
num_layers = 3
import torch
# Approach 1: drive an RNNCell by hand, one time step at a time.
# The key to using an RNNCell is getting its input/output sizes right:
#   cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)
#   hidden = cell(input, hidden)
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

# 1. Fix the cell's input and hidden sizes; every later call must match them.
cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

# Lay the data out as (seq_len, batch, features) so that iterating over the
# first axis yields one time step at a time.
dataset = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(batch_size, hidden_size)  # h_0: all zeros

for idx, step_input in enumerate(dataset):
    print('=' * 20, idx, '=' * 20)
    print('Input_size: ', step_input.shape)
    # 2. Call the cell:
    #    step_input (batch_size, input_size), hidden (batch_size, hidden_size).
    # Feeding the result back in as the next hidden state is the recurrence.
    hidden = cell(step_input, hidden)
    print('hidden size:', hidden.shape)
    print(hidden)
'''
==================== 0 ====================
Input_size: torch.Size([1, 4])
hidden size: torch.Size([1, 2])
tensor([[ 0.6622, -0.3139]], grad_fn=)
==================== 1 ====================
Input_size: torch.Size([1, 4])
hidden size: torch.Size([1, 2])
tensor([[0.7954, 0.3579]], grad_fn=)
==================== 2 ====================
Input_size: torch.Size([1, 4])
hidden size: torch.Size([1, 2])
tensor([[0.6158, 0.6224]], grad_fn=)
'''
import torch
batch_size = 1
seq_len = 5  # number of time steps x_1 ... x_n per sample
input_size = 4  # feature dimension of each x_t
hidden_size = 2  # dimension of each hidden state h_t
num_layers = 3  # number of stacked RNN layers

rnn = torch.nn.RNN(input_size, hidden_size, num_layers)

# Inputs x_1 ... x_n, laid out as (seq_len, batch, input_size).
input_data = torch.randn(seq_len, batch_size, input_size)
# h_0 for every layer, all zeros: (num_layers, batch, hidden_size).
hidden_data = torch.zeros(num_layers, batch_size, hidden_size)

# out holds the top layer's h_1 ... h_n; hidden_out holds h_n of every layer.
out, hidden_out = rnn(input_data, hidden_data)

print("Output size: ", out.shape)
print("Output: ", out)
print("Hidden size: ", hidden_out.shape)
print("Hidden: ", hidden_out)
"""
Output size: torch.Size([5, 1, 2])
Output: tensor([[[-0.6886, -0.6379]],
[[-0.3161, -0.3697]],
[[-0.2525, -0.5492]],
[[-0.5135, -0.5937]],
[[-0.7819, -0.5441]]], grad_fn=)
Hidden size: torch.Size([3, 1, 2])
Hidden: tensor([[[-0.5465, -0.4946]],
[[-0.0979, -0.5234]],
[[-0.7819, -0.5441]]], grad_fn=)
"""
“hello” -> “ohlol”
import torch
import torch.nn as nn
input_size = 4
hidden_size = 4
batch_size = 1

# Per-step sizes, written as column vectors:
#   x_t     -> (input_size, 1)
#   h_{t-1} -> (hidden_size, 1), e.g. h_0 is (hidden_size, 1)

# Data preparation: vocabulary and the index-encoded input/target strings.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # "hello"
y_data = [3, 1, 2, 3, 2]  # "ohlol"

# Row i of the lookup table is the one-hot vector for character index i.
one_hot_lookup = [
    [int(row == col) for col in range(4)]
    for row in range(4)
]

# One-hot encode the input sequence: (seq_len, input_size).
x_one_hot = [one_hot_lookup[x] for x in x_data]

# view(-1, ...) keeps seq_len as the leading axis and inserts the batch axis,
# giving (seq_len, batch_size, input_size).
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)

# Targets as (seq_len, 1): one class index per time step.
labels = torch.LongTensor(y_data).unsqueeze(1)
# 定义模型
class Model(nn.Module):
    """Single ``nn.RNNCell`` wrapper that advances one time step per call.

    Args:
        input_size: Feature dimension of each input x_t.
        hidden_size: Dimension of the hidden state h_t.
        batch_size: Batch size used when building the initial hidden state.
    """

    def __init__(self, input_size, hidden_size, batch_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        # 1. Fix the cell's input and hidden sizes.
        self.rnn_cell = nn.RNNCell(input_size=self.input_size,
                                   hidden_size=self.hidden_size)

    def forward(self, inputs, hidden):
        """Compute h_t = cell(x_t, h_{t-1}).

        Args:
            inputs: x_t, shape (batch_size, input_size).
            hidden: h_{t-1}, shape (batch_size, hidden_size).

        Returns:
            h_t, shape (batch_size, hidden_size).
        """
        # 2. The tensors passed in must match the sizes fixed in __init__.
        return self.rnn_cell(inputs, hidden)

    def init_hidden(self):
        """Return an all-zero h_0 of shape (batch_size, hidden_size).

        This is the only place ``self.batch_size`` is used.
        """
        return torch.zeros(self.batch_size, self.hidden_size)
# 初始化模型
net = Model(input_size, hidden_size, batch_size)

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=0.1)
for epoch in range(15):
    # Reset the accumulated loss and the gradients, and create a fresh h_0.
    loss = 0
    optimizer.zero_grad()
    hidden = net.init_hidden()
    print("Predicted string: ", end="")

    # inputs: (seq_len, batch_size, input_size); labels: (seq_len, 1).
    # Zipping them walks the sequence: step_input is (batch_size, input_size)
    # and label is a length-1 tensor holding that step's target class.
    for step_input, label in zip(inputs, labels):
        # h_t = cell(x_t, h_{t-1}): reusing the name `hidden` for both the
        # argument and the result carries the state to the next step.
        hidden = net(step_input, hidden)  # (batch_size, hidden_size)
        # hidden_size equals the number of classes here, so the hidden state
        # is used directly as the class scores; sum the loss over all steps.
        loss += criterion(hidden, label)
        # Predicted class = index of the largest score.
        idx = hidden.argmax(dim=1)
        print(idx2char[idx.item()], end='')

    loss.backward()
    optimizer.step()
    # loss is a tensor; .item() extracts the Python number.
    print(", Epoch [%d/15] loss = %.4f" % (epoch + 1, loss.item()))
Predicted string: ooooh, Epoch [1/15] loss = 6.2716
Predicted string: ooloo, Epoch [2/15] loss = 5.1896
Predicted string: oolol, Epoch [3/15] loss = 4.4506
Predicted string: ohlol, Epoch [4/15] loss = 3.9354
Predicted string: ohlol, Epoch [5/15] loss = 3.5119
Predicted string: ohlol, Epoch [6/15] loss = 3.1292
Predicted string: ohlol, Epoch [7/15] loss = 2.8064
Predicted string: ohlol, Epoch [8/15] loss = 2.5573
Predicted string: ohlol, Epoch [9/15] loss = 2.3657
Predicted string: ohlol, Epoch [10/15] loss = 2.2133
Predicted string: ohlol, Epoch [11/15] loss = 2.1030
Predicted string: ohlol, Epoch [12/15] loss = 2.0360
Predicted string: ohlol, Epoch [13/15] loss = 1.9980
Predicted string: ohlol, Epoch [14/15] loss = 1.9725
Predicted string: ohlol, Epoch [15/15] loss = 1.9496
import torch
import torch.nn as nn
input_size = 4
hidden_size = 4
num_layers = 1  # new compared with the RNNCell version
batch_size = 1
seq_len = 5  # new compared with the RNNCell version

# Per-step sizes, written as column vectors:
#   x_t     -> (input_size, 1)
#   h_{t-1} -> (hidden_size, 1), e.g. h_0 is (hidden_size, 1)

# Data preparation: vocabulary and the index-encoded input/target strings.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # "hello"
y_data = [3, 1, 2, 3, 2]  # "ohlol"

# Row i of the lookup table is the one-hot vector for character index i.
one_hot_lookup = [
    [int(row == col) for col in range(4)]
    for row in range(4)
]

# One-hot encode the input sequence: (seq_len, input_size).
x_one_hot = [one_hot_lookup[x] for x in x_data]

# nn.RNN takes the whole sequence at once as (seq_len, batch_size, input_size);
# view(-1, ...) keeps seq_len as the leading axis.
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)

# Targets stay one-dimensional, (seq_len,), to line up with the flattened
# (seq_len * batch_size, hidden_size) model output.
labels = torch.tensor(y_data, dtype=torch.long)
# 定义模型
class Model(nn.Module):
    """``nn.RNN`` wrapper that processes a whole sequence in one call.

    Args:
        input_size: Feature dimension of each input x_t.
        hidden_size: Dimension of the hidden state h_t.
        batch_size: Nominal batch size. Stored for reference only; the
            initial hidden state is sized from the actual input.
        num_layers: Number of stacked RNN layers.
    """

    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        # 1. Fix the RNN's input and hidden sizes.
        self.rnn = nn.RNN(input_size=self.input_size,
                          hidden_size=self.hidden_size,
                          num_layers=num_layers)

    def forward(self, input):
        """Run the RNN over ``input`` and flatten the per-step outputs.

        Args:
            input: Tensor of shape (seq_len, batch, input_size).

        Returns:
            Tensor of shape (seq_len * batch, hidden_size), ready to be
            compared against one-dimensional class targets.
        """
        # With nn.RNN the hidden state is no longer threaded through by hand;
        # only an all-zero h_0 is needed. Its batch dimension is taken from
        # the input so a batch size different from the constructor's works.
        hidden = torch.zeros(self.num_layers, input.size(1), self.hidden_size,
                             dtype=input.dtype, device=input.device)
        # 2. out: (seq_len, batch, hidden_size); the final hidden state is unused.
        out, _ = self.rnn(input, hidden)
        return out.reshape(-1, self.hidden_size)
# 初始化模型
# Pass num_layers explicitly so the module-level setting actually reaches the
# model instead of silently falling back to the constructor default.
net = Model(input_size=input_size,
            hidden_size=hidden_size,
            batch_size=batch_size,
            num_layers=num_layers)

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
#定义训练过程
for epoch in range(15):
    # The whole sequence goes through the RNN in one call, so there is no
    # per-step loss accumulation: one loss over all steps, one backward pass.
    optimizer.zero_grad()
    outputs = net(inputs)  # (seq_len * batch_size, hidden_size)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Predicted class per step = index of the largest score in each row,
    # converted to a plain list of ints, e.g. [1, 3, 3, 2, 3].
    idx = outputs.argmax(dim=1).tolist()
    print('Prediceted string: ', ''.join([idx2char[x] for x in idx]), end='')
    print(',Epoch [%d/15] loss = %.4f' % (epoch + 1, loss.item()))
Prediceted string: ohhhh,Epoch [1/15] loss = 1.3399
Prediceted string: ohllh,Epoch [2/15] loss = 1.1482
Prediceted string: ohlll,Epoch [3/15] loss = 0.9932
Prediceted string: ohlll,Epoch [4/15] loss = 0.8722
Prediceted string: ohlll,Epoch [5/15] loss = 0.7919
Prediceted string: ohlll,Epoch [6/15] loss = 0.7332
Prediceted string: ohlll,Epoch [7/15] loss = 0.6827
Prediceted string: ohlll,Epoch [8/15] loss = 0.6381
Prediceted string: ohlol,Epoch [9/15] loss = 0.5999
Prediceted string: ohlol,Epoch [10/15] loss = 0.5685
Prediceted string: ohlol,Epoch [11/15] loss = 0.5426
Prediceted string: ohlol,Epoch [12/15] loss = 0.5197
Prediceted string: ohlol,Epoch [13/15] loss = 0.4990
Prediceted string: ohlol,Epoch [14/15] loss = 0.4811
Prediceted string: ohlol,Epoch [15/15] loss = 0.4657
import torch
import torch.nn as nn
input_size = 4  # vocabulary size fed to the embedding layer
num_class = 4  # number of output classes
hidden_size = 8  # dimension of h_t
embedding_size = 10  # dimension of each embedding vector
batch_size = 1
num_layers = 2  # two stacked RNN layers
seq_len = 5

# Data preparation: vocabulary and the index-encoded input/target strings.
idx2char = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]]  # "hello", shape (batch, seq_len)
y_data = [3, 1, 2, 3, 2]  # "ohlol", shape (batch * seq_len)

# nn.Embedding requires integer (Long) indices as input.
inputs = torch.tensor(x_data, dtype=torch.long)
labels = torch.tensor(y_data, dtype=torch.long)
# 定义模型
class Model(nn.Module):
    """Embedding -> multi-layer RNN (batch first) -> linear classifier.

    All sizes are read from the module-level settings ``input_size``,
    ``embedding_size``, ``hidden_size``, ``num_layers`` and ``num_class``.
    """

    def __init__(self):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings=input_size,  # vocabulary size
                                embedding_dim=embedding_size)
        # The RNN consumes the embedding output; batch_first=True puts the
        # batch axis first in its input and output tensors.
        self.rnn = nn.RNN(input_size=embedding_size,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          batch_first=True)
        # Map each hidden state to one score per class.
        self.fc = nn.Linear(in_features=hidden_size,
                            out_features=num_class)

    def forward(self, input):
        """Return class scores of shape (batch * seq_len, num_class).

        Args:
            input: Long tensor of character indices, shape (batch, seq_len).
        """
        # Size h_0 from the actual input so any batch size works, not only
        # the module-level ``batch_size``.
        h_0 = torch.zeros(num_layers, input.size(0), hidden_size,
                          device=input.device)
        x = self.emb(input)  # (batch, seq_len, embedding_size)
        x, _ = self.rnn(x, h_0)  # (batch, seq_len, hidden_size)
        x = self.fc(x)  # (batch, seq_len, num_class)
        # Flatten to (N, C), the layout CrossEntropyLoss expects.
        return x.reshape(-1, num_class)
# 初始化模型
net = Model()

# Loss function and optimizer.
# CrossEntropyLoss takes input (N, C), where C is the number of classes, and
# target (N) holding class indices with 0 <= target[i] <= C - 1.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=0.1)
# 定义训练过程
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)  # (batch * seq_len, num_class)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Predicted class per step = index of the largest score in each row,
    # converted to a plain list of ints, e.g. [1, 3, 3, 2, 3].
    idx = outputs.argmax(dim=1).tolist()
    print('Prediceted string: ', ''.join([idx2char[x] for x in idx]), end='')
    print(',Epoch [%d/15] loss = %.4f' % (epoch + 1, loss.item()))
Prediceted string: ooooo,Epoch [1/15] loss = 1.2535
Prediceted string: ollll,Epoch [2/15] loss = 0.8884
Prediceted string: ohlll,Epoch [3/15] loss = 0.5855
Prediceted string: ohlol,Epoch [4/15] loss = 0.2974
Prediceted string: ohlol,Epoch [5/15] loss = 0.1475
Prediceted string: ohlol,Epoch [6/15] loss = 0.0829
Prediceted string: ohlol,Epoch [7/15] loss = 0.0466
Prediceted string: ohlol,Epoch [8/15] loss = 0.0235
Prediceted string: ohlol,Epoch [9/15] loss = 0.0123
Prediceted string: ohlol,Epoch [10/15] loss = 0.0073
Prediceted string: ohlol,Epoch [11/15] loss = 0.0046
Prediceted string: ohlol,Epoch [12/15] loss = 0.0030
Prediceted string: ohlol,Epoch [13/15] loss = 0.0020
Prediceted string: ohlol,Epoch [14/15] loss = 0.0014
Prediceted string: ohlol,Epoch [15/15] loss = 0.0010
【参考:Pytorch的参数“batch_first”的理解 - 简书】
用过PyTorch的朋友大概都知道,对于不同的网络层,输入的维度虽然不同,但通常输入的第一个维度都是batch_size,比如torch.nn.Linear的输入是(batch_size, in_features),torch.nn.Conv2d的输入是(batch_size, C, H, W)。而RNN的输入却是(seq_len, batch_size, input_size),batch_size位于第二个维度!当然,你也可以将batch_size和序列长度seq_len对换位置,此时只需要令batch_first=True。
但是为什么RNN输入默认不是batch_first=True?这是为了便于并行计算。因为cuDNN中RNN的API就是batch_size在第二维度!进一步,为啥cuDNN要这么做呢?因为batch first意味着模型的输入(一个Tensor)在内存中存储时,先存储第一个sequence,再存储第二个… 而如果是seq_len first,模型的输入在内存中,先存储所有序列的第一个单元,然后是第二个单元… 两种区别如下图所示:
如果不用batch_first=True
# Sequence-first layout: one row per time step, one column per batch element.
x_data = [[char_idx] for char_idx in (1, 0, 2, 2, 3)]  # (seq_len, batch)

# Every row is one time step for the whole batch.
inputs = torch.tensor(x_data, dtype=torch.long)  # torch.Size([5, 1])

# net(inputs) iterates over the leading (seq_len) axis internally, taking one
# batch of inputs per step.
def forward(self, input):
    """Sequence-first forward pass (RNN built without ``batch_first=True``).

    Args:
        input: Long tensor of character indices, shape (seq_len, batch).

    Returns:
        Class scores of shape (seq_len * batch, num_class).
    """
    # Size h_0 from the actual input (batch is axis 1 in this layout) so any
    # batch size works, not only the module-level ``batch_size``.
    h_0 = torch.zeros(num_layers, input.size(1), hidden_size,
                      device=input.device)
    x = self.emb(input)  # (seq_len, batch, embedding_size)
    x, _ = self.rnn(x, h_0)  # (seq_len, batch, hidden_size)
    x = self.fc(x)  # (seq_len, batch, num_class)
    # Flatten to (N, C), the layout CrossEntropyLoss expects.
    return x.reshape(-1, num_class)
有些东西是没有办法用数学严格证明的,都是靠直觉理解,觉得说得通就行,然后用实验验证,比如这个遗忘门。
没搞懂
import torch
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import gzip
import csv
import time
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence
import math
# 可不加
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# 超参数
# Model size.
hidden_size = 100
n_layer = 2
n_chars = 128  # presumably the ASCII range produced by ord() on names; confirm against the caller

# Training schedule.
batch_size = 256
n_epoch = 100

# Device selection.
use_gpu = False
class NameDataset(Dataset):
    """(name, country index) samples read from a gzipped CSV file.

    Each CSV row is ``name,country``. Countries are mapped to integer indices
    by sorting the distinct country names found in the file.

    Args:
        is_train: Read ``name_train.csv.gz`` when true, otherwise
            ``name_test.csv.gz``; both are resolved against the working
            directory.
    """

    def __init__(self, is_train=True):
        filenames = 'name_train.csv.gz' if is_train else 'name_test.csv.gz'
        with gzip.open(filenames, 'rt') as f:
            rows = list(csv.reader(f))  # every (name, country) row

        # Split the rows into parallel name / country lists.
        self.names = [row[0] for row in rows]
        self.len = len(self.names)
        self.countries = [row[1] for row in rows]

        # Distinct countries in sorted order define the class indices.
        self.country_list = sorted(set(self.countries))
        self.country_dict = self.getCountryDict()
        self.country_num = len(self.country_list)

    def __getitem__(self, index):
        """Return ``(name, country_index)`` for the sample at ``index``."""
        return self.names[index], self.country_dict[self.countries[index]]

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def getCountryDict(self):
        """Return a dict mapping country name to its index, starting at 0."""
        return {country_name: idx
                for idx, country_name in enumerate(self.country_list)}

    def idx2country(self, index):
        """Return the country name for a class index."""
        return self.country_list[index]

    def getCountriesNum(self):
        """Return the number of distinct countries."""
        return self.country_num
# Training data is reshuffled every epoch so mini-batches are not tied to the
# file order; the test data keeps a fixed order.
train_set = NameDataset(is_train=True)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_set = NameDataset(is_train=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
class RNNClassifier(nn.Module):
    """Embedding -> (bi)directional GRU -> linear classifier over sequences.

    Args:
        input_size: Vocabulary size of the embedding layer.
        hidden_size: Embedding dimension and GRU hidden size.
        output_size: Number of output classes.
        n_layers: Number of stacked GRU layers.
        bidirectional: Whether the GRU runs in both directions.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = 2 if bidirectional else 1

        # Embedding maps (seq_len, batch) indices to
        # (seq_len, batch, hidden_size) vectors.
        self.embedding = nn.Embedding(num_embeddings=input_size,
                                      embedding_dim=hidden_size)
        # The GRU consumes the embedding output, hence input_size=hidden_size.
        self.gru = nn.GRU(input_size=hidden_size,
                          hidden_size=hidden_size,
                          num_layers=n_layers,
                          bidirectional=bidirectional)
        # A bidirectional GRU yields two final states per sample, which are
        # concatenated before classification.
        self.fc = nn.Linear(in_features=hidden_size * self.n_directions,
                            out_features=output_size)

    def forward(self, input, seq_lengths=None):
        """Classify each sequence in the batch.

        Args:
            input: Long tensor of token indices, shape (batch, seq_len).
            seq_lengths: Optional true lengths of the padded sequences,
                sorted in descending order. When given, the embedded batch
                is packed so the GRU skips the padding.

        Returns:
            Class scores of shape (batch, output_size).
        """
        input = input.t()  # (batch, seq_len) -> (seq_len, batch)
        batch_size = input.size(1)

        hidden = self._init_hidden(batch_size).to(input.device)
        embedding = self.embedding(input)  # (seq_len, batch, hidden_size)

        gru_input = embedding
        if seq_lengths is not None:
            # pack_padded_sequence needs the lengths on the CPU.
            gru_input = pack_padded_sequence(embedding, torch.as_tensor(seq_lengths).cpu())

        # hidden: (n_layers * n_directions, batch, hidden_size)
        _, hidden = self.gru(gru_input, hidden)

        if self.n_directions == 2:
            # The last two entries of h_n belong to the top layer: hidden[-2]
            # is the forward direction's final state, hidden[-1] the backward
            # one. Join them along the feature axis.
            hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim=1)
        else:
            hidden_cat = hidden[-1]
        return self.fc(hidden_cat)

    def _init_hidden(self, batch_size):
        """Return an all-zero h_0 of shape (n_layers * n_directions, batch_size, hidden_size)."""
        return torch.zeros(self.n_layers * self.n_directions,
                           batch_size,
                           self.hidden_size)
# ord()取ASCII码值
def name2list(name):
    """Return ``(codes, length)`` where ``codes`` are the ``ord()`` values of ``name``."""
    codes = [ord(ch) for ch in name]
    return codes, len(codes)
def make_tensors(names, countries):
    """Turn a batch of names and country indices into padded, sorted tensors.

    Args:
        names: Sequence of name strings.
        countries: Country indices for the names (tensor or sequence).

    Returns:
        A 3-tuple ``(seq_tensor, seq_length, countries)``, all ordered by
        descending name length:
        seq_tensor is (batch, max_len) with character codes, zero-padded on
        the right; seq_length is (batch,) with the true lengths; countries is
        (batch,) with the matching country indices. Each is passed through
        ``create_tensor``.
    """
    # NOTE(review): create_tensor is not defined in the visible part of the
    # file; it is assumed to be provided elsewhere.
    encoded = [name2list(name) for name in names]
    # Character-code sequence and length of every name.
    name_sequences = [codes for codes, _ in encoded]
    seq_length = torch.LongTensor([length for _, length in encoded])
    countries = torch.as_tensor(countries).long()

    # Zero padding: start from an all-zero (batch, max_len) tensor and copy
    # each sequence into the front of its row.
    seq_tensor = torch.zeros(len(name_sequences), int(seq_length.max()), dtype=torch.long)
    for row, (codes, length) in enumerate(encoded):
        seq_tensor[row, :length] = torch.LongTensor(codes)

    # Sort by length, longest first, and reorder the other tensors to match.
    seq_length, perm_idx = seq_length.sort(dim=0, descending=True)
    seq_tensor = seq_tensor[perm_idx]
    countries = countries[perm_idx]

    # Parenthesized so that all three tensors are returned as one tuple.
    return (
        create_tensor(seq_tensor),
        create_tensor(seq_length),
        create_tensor(countries),
    )
多看文档
多读最新的文献
多敲代码(不是简单的复制粘贴后运行)