import torch

# Required: an initial hidden state h0 and an input sequence.
batch_size = 1
input_size = 4
hidden_size = 2
seq_len = 3

cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)
dataset = torch.randn(seq_len, batch_size, input_size)  # build the input sequence
hidden = torch.zeros(batch_size, hidden_size)           # all-zero hidden state, i.e. h0

for idx, input in enumerate(dataset):
    print('=' * 20, idx, '=' * 20)
    print('Input size:', input.shape)
    hidden = cell(input, hidden)
    print('outputs size:', hidden.shape)
    print('hidden:', hidden)
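For reference, with its default tanh nonlinearity nn.RNNCell computes h' = tanh(x W_ih^T + b_ih + h W_hh^T + b_hh). A minimal sketch checking this against the cell above, reusing cell, dataset, batch_size and hidden_size (weight_ih, weight_hh, bias_ih, bias_hh are the cell's learned parameters):

x = dataset[0]                               # one time step: (batch_size, input_size)
h = torch.zeros(batch_size, hidden_size)     # a fresh h0
manual = torch.tanh(x @ cell.weight_ih.T + cell.bias_ih
                    + h @ cell.weight_hh.T + cell.bias_hh)
print(torch.allclose(manual, cell(x, h)))    # expected: True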
import torch

batch_size = 1
input_size = 4
hidden_size = 2
seq_len = 3
num_layers = 2

cell = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
inputs = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)

out, hidden = cell(inputs, hidden)
print('output size:', out.shape)     # (seq_len, batch_size, hidden_size)
print('out:', out)
print('hidden size:', hidden.shape)  # (num_layers, batch_size, hidden_size)
print('hidden:', hidden)
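For comparison, a minimal sketch (assuming the same sizes as above) of the batch_first=True variant: the input and output swap their first two dimensions to (batch_size, seq_len, ...), while the hidden state keeps its (num_layers, batch_size, hidden_size) shape.

rnn_bf = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size,
                      num_layers=num_layers, batch_first=True)
inputs_bf = torch.randn(batch_size, seq_len, input_size)
h0 = torch.zeros(num_layers, batch_size, hidden_size)
out_bf, hn = rnn_bf(inputs_bf, h0)
print(out_bf.shape)  # torch.Size([1, 3, 2]) -> (batch_size, seq_len, hidden_size)
print(hn.shape)      # torch.Size([2, 1, 2]) -> (num_layers, batch_size, hidden_size)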
import torch
import torch.nn as nn
import torch.optim as optim

input_size = 4
hidden_size = 4
batch_size = 1

# prepare data
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # hello
y_data = [3, 1, 2, 3, 2]  # ohlol
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]  # rows correspond to indices 0,1,2,3, i.e. e,h,l,o
x_one_hot = [one_hot_lookup[x] for x in x_data]
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)  # -1 infers seq_len
labels = torch.LongTensor(y_data).view(-1, 1)  # (seq_len, 1)
# define model
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.rnncell = nn.RNNCell(input_size=self.input_size,
                                  hidden_size=self.hidden_size)

    def forward(self, input, hidden):
        hidden = self.rnncell(input, hidden)
        return hidden

    def init_hidden(self):
        return torch.zeros(self.batch_size, self.hidden_size)

model = Model(input_size, hidden_size, batch_size)
# loss & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)
# training cycle
for epoch in range(15):
    loss = 0
    optimizer.zero_grad()
    hidden = model.init_hidden()  # h0
    print('predicted string: ', end='')
    for input, label in zip(inputs, labels):
        hidden = model(input, hidden)
        loss += criterion(hidden, label)  # accumulate loss over the whole sequence
        _, idx = hidden.max(dim=1)  # hidden has 4 entries: the scores for e, h, l, o
        print(idx2char[idx.item()], end='')
    loss.backward()
    optimizer.step()
    print(', epoch [%d/15] loss = %.4f' % (epoch + 1, loss.item()))
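As a side note, the hand-written one_hot_lookup table can be replaced by torch.nn.functional.one_hot. A minimal sketch with the same x_data (inputs2 is a hypothetical name for the rebuilt tensor):

import torch.nn.functional as F

inputs2 = F.one_hot(torch.tensor(x_data), num_classes=4).float()  # (seq_len, 4)
inputs2 = inputs2.view(-1, batch_size, input_size)                # (seq_len, batch_size, input_size)
print(torch.equal(inputs2, inputs))                               # expected: True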
import torch
import torch.nn as nn
import torch.optim as optim

input_size = 4
hidden_size = 4
batch_size = 1
num_layers = 1
seq_len = 5

# prepare data
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # hello
y_data = [3, 1, 2, 3, 2]  # ohlol
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]  # rows correspond to indices 0,1,2,3, i.e. e,h,l,o
x_one_hot = [one_hot_lookup[x] for x in x_data]
inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size)
labels = torch.LongTensor(y_data)  # flat (seq_len * batch_size,) for CrossEntropyLoss
# define model
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, batch_size, num_layers):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size=self.input_size,
                          hidden_size=self.hidden_size,
                          num_layers=self.num_layers)

    def forward(self, input):
        hidden = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
        out, _ = self.rnn(input, hidden)
        return out.view(-1, self.hidden_size)  # reshape out to (seq_len * batch_size, hidden_size)

model = Model(input_size, hidden_size, batch_size, num_layers)
# loss & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.05)
# training cycle
for epoch in range(15):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    print('outputs:', outputs)
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()  # convert to a numpy array of class indices
    print('idx', idx)
    print('Predicted:', ''.join([idx2char[x] for x in idx]), end='')  # end='' suppresses the newline; ''.join concatenates the characters
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
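One detail worth noting: nn.CrossEntropyLoss expects logits of shape (N, C) and integer targets of shape (N,), which is why forward flattens the RNN output and labels stays a flat (seq_len,) tensor here, unlike the (seq_len, 1) shape used per step with RNNCell. A minimal shape check under the sizes above (here hidden_size doubles as the class count):

logits = torch.randn(seq_len * batch_size, hidden_size)  # (N, C)
targets = torch.LongTensor([3, 1, 2, 3, 2])              # (N,)
print(nn.CrossEntropyLoss()(logits, targets))            # a scalar tensor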
One-hot vectors are high-dimensional, sparse, and hardcoded. An Embedding layer maps this sparse one-hot representation into a low-dimensional, dense one.
Network architecture: Embedding layer + RNN + Linear layer
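A minimal sketch of the lookup itself, assuming a vocabulary of 4 characters embedded into 10 dimensions: nn.Embedding is a trainable lookup table indexed by integer ids, so no one-hot tensor is ever materialized.

import torch
import torch.nn as nn

emb = nn.Embedding(num_embeddings=4, embedding_dim=10)
ids = torch.LongTensor([[1, 0, 2, 2, 3]])  # (batch, seq_len) integer indices
dense = emb(ids)
print(dense.shape)                         # torch.Size([1, 5, 10])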
import torch
import torch.nn as nn
import torch.optim as optim

num_class = 4        # 4 output classes
input_size = 4       # vocabulary size (used as num_embeddings)
hidden_size = 8      # hidden state dimension
embedding_size = 10  # embed each character into a 10-dimensional space
num_layers = 2       # 2-layer RNN
batch_size = 1
seq_len = 5          # sequence length 5

# prepare data
idx2char = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]]  # (batch, seq_len) list -- hello
y_data = [3, 1, 2, 3, 2]    # ohlol
inputs = torch.LongTensor(x_data)
labels = torch.LongTensor(y_data)
# define model
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.emb = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.RNN(input_size=embedding_size,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          batch_first=True)
        self.fc = nn.Linear(hidden_size, num_class)

    def forward(self, x):
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)
        x = self.emb(x)         # (batch, seq_len) -> (batch, seq_len, embedding_size)
        x, _ = self.rnn(x, hidden)
        x = self.fc(x)          # (batch, seq_len, hidden_size) -> (batch, seq_len, num_class)
        return x.view(-1, num_class)

model = Model()
# loss & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.05)
# training cycle
for epoch in range(15):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    print('outputs:', outputs)
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()  # convert to a numpy array of class indices
    print('idx', idx)
    print('Predicted:', ''.join([idx2char[x] for x in idx]), end='')  # end='' suppresses the newline; ''.join concatenates the characters
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
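After training, a minimal greedy-decoding sketch, reusing model, inputs and idx2char from above: run the network once more without gradient tracking and pick the highest-scoring class at each position.

with torch.no_grad():
    logits = model(inputs)                            # (seq_len * batch_size, num_class)
    pred = logits.argmax(dim=1)
    print(''.join(idx2char[i] for i in pred.tolist()))  # ideally 'ohlol'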