https://pytorch.org/docs/stable/generated/torch.nn.RNN.html
import torch
import torch.nn as nn
batch, T = 2, 3  # batch size, sequence length
input_size, hidden_size = 2, 3
input = torch.randn(batch, T, input_size)  # randomly initialized input
h_prev = torch.zeros(batch, hidden_size)  # initial hidden state
# Step 1: call the official PyTorch nn.RNN
rnn = nn.RNN(input_size, hidden_size, batch_first=True)
rnn_out, rnn_hidden = rnn(input, h_prev.unsqueeze(0))  # the hidden state must have shape (D*num_layers, batch, hidden_size)
print(rnn_out)
print(rnn_hidden)
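# Per the linked docs, the update that the hand-written version in step 2 below reproduces at each
# time step is: h_t = tanh(x_t @ W_ih.T + b_ih + h_{t-1} @ W_hh.T + b_hh)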
# Step 2: hand-write rnn_forward to reproduce the RNN computation
def rnn_forward(input, weight_ih, weight_hh, bias_ih, bias_hh, h_prev):
    # print(weight_ih.shape)  # hidden_size * input_size
    # print(weight_hh.shape)  # hidden_size * hidden_size
    # print(h_prev.shape)     # batch * hidden_size
    bs, T, input_size = input.shape
    h_dim = weight_ih.shape[0]
    h_out = torch.zeros(bs, T, h_dim)  # pre-allocate the output
    for t in range(T):
        x = input[:, t, :]  # input at time t: batch * input_size
        w_ih_batch = weight_ih.unsqueeze(0).tile(bs, 1, 1)  # replicate across the batch for bmm: batch * h_dim * input_size
        w_hh_batch = weight_hh.unsqueeze(0).tile(bs, 1, 1)  # batch * h_dim * h_dim
        x = x.unsqueeze(2)  # batch * input_size * 1
        w_times_x = torch.bmm(w_ih_batch, x).squeeze(-1)  # batch * h_dim
        w_times_h = torch.bmm(w_hh_batch, h_prev.unsqueeze(2)).squeeze(-1)  # batch * h_dim
        h_prev = torch.tanh(w_times_x + w_times_h + bias_ih + bias_hh)
        h_out[:, t, :] = h_prev
    return h_out, h_prev.unsqueeze(0)
# Verify the correctness of rnn_forward
# for k, v in rnn.named_parameters():
#     print(k, v)
custom_out, custom_hidden = rnn_forward(input, rnn.weight_ih_l0, rnn.weight_hh_l0,
                                        rnn.bias_ih_l0, rnn.bias_hh_l0, h_prev)
print("*"*5, "custom", "*"*5)
print(custom_out)
print(custom_hidden)
# -----------------------------Output-------------------------------------
tensor([[[ 0.5276, -0.1447, 0.5931],
[ 0.8260, -0.0513, 0.4385],
[ 0.5481, -0.2855, 0.0533]],
[[ 0.4044, -0.1576, 0.3338],
[ 0.9303, 0.5252, 0.0067],
[ 0.3523, -0.0402, 0.3841]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.5481, -0.2855, 0.0533],
[ 0.3523, -0.0402, 0.3841]]], grad_fn=<StackBackward0>)
***** custom *****
tensor([[[ 0.5276, -0.1447, 0.5931],
[ 0.8260, -0.0513, 0.4385],
[ 0.5481, -0.2855, 0.0533]],
[[ 0.4044, -0.1576, 0.3338],
[ 0.9303, 0.5252, 0.0067],
[ 0.3523, -0.0402, 0.3841]]], grad_fn=<CopySlices>)
tensor([[[ 0.5481, -0.2855, 0.0533],
[ 0.3523, -0.0402, 0.3841]]], grad_fn=<UnsqueezeBackward0>)
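As an aside, the per-batch weight tiling and bmm above are not strictly needed: every sample shares the same weights, so a plain matmul against the transposed weight matrices gives the same result. A minimal alternative sketch, assuming the same single-layer, batch_first setup as above:
def rnn_forward_matmul(input, weight_ih, weight_hh, bias_ih, bias_hh, h_prev):
    bs, T, _ = input.shape
    h_out = torch.zeros(bs, T, weight_ih.shape[0])
    for t in range(T):
        # (bs, input_size) @ (input_size, hidden) + (bs, hidden) @ (hidden, hidden)
        h_prev = torch.tanh(input[:, t, :] @ weight_ih.T + bias_ih + h_prev @ weight_hh.T + bias_hh)
        h_out[:, t, :] = h_prev
    return h_out, h_prev.unsqueeze(0)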
https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
import torch
import torch.nn as nn
bs, T, i_size, h_size = 2, 3, 4, 5
# proj_size is left at its default here (no output projection)
input = torch.randn(bs, T, i_size)  # input
c0 = torch.randn(bs, h_size)  # initial cell state, not trained
h0 = torch.randn(bs, h_size)  # bs * h_size
# Call the official API
lstm_layer = nn.LSTM(i_size, h_size, batch_first=True)
output, (h_final, c_final) = lstm_layer(input, (h0.unsqueeze(0), c0.unsqueeze(0)))
# print(output)
# print(h_final,c_final)
print("-"*30)
for p, v in lstm_layer.named_parameters():
    print(p, v.shape)
------------Output--------------
weight_ih_l0 torch.Size([20, 4])
weight_hh_l0 torch.Size([20, 5])
bias_ih_l0 torch.Size([20])
bias_hh_l0 torch.Size([20])
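The leading dimension 20 here is 4*h_size: nn.LSTM stacks the input/forget/cell/output gate parameters into single matrices. A quick sketch splitting them back apart, using the lstm_layer defined above:
w_ii, w_if, w_ig, w_io = torch.chunk(lstm_layer.weight_ih_l0, 4, dim=0)
print(w_ii.shape)  # torch.Size([5, 4]), one gate's h_size x i_size block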
# Hand-write an LSTM forward pass
def lstm_forward(input, initial_states, w_ih, w_hh, b_ih, b_hh):
    # w_ih: [4*hidden_size, input_size]
    # w_hh: [4*hidden_size, hidden_size]
    # b_ih = b_hh = [4*hidden_size]
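    # For reference (from the linked docs): rows [0:h), [h:2h), [2h:3h), [3h:4h) of w_ih/w_hh hold
    # the input, forget, cell and output gate parameters, so the slices below compute
    #   i_t = sigmoid(W_ii x_t + b_ii + W_hi h_{t-1} + b_hi)
    #   f_t = sigmoid(W_if x_t + b_if + W_hf h_{t-1} + b_hf)
    #   g_t = tanh(W_ig x_t + b_ig + W_hg h_{t-1} + b_hg)
    #   o_t = sigmoid(W_io x_t + b_io + W_ho h_{t-1} + b_ho)
    #   c_t = f_t * c_{t-1} + i_t * g_t,   h_t = o_t * tanh(c_t)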
    h0, c0 = initial_states
    bs, T, i_size = input.shape
    h_size = w_ih.shape[0] // 4
    prev_h = h0
    prev_c = c0
    batch_w_ih = w_ih.unsqueeze(0).tile(bs, 1, 1)  # [4*hidden_size, input_size] -> [batch, 4*hidden_size, input_size]
    batch_w_hh = w_hh.unsqueeze(0).tile(bs, 1, 1)  # [4*hidden_size, hidden_size] -> [batch, 4*hidden_size, hidden_size]
    output_size = h_size
    output = torch.zeros(bs, T, output_size)
    for t in range(T):
        x = input[:, t, :]  # batch * input_size
        w_times_x = torch.bmm(batch_w_ih, x.unsqueeze(-1))  # batch, 4*hidden_size, 1
        w_times_x = w_times_x.squeeze(-1)  # batch, 4*hidden_size
        w_times_h_prev = torch.bmm(batch_w_hh, prev_h.unsqueeze(-1))  # batch, 4*hidden_size, 1
        w_times_h_prev = w_times_h_prev.squeeze(-1)  # batch, 4*hidden_size
        i_t = torch.sigmoid(w_times_x[:, :h_size] + w_times_h_prev[:, :h_size] + b_ih[:h_size] + b_hh[:h_size])
        f_t = torch.sigmoid(w_times_x[:, h_size:2*h_size] + w_times_h_prev[:, h_size:2*h_size]
                            + b_ih[h_size:2*h_size] + b_hh[h_size:2*h_size])
        g_t = torch.tanh(w_times_x[:, 2*h_size:3*h_size] + w_times_h_prev[:, 2*h_size:3*h_size]
                         + b_ih[2*h_size:3*h_size] + b_hh[2*h_size:3*h_size])
        o_t = torch.sigmoid(w_times_x[:, 3*h_size:4*h_size] + w_times_h_prev[:, 3*h_size:4*h_size]
                            + b_ih[3*h_size:4*h_size] + b_hh[3*h_size:4*h_size])
        prev_c = f_t * prev_c + i_t * g_t
        prev_h = o_t * torch.tanh(prev_c)
        output[:, t, :] = prev_h
    return output, (prev_h, prev_c)
output_custom, (h_custom, c_custom) = lstm_forward(input, (h0, c0), lstm_layer.weight_ih_l0, lstm_layer.weight_hh_l0,
                                                   lstm_layer.bias_ih_l0, lstm_layer.bias_hh_l0)
print(output)
print(output_custom)
------------------------------Output-----------------------
tensor([[[-0.1115, -0.0885, 0.2838, 0.3573, -0.1590],
[-0.1856, 0.0371, 0.3207, 0.0711, -0.1239],
[-0.0640, -0.0801, 0.2159, 0.0815, -0.2722]],
[[-0.1969, -0.2094, 0.3068, 0.5677, -0.4094],
[-0.2194, -0.2867, 0.3283, 0.1256, -0.2353],
[-0.2950, -0.0986, 0.1122, 0.2006, -0.1054]]],
grad_fn=<TransposeBackward0>)
tensor([[[-0.1115, -0.0885, 0.2838, 0.3573, -0.1590],
[-0.1856, 0.0371, 0.3207, 0.0711, -0.1239],
[-0.0640, -0.0801, 0.2159, 0.0815, -0.2722]],
[[-0.1969, -0.2094, 0.3068, 0.5677, -0.4094],
[-0.2194, -0.2867, 0.3283, 0.1256, -0.2353],
[-0.2950, -0.0986, 0.1122, 0.2006, -0.1054]]], grad_fn=<CopySlices>)
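The two printouts match; for a check stricter than eyeballing, torch.allclose can compare the outputs and final states (a small sketch using the variables defined above):
print(torch.allclose(output, output_custom))         # expected: True
print(torch.allclose(h_final.squeeze(0), h_custom))  # expected: True
print(torch.allclose(c_final.squeeze(0), c_custom))  # expected: True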
https://pytorch.org/docs/stable/generated/torch.nn.GRU.html
import torch
import torch.nn as nn
input_size =3
hidden_size =5
batch_size = 2
T = 3
input = torch.randn(batch_size,T,input_size)
# Comparing GRU with LSTM: the former has 0.75x as many parameters as the latter
lstm_model = nn.LSTM(3, 5)
gru_model = nn.GRU(3, 5)
# for p in lstm_model.parameters():
#     print(p.numel())  # input*hidden*4 + hidden*hidden*4 + hidden*4 + hidden*4
print(sum(p.numel() for p in lstm_model.parameters()))
print(sum(p.numel() for p in gru_model.parameters()))
200
150
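The counts line up with the per-layer formula: each model stores weight_ih, weight_hh, bias_ih and bias_hh, stacked over 4 gates for LSTM and 3 for GRU. A quick arithmetic check with the sizes used above:
i_sz, h_sz = 3, 5
per_gate = i_sz*h_sz + h_sz*h_sz + 2*h_sz  # 15 + 25 + 10 = 50
print(4 * per_gate)  # 200 for LSTM (i, f, g, o)
print(3 * per_gate)  # 150 for GRU (r, z, n)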
input_size =3
hidden_size =5
batch_size = 2
T = 3
input = torch.randn(batch_size,T,input_size)
gru_final = nn.GRU(input_size, hidden_size, batch_first=True)
print(gru_final(input))
for k, v in gru_final.named_parameters():
    print(k, v.shape)
(tensor([[[-0.1742, 0.2157, -0.3234, 0.1311, 0.2969],
[-0.1060, 0.2348, -0.1830, 0.1765, 0.1432],
[ 0.0437, 0.4922, -0.6420, 0.5938, 0.3552]],
[[ 0.2877, 0.2825, -0.4608, 0.4569, 0.2492],
[ 0.0747, 0.4547, -0.7333, 0.4709, 0.4990],
[ 0.0906, 0.4704, -0.5774, 0.3330, -0.1233]]],
grad_fn=<TransposeBackward1>), tensor([[[ 0.0437, 0.4922, -0.6420, 0.5938, 0.3552],
[ 0.0906, 0.4704, -0.5774, 0.3330, -0.1233]]],
grad_fn=<StackBackward0>))
weight_ih_l0 torch.Size([15, 3])
weight_hh_l0 torch.Size([15, 5])
bias_ih_l0 torch.Size([15])
bias_hh_l0 torch.Size([15])
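The 15 rows here are 3*hidden_size: the reset, update and candidate (r, z, n) parameters are stacked in that order. Per the linked docs, the update that gru_forward below implements is
r_t = sigmoid(W_ir x_t + b_ir + W_hr h_{t-1} + b_hr)
z_t = sigmoid(W_iz x_t + b_iz + W_hz h_{t-1} + b_hz)
n_t = tanh(W_in x_t + b_in + r_t * (W_hn h_{t-1} + b_hn))
h_t = (1 - z_t) * n_t + z_t * h_{t-1}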
def gru_forward(input, initial_states, w_ih, w_hh, b_ih, b_hh):
    # w_ih: 15 * 3, i.e. [3*hidden_size, input_size]
    # w_hh: 15 * 5, i.e. [3*hidden_size, hidden_size]
    # b_ih: 15
    # b_hh: 15
    batch, T, input_size = input.shape
    hidden_size = w_hh.shape[1]
    prev_h0 = initial_states
    output = torch.zeros(batch, T, hidden_size)
    # replicate the weights across the batch for bmm
    batch_w_ih = w_ih.unsqueeze(0).tile(batch, 1, 1)
    batch_w_hh = w_hh.unsqueeze(0).tile(batch, 1, 1)
    for t in range(T):
        t_x = input[:, t, :]  # batch * input_size
        t_wih_x = torch.bmm(batch_w_ih, t_x.unsqueeze(-1))  # batch * 15 * 1
        t_wih_x = t_wih_x.squeeze(-1)  # batch * 15
        t_whh_x = torch.bmm(batch_w_hh, prev_h0.unsqueeze(-1))  # batch * 15 * 1
        t_whh_x = t_whh_x.squeeze(-1)  # batch * 15
        # reset gate
        r_t = torch.sigmoid(t_wih_x[:, :hidden_size] + t_whh_x[:, :hidden_size]
                            + b_ih[:hidden_size] + b_hh[:hidden_size])
        # print(r_t.shape)  # batch * hidden_size
        # update gate
        z_t = torch.sigmoid(t_wih_x[:, hidden_size:2*hidden_size] + t_whh_x[:, hidden_size:2*hidden_size]
                            + b_ih[hidden_size:2*hidden_size] + b_hh[hidden_size:2*hidden_size])
        # candidate state
        n_t = torch.tanh(t_wih_x[:, 2*hidden_size:] + b_ih[2*hidden_size:]
                         + r_t * (t_whh_x[:, 2*hidden_size:] + b_hh[2*hidden_size:]))
        # state update
        prev_h0 = (1 - z_t) * n_t + z_t * prev_h0
        output[:, t, :] = prev_h0
    return output, prev_h0
prevH = torch.zeros(batch_size, hidden_size)
custom_output, custom_layer = gru_forward(input, prevH, gru_final.weight_ih_l0, gru_final.weight_hh_l0,
                                          gru_final.bias_ih_l0, gru_final.bias_hh_l0)
print((custom_output, custom_layer))
(tensor([[[-0.1742, 0.2157, -0.3234, 0.1311, 0.2969],
[-0.1060, 0.2348, -0.1830, 0.1765, 0.1432],
[ 0.0437, 0.4922, -0.6420, 0.5938, 0.3552]],
[[ 0.2877, 0.2825, -0.4608, 0.4569, 0.2492],
[ 0.0747, 0.4547, -0.7333, 0.4709, 0.4990],
[ 0.0906, 0.4704, -0.5774, 0.3330, -0.1233]]], grad_fn=<CopySlices>), tensor([[ 0.0437, 0.4922, -0.6420, 0.5938, 0.3552],
[ 0.0906, 0.4704, -0.5774, 0.3330, -0.1233]], grad_fn=<AddBackward0>))
# Verify the hand-written version against the official API
output, layer = gru_final(input)
print(torch.allclose(custom_output, output))
print(torch.allclose(custom_layer, layer))
True
True
References:
https://pytorch.org/docs/stable/generated/
https://www.bilibili.com/video/BV13i4y1R7jB/spm_id_from=333.788&vd_source=697b84ee1a675d11783da45d68573de1