PyTorch手写RNN、LSTM、GRU

手写RNN、LSTM、GRU

  • RNN
  • LSTM
  • GRU

  • 最近在看RNN、LSTM、GRU源码,动手实现forward函数,并将torch.nn中的原始模型自带的参数传入自己写的forward中来验证模型是否正确。

RNN

https://pytorch.org/docs/stable/generated/torch.nn.RNN.html

import torch
import torch.nn as nn

batch,T = 2,3 # batch size, sequence length
input_size ,hidden_size = 2,3
input = torch.randn(batch,T,input_size)# random input of shape (batch, T, input_size)
h_prev = torch.zeros(batch,hidden_size)# initial hidden state, all zeros, shape (batch, hidden_size)

# Step 1: run the built-in PyTorch RNN to obtain reference outputs
rnn = nn.RNN(input_size,hidden_size,batch_first=True)
rnn_out,rnn_hidden = rnn(input,h_prev.unsqueeze(0))# nn.RNN takes h_0 as (D*num_layers, batch, hidden_size), hence unsqueeze(0)
print(rnn_out)
print(rnn_hidden)

# step 2  手写rnn_forward 函数实现RNN计算原理
def rnn_forward(input,weight_ih,weight_hh,bias_ih,bias_hh,h_prev):
    """Hand-written forward pass of a single-layer, unidirectional tanh RNN.

    For every time step t this evaluates
    ``h_t = tanh(W_ih x_t + W_hh h_{t-1} + b_ih + b_hh)``.

    Args:
        input: input sequence of shape (batch, T, input_size).
        weight_ih: input-to-hidden weight of shape (hidden_size, input_size).
        weight_hh: hidden-to-hidden weight of shape (hidden_size, hidden_size).
        bias_ih: input-to-hidden bias of shape (hidden_size,).
        bias_hh: hidden-to-hidden bias of shape (hidden_size,).
        h_prev: initial hidden state of shape (batch, hidden_size).

    Returns:
        A tuple ``(h_out, h_last)`` where ``h_out`` has shape
        (batch, T, hidden_size) and holds the hidden state of every step, and
        ``h_last`` is the final hidden state with a leading axis added, i.e.
        shape (1, batch, hidden_size).
    """
    bs, T, _ = input.shape
    h_dim = weight_ih.shape[0]
    # Allocate the output with the input's dtype and device so float64 or
    # non-CPU runs are not silently cast back to the default float32/CPU tensor.
    h_out = input.new_zeros(bs, T, h_dim)

    # The transposed weights do not depend on t, so build them once.
    # x @ W^T gives the same (batch, hidden) result as a batched W @ x
    # without copying the weight matrix once per batch element.
    w_ih_t = weight_ih.t()  # (input_size, hidden_size)
    w_hh_t = weight_hh.t()  # (hidden_size, hidden_size)

    for t in range(T):
        x = input[:, t, :]  # input at step t: (batch, input_size)
        w_times_x = torch.matmul(x, w_ih_t)  # (batch, hidden_size)
        w_times_h = torch.matmul(h_prev, w_hh_t)  # (batch, hidden_size)
        h_prev = torch.tanh(w_times_x + w_times_h + bias_ih + bias_hh)
        h_out[:, t, :] = h_prev
    return h_out,h_prev.unsqueeze(0)

# Check rnn_forward by feeding it the parameters of the official nn.RNN layer
# for k,v in rnn.named_parameters():
#     print(k,v)

# Same input and initial hidden state as the nn.RNN call, so the two printouts can be compared directly.
custom_out,custom_hidde=rnn_forward(input,rnn.weight_ih_l0,rnn.weight_hh_l0,\
                                    rnn.bias_ih_l0 ,rnn.bias_hh_l0,h_prev)
print("*"*5,"custom","*"*5)
print(custom_out)
print(custom_hidde)


# -----------------------------输出-------------------------------------

tensor([[[ 0.5276, -0.1447,  0.5931],
         [ 0.8260, -0.0513,  0.4385],
         [ 0.5481, -0.2855,  0.0533]],

        [[ 0.4044, -0.1576,  0.3338],
         [ 0.9303,  0.5252,  0.0067],
         [ 0.3523, -0.0402,  0.3841]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.5481, -0.2855,  0.0533],
         [ 0.3523, -0.0402,  0.3841]]], grad_fn=<StackBackward0>)
***** custom *****
tensor([[[ 0.5276, -0.1447,  0.5931],
         [ 0.8260, -0.0513,  0.4385],
         [ 0.5481, -0.2855,  0.0533]],

        [[ 0.4044, -0.1576,  0.3338],
         [ 0.9303,  0.5252,  0.0067],
         [ 0.3523, -0.0402,  0.3841]]], grad_fn=<CopySlices>)
tensor([[[ 0.5481, -0.2855,  0.0533],
         [ 0.3523, -0.0402,  0.3841]]], grad_fn=<UnsqueezeBackward0>)

LSTM

https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html

import torch
import torch.nn as nn

bs,T,i_size,h_size=2,3,4,5 # batch size, sequence length, input size, hidden size
# proj_size is not passed to nn.LSTM below
input = torch.randn(bs,T,i_size)# input sequence, shape (bs, T, i_size)
c0= torch.randn(bs,h_size) # initial cell state; a plain tensor, not a trained parameter
h0= torch.randn(bs,h_size) # initial hidden state, shape (bs, h_size)

# Run the official API to obtain reference outputs
lstm_layer = nn.LSTM(i_size,h_size,batch_first =True)
output,(h_final,c_final) =lstm_layer(input,(h0.unsqueeze(0),c0.unsqueeze(0)))

# print(output)
# print(h_final,c_final)

print("-"*30)

# List the layer's parameter names and shapes
for p,v in lstm_layer.named_parameters():
    print(p,v.shape)
------------输出--------------
weight_ih_l0 torch.Size([20, 4])
weight_hh_l0 torch.Size([20, 5])
bias_ih_l0 torch.Size([20])
bias_hh_l0 torch.Size([20])
# bs,T,i_size,h_size=2,3,4,5
# input = torch.randn(bs,T,i_size)#输入
# c0= torch.randn(bs,h_size) #初始值,不需要训练
# h0= torch.randn(bs,h_size) # bs*h_size
#自己写一个LSTM模型
def lstm_forward(input, initial_states, w_ih, w_hh, b_ih, b_hh):
    """Hand-written forward pass of a single-layer, unidirectional LSTM.

    The four gates are stacked along the first axis of the weights and biases
    in the order input (i), forget (f), cell candidate (g), output (o). Each
    step computes::

        i, f, o = sigmoid(...)      g = tanh(...)
        c_t = f * c_{t-1} + i * g
        h_t = o * tanh(c_t)

    Args:
        input: input sequence of shape (batch, T, input_size).
        initial_states: tuple ``(h0, c0)``, each of shape (batch, hidden_size).
        w_ih: input-to-hidden weight of shape (4*hidden_size, input_size).
        w_hh: hidden-to-hidden weight of shape (4*hidden_size, hidden_size).
        b_ih: input-to-hidden bias of shape (4*hidden_size,).
        b_hh: hidden-to-hidden bias of shape (4*hidden_size,).

    Returns:
        ``(output, (h_last, c_last))`` where ``output`` has shape
        (batch, T, hidden_size) and ``h_last`` / ``c_last`` have shape
        (batch, hidden_size). No leading layer axis is added.
    """
    prev_h, prev_c = initial_states
    bs, T, _ = input.shape
    h_size = w_ih.shape[0] // 4

    # Allocate the output with the input's dtype and device so float64 or
    # non-CPU runs are not silently cast back to the default float32/CPU tensor.
    output = input.new_zeros(bs, T, h_size)

    # x @ W^T gives the same (batch, 4*hidden) result as a batched W @ x
    # without copying the weight matrix once per batch element.
    w_ih_t = w_ih.t()  # (input_size, 4*hidden_size)
    w_hh_t = w_hh.t()  # (hidden_size, 4*hidden_size)

    for t in range(T):
        x = input[:, t, :]  # (batch, input_size)
        w_times_x = torch.matmul(x, w_ih_t)  # (batch, 4*hidden_size)
        w_times_h_prev = torch.matmul(prev_h, w_hh_t)  # (batch, 4*hidden_size)

        # Pre-activations of all four gates at once, then split into i, f, g, o.
        gates = w_times_x + w_times_h_prev + b_ih + b_hh
        i_pre, f_pre, g_pre, o_pre = gates.chunk(4, dim=1)
        i_t = torch.sigmoid(i_pre)
        f_t = torch.sigmoid(f_pre)
        g_t = torch.tanh(g_pre)
        o_t = torch.sigmoid(o_pre)

        prev_c = f_t * prev_c + i_t * g_t
        prev_h = o_t * torch.tanh(prev_c)

        output[:, t, :] = prev_h
    return output,(prev_h,prev_c)

# Run the hand-written LSTM with the official layer's parameters and the same (h0, c0).
output_custom,(h_custom,c_custom) = lstm_forward(input, (h0,c0), lstm_layer.weight_ih_l0 ,lstm_layer.weight_hh_l0, \
                                                lstm_layer.bias_ih_l0,lstm_layer.bias_hh_l0)

# Print the official output followed by the hand-written one for a side-by-side comparison.
print(output)
print(output_custom)
------------------------------输出-----------------------
tensor([[[-0.1115, -0.0885,  0.2838,  0.3573, -0.1590],
         [-0.1856,  0.0371,  0.3207,  0.0711, -0.1239],
         [-0.0640, -0.0801,  0.2159,  0.0815, -0.2722]],

        [[-0.1969, -0.2094,  0.3068,  0.5677, -0.4094],
         [-0.2194, -0.2867,  0.3283,  0.1256, -0.2353],
         [-0.2950, -0.0986,  0.1122,  0.2006, -0.1054]]],
       grad_fn=<TransposeBackward0>)
tensor([[[-0.1115, -0.0885,  0.2838,  0.3573, -0.1590],
         [-0.1856,  0.0371,  0.3207,  0.0711, -0.1239],
         [-0.0640, -0.0801,  0.2159,  0.0815, -0.2722]],

        [[-0.1969, -0.2094,  0.3068,  0.5677, -0.4094],
         [-0.2194, -0.2867,  0.3283,  0.1256, -0.2353],
         [-0.2950, -0.0986,  0.1122,  0.2006, -0.1054]]], grad_fn=<CopySlices>)

GRU

https://pytorch.org/docs/stable/generated/torch.nn.GRU.html

import torch 
import torch.nn as nn
input_size =3
hidden_size =5
batch_size = 2
T = 3
input = torch.randn(batch_size,T,input_size)
# Compare GRU with LSTM: the GRU has 0.75x as many parameters as the LSTM
lstm_model = nn.LSTM(3,5)
gru_model = nn.GRU(3,5)

# for p in lstm_model.parameters():
#     print(p.numel())# input*hidden*4   +  hidden* hidden *4 + hidden *4 + hidden*4

# Total number of parameter elements in each model
print(sum(p.numel() for p in lstm_model.parameters()))
print(sum(p.numel() for p in gru_model.parameters()))
200
150
input_size =3
hidden_size =5
batch_size = 2
T = 3
input = torch.randn(batch_size,T,input_size) # input sequence, shape (batch_size, T, input_size)

# Official GRU layer used as the reference implementation
gru_finnal = nn.GRU(input_size,hidden_size,batch_first=True)

# No initial hidden state is passed here; print the (output, h_n) tuple, then each parameter's name and shape.
print(gru_finnal(input))
for k,v in gru_finnal.named_parameters():
    print(k,v.shape)
(tensor([[[-0.1742,  0.2157, -0.3234,  0.1311,  0.2969],
         [-0.1060,  0.2348, -0.1830,  0.1765,  0.1432],
         [ 0.0437,  0.4922, -0.6420,  0.5938,  0.3552]],

        [[ 0.2877,  0.2825, -0.4608,  0.4569,  0.2492],
         [ 0.0747,  0.4547, -0.7333,  0.4709,  0.4990],
         [ 0.0906,  0.4704, -0.5774,  0.3330, -0.1233]]],
       grad_fn=<TransposeBackward1>), tensor([[[ 0.0437,  0.4922, -0.6420,  0.5938,  0.3552],
         [ 0.0906,  0.4704, -0.5774,  0.3330, -0.1233]]],
       grad_fn=<StackBackward0>))
weight_ih_l0 torch.Size([15, 3])
weight_hh_l0 torch.Size([15, 5])
bias_ih_l0 torch.Size([15])
bias_hh_l0 torch.Size([15])
def gru_forward(input,initial_states,w_ih, w_hh, b_ih, b_hh):
    """Hand-written forward pass of a single-layer, unidirectional GRU.

    The three gate blocks are stacked along the first axis of the weights and
    biases in the order reset (r), update (z), candidate (n). Each step
    computes::

        r = sigmoid(W_ir x + W_hr h + b_ir + b_hr)
        z = sigmoid(W_iz x + W_hz h + b_iz + b_hz)
        n = tanh(W_in x + b_in + r * (W_hn h + b_hn))
        h = (1 - z) * n + z * h

    Args:
        input: input sequence of shape (batch, T, input_size).
        initial_states: initial hidden state of shape (batch, hidden_size).
        w_ih: input-to-hidden weight of shape (3*hidden_size, input_size).
        w_hh: hidden-to-hidden weight of shape (3*hidden_size, hidden_size).
        b_ih: input-to-hidden bias of shape (3*hidden_size,).
        b_hh: hidden-to-hidden bias of shape (3*hidden_size,).

    Returns:
        ``(output, h_last)`` where ``output`` has shape
        (batch, T, hidden_size) and ``h_last`` has shape (batch, hidden_size).
        No leading layer axis is added.
    """
    batch, T, _ = input.shape
    hidden_size = w_hh.shape[1]
    prev_h = initial_states

    # Size the buffer from `input` itself so the function does not depend on
    # any module-level variable, and keep the input's dtype and device.
    output = input.new_zeros(batch, T, hidden_size)

    # Loop-invariant pieces: transposed weights and per-gate bias slices.
    w_ih_t = w_ih.t()  # (input_size, 3*hidden_size)
    w_hh_t = w_hh.t()  # (hidden_size, 3*hidden_size)
    b_ir, b_iz, b_in = b_ih.chunk(3)
    b_hr, b_hz, b_hn = b_hh.chunk(3)

    for t in range(T):
        t_x = input[:, t, :]  # (batch, input_size)
        # Split both projections into their r / z / n blocks.
        x_r, x_z, x_n = torch.matmul(t_x, w_ih_t).chunk(3, dim=1)
        h_r, h_z, h_n = torch.matmul(prev_h, w_hh_t).chunk(3, dim=1)

        # Reset gate
        r_t = torch.sigmoid(x_r + h_r + b_ir + b_hr)
        # Update gate
        z_t = torch.sigmoid(x_z + h_z + b_iz + b_hz)
        # Candidate state: the reset gate scales only the hidden-side term
        n_t = torch.tanh(x_n + b_in + r_t * (h_n + b_hn))
        # Blend the candidate with the previous hidden state
        prev_h = (1 - z_t) * n_t + z_t * prev_h

        output[:, t, :] = prev_h
    return output,prev_h
# Zero initial hidden state of shape (batch_size, hidden_size)
prevH = torch.zeros(batch_size,hidden_size)
# Run the hand-written GRU with the parameters of the official layer
custom_output,custom_layer=gru_forward(input,prevH, gru_finnal.weight_ih_l0, gru_finnal.weight_hh_l0,
                                       gru_finnal.bias_ih_l0 , gru_finnal.bias_hh_l0)
# Print both results as one tuple (the name `gru_custom` was never defined)
print((custom_output,custom_layer))
(tensor([[[-0.1742,  0.2157, -0.3234,  0.1311,  0.2969],
         [-0.1060,  0.2348, -0.1830,  0.1765,  0.1432],
         [ 0.0437,  0.4922, -0.6420,  0.5938,  0.3552]],

        [[ 0.2877,  0.2825, -0.4608,  0.4569,  0.2492],
         [ 0.0747,  0.4547, -0.7333,  0.4709,  0.4990],
         [ 0.0906,  0.4704, -0.5774,  0.3330, -0.1233]]], grad_fn=<CopySlices>), tensor([[ 0.0437,  0.4922, -0.6420,  0.5938,  0.3552],
        [ 0.0906,  0.4704, -0.5774,  0.3330, -0.1233]], grad_fn=<AddBackward0>))
# Verify the hand-written GRU against the official layer on the same input
output,layer=gru_finnal(input)
print(torch.allclose(custom_output, output))
# custom_layer is 2-D while the official final state carries an extra leading axis; allclose broadcasts the two.
print(torch.allclose(custom_layer, layer))
True
True

参考:

https://pytorch.org/docs/stable/generated/
https://www.bilibili.com/video/BV13i4y1R7jB/?spm_id_from=333.788&vd_source=697b84ee1a675d11783da45d68573de1

你可能感兴趣的:(深度学习,pytorch,rnn,lstm,gru)