PyTorch建立RNN相关模型

之前写过PyTorch建立深度神经网络，这一次是上一篇文章的延续，将介绍RNN、LSTM、GRU相关模型的搭建。
RNN、GRU、LSTM均是拥有“记忆”功能的网络模块，三者在PyTorch中的构造参数（input_size、hidden_size、num_layers、batch_first等）基本相同。

建立RNN模型

# 定义网络结构
class Net(nn.Module):
    """Vanilla-RNN classifier that reads each sample as a sequence of rows.

    The defaults reproduce the original hard-coded configuration: 28 features
    per time step, 64 hidden units, one recurrent layer and 10 output classes.

    Args:
        input_size: Number of features in each time step.
        hidden_size: Number of features in the hidden state.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size=28, hidden_size=64, num_layers=1,
                 num_classes=10):
        super(Net, self).__init__()
        # Remembered so forward() can fold arbitrary inputs into time steps.
        self.input_size = input_size
        # nn.RNN expects [seq_len, batch, feature] by default;
        # batch_first=True switches the layout to [batch, seq_len, feature].
        self.rnn = nn.RNN(input_size=input_size,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          batch_first=True)
        # The classifier input width is tied to hidden_size so the two can
        # no longer drift apart.
        self.out = nn.Linear(hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities of shape [batch, num_classes].

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are folded into [seq_len, input_size].

        Note:
            The result is already normalised by softmax. A loss that expects
            raw logits (e.g. nn.CrossEntropyLoss) would apply softmax twice.
        """
        # With input_size=28 and 784 values per sample this yields the same
        # [batch, 28, 28] tensor as the former reshape to (batch, 28, -1).
        inputs = x.reshape((x.shape[0], -1, self.input_size))
        # The per-step output ([batch, seq_len, hidden_size]) is not needed.
        # h_n is [num_layers * num_directions, batch, hidden_size]; its first
        # dimension is NOT affected by batch_first. A plain RNN returns no c_n.
        _, h_n = self.rnn(inputs)
        # Final hidden state of the last layer.
        last_hidden = h_n[-1, :, :]
        logits = self.out(last_hidden)
        return self.softmax(logits)

建立LSTM模型

# 定义网络结构
class Net(nn.Module):
    """LSTM classifier that reads each sample as a sequence of rows.

    The defaults reproduce the original hard-coded configuration: 28 features
    per time step, 64 hidden units, one LSTM layer and 10 output classes.

    Args:
        input_size: Number of features in each time step.
        hidden_size: Number of features in the hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size=28, hidden_size=64, num_layers=1,
                 num_classes=10):
        super(Net, self).__init__()
        # Remembered so forward() can fold arbitrary inputs into time steps.
        self.input_size = input_size
        # nn.LSTM expects [seq_len, batch, feature] by default;
        # batch_first=True switches the layout to [batch, seq_len, feature].
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True)
        # The classifier input width is tied to hidden_size so the two can
        # no longer drift apart.
        self.out = nn.Linear(hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities of shape [batch, num_classes].

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are folded into [seq_len, input_size].

        Note:
            The result is already normalised by softmax. A loss that expects
            raw logits (e.g. nn.CrossEntropyLoss) would apply softmax twice.
        """
        # With input_size=28 and 784 values per sample this yields the same
        # [batch, 28, 28] tensor as the former reshape to (batch, 28, -1).
        inputs = x.reshape((x.shape[0], -1, self.input_size))
        # The per-step output ([batch, seq_len, hidden_size]) and the final
        # cell state c_n are not needed. h_n and c_n are both
        # [num_layers * num_directions, batch, hidden_size]; their first
        # dimension is NOT affected by batch_first.
        _, (h_n, _) = self.lstm(inputs)
        # Final hidden state of the last layer.
        last_hidden = h_n[-1, :, :]
        logits = self.out(last_hidden)
        return self.softmax(logits)

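定义双向LSTM模型需要在nn.LSTM中添加参数bidirectional=True。此时output的最后一维变为hidden_size*2，h_n的第一维变为num_layers*2；h_n[-2]和h_n[-1]分别是最后一层正向和反向的隐状态，若要同时利用两个方向，可将二者在最后一维拼接，并把全连接层的输入维度改为hidden_size*2。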

建立GRU模型

# 定义网络结构
class Net(nn.Module):
    """GRU classifier that reads each sample as a sequence of rows.

    The defaults reproduce the original hard-coded configuration: 28 features
    per time step, 64 hidden units, one GRU layer and 10 output classes.

    Args:
        input_size: Number of features in each time step.
        hidden_size: Number of features in the hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size=28, hidden_size=64, num_layers=1,
                 num_classes=10):
        super(Net, self).__init__()
        # Remembered so forward() can fold arbitrary inputs into time steps.
        self.input_size = input_size
        # nn.GRU expects [seq_len, batch, feature] by default;
        # batch_first=True switches the layout to [batch, seq_len, feature].
        self.gru = nn.GRU(input_size=input_size,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          batch_first=True)
        # The classifier input width is tied to hidden_size so the two can
        # no longer drift apart.
        self.out = nn.Linear(hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities of shape [batch, num_classes].

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are folded into [seq_len, input_size].

        Note:
            The result is already normalised by softmax. A loss that expects
            raw logits (e.g. nn.CrossEntropyLoss) would apply softmax twice.
        """
        # With input_size=28 and 784 values per sample this yields the same
        # [batch, 28, 28] tensor as the former reshape to (batch, 28, -1).
        inputs = x.reshape((x.shape[0], -1, self.input_size))
        # The per-step output ([batch, seq_len, hidden_size * num_directions])
        # is not needed. h_n is [num_layers * num_directions, batch,
        # hidden_size]; its first dimension is NOT affected by batch_first.
        # A GRU returns no c_n.
        _, h_n = self.gru(inputs)
        # Final hidden state of the last layer.
        last_hidden = h_n[-1, :, :]
        logits = self.out(last_hidden)
        return self.softmax(logits)

模型加载与保存

PyTorch允许用户保存和加载已训练完成的模型

# Save the model: only the parameters (state_dict) are written to disk.
checkpoint_path = 'model/GRUModel.pth'
parameters = model.state_dict()
torch.save(parameters, checkpoint_path)
# Load the model: read the parameters back and copy them into `model`.
restored_parameters = torch.load(checkpoint_path)
model.load_state_dict(restored_parameters)

模型显示

Keras创建的模型可以使用summary方法显示模型相关参数，PyTorch也拥有一个库torchsummary可以用来显示模型参数
使用pip命令即可安装

pip install torchsummary

显示模型参数只需要两行代码

from torchsummary import summary

# Shape of a single input sample passed to summary(); the batch dimension
# is not part of it.
sample_shape = (28, 28)
summary(model, sample_shape)