Building an LSTM Model with PyTorch

In earlier posts we built a BP neural network and LeNet with PyTorch. This time we use an LSTM network to classify the MNIST dataset. How the data is obtained is not covered in detail here; the focus is on how to assemble the LSTM network and set its parameters.

1. Building the LSTM network

Here we build the model using just an LSTM network (two stacked layers, see LAYERS below) plus a fully connected layer.

import torch.nn as nn

# Input is an image of shape (batch, seq_len, feature): each row of the image
# is treated as one time step, and each time step is a feature vector of length 32
INPUT_SIZE = 32   # feature length per time step (pixels per row)
HIDDEN_SIZE = 10  # size of the LSTM hidden state
LAYERS = 2        # number of stacked LSTM layers
DROP_RATE = 0.2   # dropout applied between stacked LSTM layers
TIME_STEP = 32    # number of time steps (image rows)


class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        
        # Build the LSTM here; nn.RNN and nn.GRU are constructed the same way
        self.rnn = nn.LSTM(
            input_size=INPUT_SIZE,
            hidden_size=HIDDEN_SIZE,
            num_layers=LAYERS,
            dropout=DROP_RATE,
            batch_first=True  # True: input/output tensors are (batch, seq_len, feature)
            # False: input/output tensors are (seq_len, batch, feature)
        )
        self.hidden_out = nn.Linear(320, 10)  # maps the flattened step outputs (32 steps * 10 hidden units = 320) to the 10 classes
        self.sig = nn.Sigmoid()  # Sigmoid activation (note: nn.CrossEntropyLoss used later already applies softmax internally)

    def forward(self, x):
        r_out, (h_s, h_c) = self.rnn(x)
        out = r_out.reshape(-1, 320)  # hidden size is 10, so r_out is [-1, 32, 10]; flatten it to [-1, 320]
        out = self.hidden_out(out)    # fully connected layer for classification
        out = self.sig(out)
        return out

The output shape after each stage (for a batch of 20):

torch.Size([20, 32, 32])
torch.Size([20, 32, 10])
torch.Size([20, 320])
torch.Size([20, 10])
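
These shapes can be reproduced with a quick sanity check. The snippet below is a minimal sketch, assuming the LSTM class above is defined; the batch of 20 random tensors simply stands in for real images:

import torch

model = LSTM()
x = torch.randn(20, 32, 32)             # dummy batch: 20 "images" of 32 rows x 32 pixels
r_out, (h_s, h_c) = model.rnn(x)
print(x.shape)                          # torch.Size([20, 32, 32])
print(r_out.shape)                      # torch.Size([20, 32, 10])
flat = r_out.reshape(-1, 320)
print(flat.shape)                       # torch.Size([20, 320])
print(model.hidden_out(flat).shape)     # torch.Size([20, 10])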

2. Training the model on the MNIST dataset

Training the model on the dataset follows the same procedure as before, so it is not repeated here; the complete code is given below.

import os

import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim


class Config:
    batch_size = 128
    epoch = 10
    alpha = 1e-3          # learning rate
    print_per_step = 100  # print progress every 100 batches

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

INPUT_SIZE = 32
HIDDEN_SIZE = 10
LAYERS = 2
DROP_RATE = 0.2
TIME_STEP = 32

class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()

        self.rnn = nn.LSTM(
            input_size=INPUT_SIZE,
            hidden_size=HIDDEN_SIZE,
            num_layers=LAYERS,
            dropout=DROP_RATE,
            batch_first=True  # True: input/output tensors are (batch, seq_len, feature)
            # False: input/output tensors are (seq_len, batch, feature)
        )
        self.hidden_out = nn.Linear(320, 10)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        r_out, (h_s, h_c)  = self.rnn(x)
        out = r_out.reshape(-1,320)
        out = self.hidden_out(out)
        out = self.sig(out)
        return out


class TrainProcess:
    def __init__(self):
        self.train, self.test = self.load_data()
        self.net = LSTM().to(device)
        self.criterion = nn.CrossEntropyLoss()  # loss function (combines log-softmax and NLL)
        self.optimizer = optim.Adam(self.net.parameters(), lr=Config.alpha)

    @staticmethod
    def load_data():
        train_data = datasets.MNIST(root='./data/',
                                    train=True,
                                    transform=transforms.Compose([
                                        transforms.Resize((32, 32)),  # MNIST is 28x28; resize to match TIME_STEP x INPUT_SIZE
                                        transforms.ToTensor()]),
                                    download=True)

        test_data = datasets.MNIST(root='./data/',
                                   train=False,
                                   transform=transforms.Compose([
                                       transforms.Resize((32, 32)),
                                       transforms.ToTensor()]))

        # return data loaders (iterators) over the datasets
        # shuffle: whether to shuffle the sample order each epoch
        train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                                   batch_size=Config.batch_size,
                                                   shuffle=True)

        test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                                  batch_size=Config.batch_size,
                                                  shuffle=False)
        return train_loader, test_loader

    def train_step(self):
        print("Training & Evaluating based on LSTM......")
        os.makedirs('result', exist_ok=True)  # make sure the output directory exists
        file = 'result/train_mnist.txt'
        fp = open(file, 'w', encoding='utf-8')
        fp.write('epoch\tbatch\tloss\taccuracy\n')
        for epoch in range(Config.epoch):
            print("Epoch {:3}.".format(epoch + 1))
            for batch_idx, (data, label) in enumerate(self.train):
                data, label = data.to(device), label.to(device)
                data = data.squeeze(dim=1)  # [batch, 1, 32, 32] -> [batch, 32, 32]
                self.optimizer.zero_grad()
                outputs = self.net(data)
                loss = self.criterion(outputs, label)
                loss.backward()
                self.optimizer.step()
                # print the result every 100 batches
                if batch_idx % Config.print_per_step == 0:
                    _, predicted = torch.max(outputs, 1)
                    correct = (predicted == label).sum().item()
                    accuracy = correct / label.size(0)  # use the actual batch size; the last batch may be smaller
                    msg = "Batch: {:5}, Loss: {:6.2f}, Accuracy: {:8.2%}."
                    print(msg.format(batch_idx, loss.item(), accuracy))
                    fp.write('{}\t{}\t{}\t{}\n'.format(epoch, batch_idx, loss.item(), accuracy))
        fp.close()
        test_loss = 0.
        test_correct = 0
        self.net.eval()  # disable dropout during evaluation
        with torch.no_grad():
            for data, label in self.test:
                data, label = data.to(device), label.to(device)
                data = data.squeeze(dim=1)
                outputs = self.net(data)
                loss = self.criterion(outputs, label)
                test_loss += loss.item() * label.size(0)  # accumulate the summed per-sample loss
                _, predicted = torch.max(outputs, 1)
                test_correct += (predicted == label).sum().item()
        accuracy = test_correct / len(self.test.dataset)
        loss = test_loss / len(self.test.dataset)
        print("Test Loss: {:5.2f}, Accuracy: {:6.2%}".format(loss, accuracy))
        torch.save(self.net.state_dict(), './result/raw_train_mnist_model.pth')


if __name__ == "__main__":
    p = TrainProcess()
    p.train_step()
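
After training, the saved weights can be loaded back for inference. A minimal sketch, assuming the same LSTM class definition is in scope:

net = LSTM().to(device)
net.load_state_dict(torch.load('./result/raw_train_mnist_model.pth', map_location=device))
net.eval()  # disable dropout for inference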

One thing to note: the model expects inputs of shape [-1, 32, 32], but the dataset yields tensors of shape [-1, 1, 32, 32], so PyTorch's squeeze is used to drop the singleton channel dimension:

# input data.shape = [1, 32, 32] -> [32, 32]
data = data.squeeze(dim=0)

# input data.shape = [32, 32] -> [1, 32, 32]
data = data.unsqueeze(dim=0)
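
In the training loop above, the same operation is applied along dim=1 instead, because the DataLoader yields batched tensors:

# input data.shape = [128, 1, 32, 32] -> [128, 32, 32]
data = data.squeeze(dim=1)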
