In the previous posts we built a BP neural network and LeNet with PyTorch; this time we use an LSTM network to classify the MNIST dataset. The data loading procedure is not covered in detail here; we focus on how to build the LSTM network and how to set its parameters.
Here the model is simply a stacked LSTM (two layers, see LAYERS below) followed by a fully connected layer.
# The input is an image of shape (batch, seq_len, feature):
# each row of the image is treated as one time step, and each row has 32 pixels (features)
INPUT_SIZE = 32    # length of one row
HIDDEN_SIZE = 10   # size of the LSTM hidden state
LAYERS = 2         # number of stacked LSTM layers
DROP_RATE = 0.2    # dropout applied between LSTM layers
TIME_STEP = 32     # number of rows, i.e. the sequence length
class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        # Build the LSTM here; an RNN or GRU can be built in the same way
        self.rnn = nn.LSTM(
            input_size=INPUT_SIZE,
            hidden_size=HIDDEN_SIZE,
            num_layers=LAYERS,
            dropout=DROP_RATE,
            batch_first=True  # True:  input/output shape is (batch, seq_len, feature)
                              # False: input/output shape is (seq_len, batch, feature)
        )
        self.hidden_out = nn.Linear(320, 10)  # maps the flattened hidden states (TIME_STEP * HIDDEN_SIZE = 320) to 10 classes
        self.sig = nn.Sigmoid()  # optional: nn.CrossEntropyLoss applies softmax internally, so raw logits would also work

    def forward(self, x):
        r_out, (h_s, h_c) = self.rnn(x)  # r_out: (batch, 32, 10)
        out = r_out.reshape(-1, 320)     # HIDDEN_SIZE is 10, so the (batch, 32, 10) output flattens to (batch, 320)
        out = self.hidden_out(out)       # fully connected layer for classification
        out = self.sig(out)
        return out
The output shape at each stage (with a batch of 20 samples):
torch.Size([20, 32, 32])
torch.Size([20, 32, 10])
torch.Size([20, 320])
torch.Size([20, 10])
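These shapes are easy to verify before training. The sketch below is a minimal sanity check that feeds a random tensor through the layers of the LSTM class defined above, standing in for a real batch of 32x32 MNIST images:

# Quick shape sanity check (assumes the LSTM class defined above;
# random data stands in for real MNIST images)
import torch

net = LSTM()
x = torch.randn(20, 32, 32)      # (batch, seq_len, feature)
print(x.shape)                   # torch.Size([20, 32, 32])
r_out, (h_s, h_c) = net.rnn(x)
print(r_out.shape)               # torch.Size([20, 32, 10])  hidden state at every time step
print(h_s.shape)                 # torch.Size([2, 20, 10])   final hidden state of each layer
out = r_out.reshape(-1, 320)
print(out.shape)                 # torch.Size([20, 320])
out = net.hidden_out(out)
print(out.shape)                 # torch.Size([20, 10])

Note that the model classifies from the hidden states of all 32 time steps flattened together, rather than from the final hidden state h_s alone.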
We then train the model on the dataset. The remaining steps are the same as in the previous posts, so we will not repeat them and go straight to the code.
import os
import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim
class Config:
    batch_size = 128
    epoch = 10
    alpha = 1e-3           # learning rate
    print_per_step = 100   # controls how often training results are printed
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

INPUT_SIZE = 32
HIDDEN_SIZE = 10
LAYERS = 2
DROP_RATE = 0.2
TIME_STEP = 32
class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        self.rnn = nn.LSTM(
            input_size=INPUT_SIZE,
            hidden_size=HIDDEN_SIZE,
            num_layers=LAYERS,
            dropout=DROP_RATE,
            batch_first=True  # True:  input/output shape is (batch, seq_len, feature)
                              # False: input/output shape is (seq_len, batch, feature)
        )
        self.hidden_out = nn.Linear(320, 10)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        r_out, (h_s, h_c) = self.rnn(x)
        out = r_out.reshape(-1, 320)
        out = self.hidden_out(out)
        out = self.sig(out)
        return out
class TrainProcess:
    def __init__(self):
        self.train, self.test = self.load_data()
        self.net = LSTM().to(Config.device)
        self.criterion = nn.CrossEntropyLoss()  # define the loss function
        self.optimizer = optim.Adam(self.net.parameters(), lr=Config.alpha)

    @staticmethod
    def load_data():
        train_data = datasets.MNIST(root='./data/',
                                    train=True,
                                    transform=transforms.Compose([
                                        transforms.Resize((32, 32)),
                                        transforms.ToTensor()]),
                                    download=True)
        test_data = datasets.MNIST(root='./data/',
                                   train=False,
                                   transform=transforms.Compose([
                                       transforms.Resize((32, 32)),
                                       transforms.ToTensor()]))
        # Return data iterators
        # shuffle: whether to shuffle the sample order
        train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                                   batch_size=Config.batch_size,
                                                   shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                                  batch_size=Config.batch_size,
                                                  shuffle=False)
        return train_loader, test_loader
    def train_step(self):
        print("Training & Evaluating based on LSTM......")
        os.makedirs('result', exist_ok=True)
        file = 'result/train_mnist.txt'
        fp = open(file, 'w', encoding='utf-8')
        fp.write('epoch\tbatch\tloss\taccuracy\n')
        for epoch in range(Config.epoch):
            print("Epoch {:3}.".format(epoch + 1))
            for batch_idx, (data, label) in enumerate(self.train):
                data, label = data.to(Config.device), label.to(Config.device)
                data = data.squeeze(dim=1)  # (batch, 1, 32, 32) -> (batch, 32, 32)
                self.optimizer.zero_grad()
                outputs = self.net(data)
                loss = self.criterion(outputs, label)
                loss.backward()
                self.optimizer.step()
                # print results every 100 batches
                if batch_idx % Config.print_per_step == 0:
                    _, predicted = torch.max(outputs, 1)
                    correct = (predicted == label).sum().item()
                    accuracy = correct / label.size(0)  # the last batch may be smaller than batch_size
                    msg = "Batch: {:5}, Loss: {:6.2f}, Accuracy: {:8.2%}."
                    print(msg.format(batch_idx, loss.item(), accuracy))
                    fp.write('{}\t{}\t{}\t{}\n'.format(epoch, batch_idx, loss.item(), accuracy))
        fp.close()

        # evaluate on the test set
        self.net.eval()
        test_loss = 0.
        test_correct = 0
        with torch.no_grad():
            for data, label in self.test:
                data, label = data.to(Config.device), label.to(Config.device)
                data = data.squeeze(dim=1)
                outputs = self.net(data)
                loss = self.criterion(outputs, label)
                test_loss += loss.item() * label.size(0)
                _, predicted = torch.max(outputs, 1)
                test_correct += (predicted == label).sum().item()
        accuracy = test_correct / len(self.test.dataset)
        loss = test_loss / len(self.test.dataset)
        print("Test Loss: {:5.2f}, Accuracy: {:6.2%}".format(loss, accuracy))
        torch.save(self.net.state_dict(), './result/raw_train_mnist_model.pth')

if __name__ == "__main__":
    p = TrainProcess()
    p.train_step()
One point to note: the model expects input of shape [-1, 32, 32], but the dataset yields tensors of shape [-1, 1, 32, 32], so we use PyTorch's squeeze/unsqueeze to drop or add a dimension:

# single sample: data.shape = [1, 32, 32] -> [32, 32]
data = data.squeeze(dim=0)
# single sample: data.shape = [32, 32] -> [1, 32, 32]
data = data.unsqueeze(dim=0)
# batched input, as used in train_step: data.shape = [batch, 1, 32, 32] -> [batch, 32, 32]
data = data.squeeze(dim=1)
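To close, here is a minimal inference sketch showing the trained model in use. It assumes the LSTM class and the checkpoint path saved by train_step above, and it reuses the squeeze/unsqueeze trick to turn a single test image into a batch of one:

# Minimal inference sketch (assumes the LSTM class and checkpoint saved above)
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])
test_data = datasets.MNIST(root='./data/', train=False, transform=transform, download=True)

net = LSTM()
net.load_state_dict(torch.load('./result/raw_train_mnist_model.pth', map_location='cpu'))
net.eval()

img, label = test_data[0]                # img: [1, 32, 32]
x = img.squeeze(dim=0).unsqueeze(dim=0)  # drop the channel dim, add a batch dim: (batch, seq_len, feature)
with torch.no_grad():
    pred = net(x).argmax(dim=1).item()
print('predicted: {}, actual: {}'.format(pred, label))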