LeNet5 is a very famous and very old model, arguably the first convolutional network that worked well in practice. It was proposed by Yann LeCun (the LeNet-5 version dates to his 1998 paper) for automatic recognition of handwritten digits, and it is the classic introductory network in many textbooks.
The network was deployed to read handwritten digits on bank checks and postal zip codes, and its high recognition accuracy helped bring neural networks back into the public eye. Of course, neural networks only became truly hot again more than a decade later, in 2012, when Hinton's student Alex Krizhevsky's AlexNet swept the ILSVRC competition.
The dataset is MNIST, loaded directly through torchvision; each sample is a 28×28 grayscale image of a handwritten digit, as shown in the figure below:
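If you want to peek at a few samples yourself, a minimal standalone sketch like the following will display the first six digits with their labels (it only uses torchvision and matplotlib, the same libraries as the main script below):

import matplotlib.pyplot as plt
from torchvision import datasets, transforms

mnist = datasets.MNIST(root="./data", train=True, download=True, transform=transforms.ToTensor())
fig, axes = plt.subplots(1, 6, figsize=(9, 2))
for ax, idx in zip(axes, range(6)):
    img, label = mnist[idx]                      # img is a 1x28x28 tensor with values in [0, 1]
    ax.imshow(img.squeeze(0).numpy(), cmap="gray_r")
    ax.set_title(str(label))
    ax.axis("off")
plt.show()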
The training setup is summarized below:

| Hyperparameter | Value |
| --- | --- |
| Loss function | Cross-entropy loss |
| Optimizer | SGD with momentum |
| Learning rate | 0.001, decayed to 0.1× every 10 epochs |
| Epochs | 50 |
| Batch size | 16 |
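With this schedule (StepLR with step_size=10 and gamma=0.1, set up in the code below), the learning rate during epoch t is 0.001 × 0.1^⌊(t−1)/10⌋: the first 10 epochs run at 1e-3, the next 10 at 1e-4, then 1e-5, 1e-6, and finally 1e-7 for the last 10 epochs.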
I wrote all of this as a single Jupyter notebook; when you write it yourself, I recommend splitting the dataset/training code and the model definition into separate files (for example, a net.py holding the MyLeNet5 class and a train.py with the training loop).
# -*- coding: UTF-8 -*-
# LeNet5
# SAY @ 202012
import torch
from torch import nn
# from net import MyLeNet5  # import the model here instead if it lives in a separate net.py
from torch.optim import lr_scheduler
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.transforms import ToPILImage
import os
import time
# The magic below is only needed inside Jupyter; delete it if you run this as a plain .py script
%matplotlib inline
import matplotlib.pyplot as plt
# Define the network
class MyLeNet5(nn.Module):
    # Initialize the layers
    def __init__(self):
        super(MyLeNet5, self).__init__()
        # First convolutional layer (padding=2 lets the 28x28 MNIST image stand in for the 32x32 input of the original LeNet-5)
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        # Activation function
        self.Sigmoid = nn.Sigmoid()
        # First pooling layer
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)
        # Second convolutional layer
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Second pooling layer
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2)
        # Third convolutional layer
        self.c5 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        # Flatten layer
        self.flatten = nn.Flatten()
        self.f6 = nn.Linear(120, 84)    # fully connected layer
        self.output = nn.Linear(84, 10)

    def forward(self, x):  # forward pass
        x = self.Sigmoid(self.c1(x))
        x = self.s2(x)
        x = self.Sigmoid(self.c3(x))
        x = self.s4(x)
        x = self.c5(x)
        x = self.flatten(x)
        x = self.f6(x)
        x = self.output(x)
        return x
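# (Optional sanity check, not part of the original script.) Trace a dummy 1x28x28 MNIST image
# through the layers to confirm the sizes line up:
# c1 (5x5, pad 2) -> 6x28x28, s2 -> 6x14x14, c3 (5x5) -> 16x10x10,
# s4 -> 16x5x5, c5 (5x5) -> 120x1x1, flatten -> 120, f6 -> 84, output -> 10 class scores.
print(MyLeNet5()(torch.rand(1, 1, 28, 28)).shape)  # expected: torch.Size([1, 10])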
# Convert images to tensors
data_transform = transforms.Compose([transforms.ToTensor()])
# Load the training set
train_dataset = datasets.MNIST(root="./data", train=True, transform=data_transform, download=True)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
# Load the test set
test_dataset = datasets.MNIST(root="./data", train=False, transform=data_transform, download=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=True)
# Use the GPU if one is available
device = "cuda" if torch.cuda.is_available() else "cpu"
# Instantiate the model and move it to the device
model = MyLeNet5().to(device)
# Loss function (cross-entropy)
loss_fn = nn.CrossEntropyLoss()
# Optimizer: SGD with momentum
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
# Decay the learning rate to 0.1x every 10 epochs
# (named `scheduler` so it does not shadow the imported lr_scheduler module)
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# Training loop for one epoch
def train(dataloader, model, loss_fn, optimizer):
    model.train()
    loss, accuracy, n = 0.0, 0.0, 0
    for batch, (X, y) in enumerate(dataloader):
        # Forward pass
        X, y = X.to(device), y.to(device)
        output = model(X)
        cur_loss = loss_fn(output, y)
        _, pred = torch.max(output, dim=1)
        cur_acc = torch.sum(y == pred) / output.shape[0]
        # Backward pass and parameter update
        optimizer.zero_grad()
        cur_loss.backward()
        optimizer.step()
        loss += cur_loss.item()
        accuracy += cur_acc.item()
        n = n + 1
    print("train loss ", loss / n)
    print("train accuracy ", accuracy / n)
    return loss / n, accuracy / n
# Evaluation loop (run on the test set after each epoch)
def val(dataloader, model, loss_fn):
    model.eval()
    loss, accuracy, n = 0.0, 0.0, 0
    with torch.no_grad():
        for batch, (X, y) in enumerate(dataloader):
            # Forward pass only
            X, y = X.to(device), y.to(device)
            output = model(X)
            cur_loss = loss_fn(output, y)
            _, pred = torch.max(output, dim=1)
            cur_acc = torch.sum(y == pred) / output.shape[0]
            loss += cur_loss.item()
            accuracy += cur_acc.item()
            n = n + 1
    print("val loss ", loss / n)
    print("val accuracy ", accuracy / n)
    return loss / n, accuracy / n
def train_imple():
    # Start training
    epoch = 50  # number of epochs
    best_acc = 0
    train_l_list = []
    train_a_list = []
    val_l_list = []
    val_a_list = []
    for t in range(epoch):
        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        print(f'epoch{t+1}\n---------------')
        train_loss, train_acc = train(train_dataloader, model, loss_fn, optimizer)
        val_loss, val_acc = val(test_dataloader, model, loss_fn)
        scheduler.step()  # apply the learning-rate decay once per epoch
        train_l_list.append(train_loss)
        train_a_list.append(train_acc)
        val_l_list.append(val_loss)
        val_a_list.append(val_acc)
        # Save the weights with the best validation accuracy so far
        if val_acc > best_acc:
            folder = "save_model"
            if not os.path.exists(folder):
                os.mkdir(folder)
            best_acc = val_acc
            print("save best model")
            torch.save(model.state_dict(), folder + "/best_model.pth")
        end = time.perf_counter()
        print('Running time: %s Seconds' % (end - start))
    print("Done")
    # Plot the loss and accuracy curves
    plt.plot(train_l_list, label="train loss")
    plt.plot(val_l_list, label="val loss")
    plt.legend()
    plt.show()
    plt.plot(train_a_list, label="train accuracy")
    plt.plot(val_a_list, label="val accuracy")
    plt.legend()
    plt.show()
# Run the training
train_imple()
Both the model and the dataset are very simple, so training is fast: on my own machine one epoch takes roughly 14 s on the GPU (a GTX 1650 with 4 GB of memory; I was still fairly new to this at the time and did not check the memory usage). The training/validation loss and accuracy curves are shown in the figures below.
Note that we save the model with the highest validation accuracy rather than the final one. This is essentially model selection, similar in spirit to early stopping, and it helps guard against overfitting; it matters a lot.
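If you want to push this idea into true early stopping, a minimal sketch could replace the epoch loop as follows (it reuses the train, val, model, optimizer and scheduler objects defined above; the patience value of 5 is an arbitrary choice):

import os
os.makedirs("save_model", exist_ok=True)
best_acc, patience, bad_epochs = 0.0, 5, 0
for t in range(50):
    train(train_dataloader, model, loss_fn, optimizer)
    _, val_acc = val(test_dataloader, model, loss_fn)
    scheduler.step()
    if val_acc > best_acc:
        best_acc, bad_epochs = val_acc, 0            # new best: reset the counter and save
        torch.save(model.state_dict(), "save_model/best_model.pth")
    else:
        bad_epochs += 1
        if bad_epochs >= patience:                   # no improvement for `patience` epochs
            print("early stopping at epoch", t + 1)
            break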
Strictly speaking the model is finished at this point, but when I first did this I still wanted to see what its predictions actually look like, so here is a small test function.
def test(index):
    # Re-create the test set and the model so this cell can run on its own
    data_transform = transforms.Compose([transforms.ToTensor()])
    test_dataset = datasets.MNIST(root="./data", train=False, transform=data_transform, download=True)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Load the best weights saved during training
    model = MyLeNet5().to(device)
    model.load_state_dict(torch.load("./save_model/best_model.pth", map_location=device))
    model.eval()
    # Class labels
    classes = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    # Convert the tensor back to a PIL image for display
    show = ToPILImage()
    X, y = test_dataset[index][0], test_dataset[index][1]
    plt.imshow(show(X), cmap=plt.cm.gray_r)
    plt.show()
    start = time.perf_counter()
    # Add the batch dimension and run a single forward pass
    X = torch.unsqueeze(X, dim=0).float().to(device)
    with torch.no_grad():
        pred = model(X)
    predict, actual = classes[torch.argmax(pred[0])], classes[y]
    print("predicted:", predict, " actual:", actual)
    end = time.perf_counter()
    print('Running time: %s Seconds' % (end - start))
test(45)
The displayed image and printed output look like this:
predicted: 5 actual: 5
Running time: 0.0012152999988757074 Seconds
The per-image inference time is on the order of milliseconds, which is quite fast.
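One caveat about timings this small: CUDA kernels launch asynchronously, so a plain wall-clock measurement may not include all of the GPU work. A minimal sketch of a more faithful measurement, reusing the model and the prepared input X from test() above, would synchronize before reading the clock:

if torch.cuda.is_available():
    torch.cuda.synchronize()                 # wait for any pending GPU work before timing
start = time.perf_counter()
with torch.no_grad():
    pred = model(X)
if torch.cuda.is_available():
    torch.cuda.synchronize()                 # make sure the forward pass has actually finished
end = time.perf_counter()
print('Running time: %s Seconds' % (end - start))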