LeNet today usually refers to LeNet-5. Its defining feature is combining a convolutional layer with a subsampling (pooling) layer as the basic building block of the network.
The input is a matrix or image of size 32×32. Not counting the input layer, the model has 7 layers: 3 convolutional layers, 2 subsampling layers, 1 fully connected layer, and 1 output layer.
C1: the first convolutional layer. It contains 6 feature maps of size 28×28, each produced by convolving the input image with a 5×5 kernel.
S2: the first subsampling (pooling) layer. It contains 6 feature maps of size 14×14. Each is obtained by average-pooling the corresponding C1 feature map with a 2×2 window and stride 2, followed by a sigmoid non-linearity.
C3: the second convolutional layer. It contains 16 feature maps of size 10×10, produced by convolving the S2 feature maps with 5×5 kernels.
S4: the second subsampling (pooling) layer. It contains 16 feature maps of size 5×5. Each is obtained by average-pooling the corresponding C3 feature map with a 2×2 window and stride 2, followed by a sigmoid non-linearity.
C5: the third convolutional layer. It contains 120 feature maps of size 1×1, produced by convolving the S4 feature maps with 5×5 kernels.
F6: a fully connected layer with 84 nodes.
The last layer is the output layer, which is also fully connected.
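These sizes follow from the "valid" convolution formula out = (in - kernel) / stride + 1 and from the halving done by each 2×2, stride-2 pooling. A minimal sketch that traces them (the conv_out helper is only for illustration):

def conv_out(size, kernel=5, stride=1):
    # output size of a 'valid' (no padding) convolution
    return (size - kernel) // stride + 1

size = 32
size = conv_out(size)  # C1: 32 -> 28
size = size // 2       # S2: 28 -> 14
size = conv_out(size)  # C3: 14 -> 10
size = size // 2       # S4: 10 -> 5
size = conv_out(size)  # C5: 5 -> 1
print(size)            # 1, matching C5's 1x1 feature maps

The PyTorch implementation below follows this layout, but replaces the sigmoid and average pooling with ReLU and max pooling, and takes a 3-channel input so that it can be trained on CIFAR-10: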
from torch import nn


class LeNet(nn.Module):
    def __init__(self):
        super().__init__()
        # C1: 3-channel 32x32 input -> 6 feature maps of 28x28
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=5, stride=1),
            nn.ReLU())
        # S2: 28x28 -> 14x14
        self.max_pool1 = nn.MaxPool2d(2)
        # C3: 14x14 -> 10x10, 16 feature maps
        self.conv2 = nn.Sequential(nn.Conv2d(6, 16, kernel_size=5), nn.ReLU())
        # S4: 10x10 -> 5x5
        self.max_pool2 = nn.MaxPool2d(2)
        # C5, F6 and the output layer are implemented as fully connected layers
        self.fc1 = nn.Sequential(nn.Linear(16 * 5 * 5, 120), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.max_pool1(x)
        x = self.conv2(x)
        x = self.max_pool2(x)
        # flatten to (batch_size, 16 * 5 * 5) before the fully connected layers
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
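As a quick sanity check of the wiring, one can push a random tensor shaped like a CIFAR-10 image through the untrained model and confirm the output shape (a minimal sketch; the dummy tensor is purely illustrative):

import torch

net = LeNet()
dummy = torch.randn(1, 3, 32, 32)  # one fake 3-channel 32x32 image
print(net(dummy).shape)            # expected: torch.Size([1, 10])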
Next, we train the network on CIFAR-10. The code is as follows:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from model import LeNet

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Prepare the dataset and its preprocessing:
# ToTensor scales pixels to [0, 1]; Normalize with mean/std 0.5 maps them to [-1, 1]
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(
    mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(
    root=r'E:\PycharmProjects\datatset\cifar-10-batches-py',
    train=True,
    transform=transform,
    download=True)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=False)  # shuffle=True is more typical for training data
testset = torchvision.datasets.CIFAR10(
    root=r'E:\PycharmProjects\datatset\cifar-10-batches-py',
    train=False,
    transform=transform,
    download=True)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False)
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
net = LeNet.LeNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
def train():
    for epoch in range(30):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # data is a list: [a 4x3x32x32 tensor of images, a length-4 tensor of labels],
            # where 4 is the batch size
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)  # labels has shape torch.Size([4]): the class of each image
            optimizer.zero_grad()  # reset the parameter gradients to zero
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # loss is a scalar tensor, so loss.item() extracts its Python value
            running_loss += loss.item()
            if i % 2000 == 1999:
                # print the average loss over the epoch so far
                print('[%d, %5d] loss:%.3f' % (epoch + 1, i + 1, running_loss / (i + 1)))
    print('Training done')
def test():
    correct = 0
    total = 0
    class_correct = [0] * 10
    class_total = [0] * 10
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            # torch.max(outputs, 1) returns, for each row, the largest value and its
            # column index, i.e. the predicted class
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # per-class accuracy: mark each sample in the batch as 1 if the
            # prediction matches the label, otherwise 0
            c = (predicted == labels).squeeze()
            for j in range(labels.size(0)):
                label = labels[j]
                class_correct[label] += c[j].item()
                class_total[label] += 1
    for i in range(10):
        print('Accuracy of %5s: %2d %%' %
              (classes[i], 100 * class_correct[i] / class_total[i]))
    print('Accuracy of the network on the 10000 test images: %d %%' %
          (100 * correct / total))
    torch.save(net, 'model.pth')


if __name__ == '__main__':
    train()
    test()
After 30 epochs of training, the accuracy is around 60%.
The same network can also be trained on MNIST.
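The training script below instantiates LeNet.LeNet_MNIST(), a class that is not shown in the model code above. A minimal sketch of such a variant, under the assumption that it simply uses a single input channel and padding=2 in the first convolution so that 28×28 MNIST images produce the same 16×5×5 feature maps as the 32×32 CIFAR-10 case:

from torch import nn


class LeNet_MNIST(nn.Module):
    def __init__(self):
        super().__init__()
        # 1-channel 28x28 input; padding=2 makes the effective input 32x32,
        # so the rest of the network matches the CIFAR-10 version above
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
            nn.ReLU())
        self.max_pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Sequential(nn.Conv2d(6, 16, kernel_size=5), nn.ReLU())
        self.max_pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Sequential(nn.Linear(16 * 5 * 5, 120), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.max_pool1(self.conv1(x))
        x = self.max_pool2(self.conv2(x))
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = self.fc2(x)
        return self.fc3(x)

The training code is as follows: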
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from model import LeNet

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Prepare the dataset and its preprocessing
transform = transforms.Compose([transforms.ToTensor()])
trainset = torchvision.datasets.MNIST(
    root=r'E:\PycharmProjects\datatset\MNIST',
    train=True,
    transform=transform,
    download=True)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=False)  # shuffle=True is more typical for training data
testset = torchvision.datasets.MNIST(
    root=r'E:\PycharmProjects\datatset\MNIST',
    train=False,
    transform=transform,
    download=True)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False)
net = LeNet.LeNet_MNIST().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
EPOCH = 10
def train():
    for epoch in range(EPOCH):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # data is a list: [a 4x1x28x28 tensor of images, a length-4 tensor of labels],
            # where 4 is the batch size
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)  # labels has shape torch.Size([4]): the class of each image
            optimizer.zero_grad()  # reset the parameter gradients to zero
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # loss is a scalar tensor, so loss.item() extracts its Python value
            running_loss += loss.item()
            if i % 2000 == 1999:
                # print the average loss over the epoch so far
                print('[%d, %5d] loss:%.3f' % (epoch + 1, i + 1, running_loss / (i + 1)))
    print('Training done')
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            # torch.max(outputs, 1) returns, for each row, the largest value and its
            # column index, i.e. the predicted class
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the 10000 test images: %d %%' %
          (100 * correct / total))
    # the ./save directory must already exist
    torch.save(net, './save/model_MNIST.pth')


if __name__ == '__main__':
    train()
    test()
After 10 epochs of training, the accuracy reaches about 98%.
Using the saved model to classify a single image, the code is as follows:
import torch
import torchvision
import cv2
import torchvision.transforms as transforms
import torch.nn.functional as F

if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # load the full pickled model; this requires the original class definition
    # to be importable, and map_location lets it load on CPU-only machines
    model = torch.load('./save/model_MNIST.pth', map_location=device)
    model = model.to(device)
    model.eval()  # switch the model to evaluation mode
    transform = transforms.Compose([transforms.ToTensor()])
    img = cv2.imread('./test_pic/3.png')
    img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_NEAREST)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # MNIST digits are white on a black background, so invert the image
    img = 255 - img
    # cv2.imshow('pic', img)
    # cv2.waitKey(0)
    img = transform(img)
    img = img.to(device)
    # Add a batch dimension: the saved model expects 4-D input
    # [batch_size, channels, height, width], so [1, 28, 28] becomes [1, 1, 28, 28]
    img = img.unsqueeze(0)
    output = model(img)
    prob = F.softmax(output, dim=1)
    print(prob)
    value, predicted = torch.max(output, 1)
    print(value)
    print(predicted)
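Here prob holds one softmax probability per digit class, and predicted is the index of the largest logit, i.e. the recognized digit. To display it as a plain integer, one could add a line such as (illustrative):

print('Predicted digit:', predicted.item())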