LeNet学习笔记(训练cifar10数据集和mnist数据集pytorch)

网络结构

LeNet现在主要指LeNet5,主要特征是将卷积层和下采样层相结合作为网络的基本结构。LeNet学习笔记(训练cifar10数据集和mnist数据集pytorch)_第1张图片
输入为一个矩阵或者图像,大小为32X32。不计输入层,这个模型共有7层,3个卷积层,2个下采样层,1个全连接层和1个输出层。
C1:第一个卷积层。包含6个卷积特征图,每个特征图大小为28X28,由一个5X5的卷积核对输入图像进 行内卷积运算得到。
S2:第一个下采样层(池化层)。包含6个14X14的下采样特征图。每个下采样特征图都是由C1层的特征图经过2X2,步长为2的窗口进行平均池化,再利用sigmoid进行非线性变换处理。
C3:第二个卷积层。包含16个10X10的卷积特征图。由5X5的卷积核对S2层输出的特征图进行卷积运算得到。
S4:第二个下采样层(池化层)。包含16个5X5的下采样特征图。每个下采样特征图都是由C3层的特征图经过2X2,步长为2的窗口进行平均池化,再利用sigmoid进行非线性变换处理。
C5:第三个卷积层。包含120个1X1的卷积特征图。由5X5的卷积核对S4层输出的特征图进行卷积运算得到。
F6:全连接层。包含84个节点。
最后一层为输出层。全连接层。
LeNet学习笔记(训练cifar10数据集和mnist数据集pytorch)_第2张图片

网络代码

from torch import nn

class LeNet(nn.Module):
    """LeNet-5 style CNN for 3-channel 32x32 inputs (e.g. CIFAR-10).

    Layout: two conv+ReLU stages, each followed by 2x2 max pooling,
    then three fully connected layers producing 10 class scores.
    Attribute names are kept stable so saved state_dicts remain loadable.
    """

    def __init__(self):
        super().__init__()

        # 3x32x32 -> 6x28x28 (5x5 conv, no padding), pooled to 6x14x14.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=5, stride=1),
            nn.ReLU())
        self.max_pool1 = nn.MaxPool2d(2)

        # 6x14x14 -> 16x10x10 (5x5 conv), pooled to 16x5x5.
        self.conv2 = nn.Sequential(nn.Conv2d(6, 16, kernel_size=5), nn.ReLU())
        self.max_pool2 = nn.MaxPool2d(2)

        # Classifier head: 16*5*5 = 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Sequential(nn.Linear(16 * 5 * 5, 120), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run a batch through the network and return raw class logits."""
        features = self.max_pool1(self.conv1(x))
        features = self.max_pool2(self.conv2(features))
        # Flatten each sample's feature maps into one 400-element vector.
        flat = features.view(features.size(0), -1)
        return self.fc3(self.fc2(self.fc1(flat)))

接下来进行训练,代码如下

训练代码

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from model import LeNet
from torch.autograd import Variable

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Prepare the dataset and its preprocessing: convert images to tensors and
# normalize each RGB channel to roughly [-1, 1].
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(
    mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(
    root=r'E:\PycharmProjects\datatset\cifar-10-batches-py',
    train=True,
    transform=transform,
    download=True)

# BUG FIX: training data must be shuffled each epoch; shuffle=False feeds
# the samples in a fixed order and noticeably hurts SGD convergence.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True)

testset = torchvision.datasets.CIFAR10(
    root=r'E:\PycharmProjects\datatset\cifar-10-batches-py',
    train=False,
    transform=transform,
    download=True)

# No shuffling needed for evaluation.
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False)

# Human-readable CIFAR-10 class names, indexed by label id.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# NOTE(review): `LeNet.LeNet()` assumes `model.LeNet` is a module exposing a
# `LeNet` class — verify against model.py's actual layout.
net = LeNet.LeNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


def train():
    """Train the network for 30 epochs over the CIFAR-10 training set.

    Prints the epoch-average loss every 2000 mini-batches; relies on the
    module-level `trainloader`, `net`, `criterion`, `optimizer` and `device`.
    """
    for epoch in range(30):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):

            # data is [4x3x32x32 image tensor, length-4 label tensor];
            # 4 is the batch_size, labels hold each image's class index.
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            # NOTE: the deprecated Variable wrapper was removed — tensors
            # track gradients directly since PyTorch 0.4.
            optimizer.zero_grad()   # clear gradients from the previous step

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # loss is a scalar tensor; .item() extracts the Python float.
            running_loss += loss.item()

            # Report the running average over the epoch so far.
            if i % 2000 == 1999:
                print('[%d, %5d] loss:%.3f'%(epoch + 1, i+1, running_loss / (i+1)))
    print('Training done')


def test():
    """Evaluate the network on the CIFAR-10 test set.

    Prints per-class accuracy and overall accuracy, then saves the whole
    model object to 'model.pth'. Uses the module-level `testloader`,
    `net`, `classes` and `device`.
    """
    correct = 0
    total = 0
    class_correct = [0 for i in range(10)]
    class_total = [0 for i in range(10)]

    with torch.no_grad():  # gradients are not needed for evaluation
        for data in testloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)

            outputs = net(images)
            # torch.max(outputs, 1) returns (max value, argmax column) per
            # row; the argmax column is the predicted class index.
            _, predicted = torch.max(outputs, 1)

            total += labels.size(0)
            # Boolean per-sample hit mask; vectorized count is also safe if
            # the last batch is smaller than batch_size (the old hard-coded
            # range(4) was not).
            matches = (predicted == labels)
            correct += matches.sum().item()

            # Per-class bookkeeping.
            for j in range(labels.size(0)):
                label = labels[j]
                class_correct[label] += matches[j].item()
                class_total[label] += 1

    for i in range(10):
        print(
            'Accuracy of %5s:%2d %%' %
            (classes[i],
             100 *
             class_correct[i] /
             class_total[i]))
    print(
        'Accuracy of the network on the 10000 test images: %d %%' %
        (100 * correct / total))

    torch.save(net, 'model.pth')


if __name__ == '__main__':

    # Train for 30 epochs on CIFAR-10, then evaluate and save the model.
    train()

    test()

训练30轮后的正确率在60%左右。
针对同样的网络,在MNIST数据集上训练,代码如下:

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from model import LeNet
from torch.autograd import Variable

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Prepare the MNIST dataset; grayscale images are only converted to
# tensors (no normalization).
transform = transforms.Compose([transforms.ToTensor()])

trainset = torchvision.datasets.MNIST(
    root=r'E:\PycharmProjects\datatset\MNIST',
    train=True,
    transform=transform,
    download=True)

# BUG FIX: training data must be shuffled each epoch; shuffle=False feeds
# the samples in a fixed order and noticeably hurts SGD convergence.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True)

testset = torchvision.datasets.MNIST(
    root=r'E:\PycharmProjects\datatset\MNIST',
    train=False,
    transform=transform,
    download=True)

# No shuffling needed for evaluation.
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False)

# NOTE(review): assumes `model.LeNet` is a module exposing a `LeNet_MNIST`
# class (single-channel variant) — verify against model.py.
net = LeNet.LeNet_MNIST().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
EPOCH = 10  # number of training epochs


def train():
    """Train the network for EPOCH epochs over the MNIST training set.

    Prints the epoch-average loss every 2000 mini-batches; relies on the
    module-level `trainloader`, `net`, `criterion`, `optimizer`, `device`
    and `EPOCH`.
    """
    for epoch in range(EPOCH):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):

            # data is [4x1x28x28 image tensor, length-4 label tensor];
            # 4 is the batch_size (the old comment wrongly said 4x3x32x32).
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            # NOTE: the deprecated Variable wrapper was removed — tensors
            # track gradients directly since PyTorch 0.4.
            optimizer.zero_grad()   # clear gradients from the previous step

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # loss is a scalar tensor; .item() extracts the Python float.
            running_loss += loss.item()

            # Report the running average over the epoch so far.
            if i % 2000 == 1999:
                print('[%d, %5d] loss:%.3f'%(epoch + 1, i+1, running_loss / (i+1)))
    print('Training done')


def test():
    """Evaluate the network on the MNIST test set.

    Prints overall accuracy and saves the whole model object to
    './save/model_MNIST.pth'. Uses the module-level `testloader`, `net`
    and `device`.
    """
    correct = 0
    total = 0

    with torch.no_grad():  # gradients are not needed for evaluation
        for data in testloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)

            outputs = net(images)
            # torch.max(outputs, 1) returns (max value, argmax column) per
            # row; the argmax column is the predicted class index.
            _, predicted = torch.max(outputs, 1)

            total += labels.size(0)
            # Vectorized hit count; also safe if the last batch is smaller
            # than batch_size (the old hard-coded range(4) was not).
            correct += (predicted == labels).sum().item()
    print(
        'Accuracy of the network on the 10000 test images: %d %%' %
        (100 * correct / total))

    torch.save(net, './save/model_MNIST.pth')


if __name__ == '__main__':

    # Train for EPOCH epochs on MNIST, then evaluate and save the model.
    train()

    test()

同样经过10轮的训练,准确率可以达到98%。
LeNet学习笔记(训练cifar10数据集和mnist数据集pytorch)_第3张图片
利用保存好的模型进行单张图片的测试,代码如下:

import torch
import torchvision
import cv2
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.autograd import Variable


if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # BUG FIX: map_location makes a GPU-saved checkpoint loadable on a
    # CPU-only machine (torch.load would otherwise raise).
    model = torch.load('./save/model_MNIST.pth', map_location=device)
    model = model.to(device)
    model.eval()   # switch to evaluation mode (no dropout / fixed BN stats)

    transform = transforms.Compose(
        [transforms.ToTensor()])

    # Read the test picture, shrink it to the 28x28 MNIST size and convert
    # it to a single grayscale channel.
    img = cv2.imread('./test_pic/3.png')
    img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_NEAREST)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # MNIST digits are white-on-black; invert the black-on-white input.
    img = 255-img
    # cv2.imshow('pic', img)
    # cv2.waitKey(0)
    img = transform(img)
    img = img.to(device)
    # Add a batch dimension: the saved model expects 4-D input
    # [batch, channel, H, W], so [1, 28, 28] becomes [1, 1, 28, 28].
    img = img.unsqueeze(0)
    output = model(img)

    # Class probabilities via softmax over the 10 logits. (The pointless
    # deprecated Variable(prob) re-wrap was removed.)
    prob = F.softmax(output, dim=1)
    print(prob)

    value, predicted = torch.max(output, 1)
    print(value)
    print(predicted)

预测的结果如图所示:
LeNet学习笔记(训练cifar10数据集和mnist数据集pytorch)_第4张图片

你可能感兴趣的:(卷积神经网络)