利用 LeNet-5 实现 MNIST 手写数字识别(Pytorch)

西电-计算机视觉-实验三

一、实验要求

利用 LeNet-5 实现 MNIST 手写数字识别(Pytorch)_第1张图片

二、实验分析

1.LeNet网络结构如下:

利用 LeNet-5 实现 MNIST 手写数字识别(Pytorch)_第2张图片

2.MNIST数据集简介:

(1)数据集包括60000个用于训练的示例和10000个用于测试的示例;
(2)数据集包括0-9共10类手写数字图片,每张图片都做了尺寸归一化,都是28*28大小。

3.手写字体识别流程:

(1)定义超参数 ;
(2)构建transforms(主要是对图像做变换);
(3)下载、加载数据集MNIST;
(4)构建网络模型;
(5)定义优化器;
(6)定义训练方法;
(7)定义测试方法;
(8)训练模型并输出预测结果。

三、代码实现

#!/usr/bin/env python
# @Date: 2021/12/10
# @Desc: 手写字体识别

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms      


# --------------step1: Define hyperparameters-------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU when available
EPOCHS = 10  # number of full passes over the training set
BATCH_SIZE = 16  # samples per mini-batch (commonly 16/32/64/128)

# -------------step2: Build the transform (image preprocessing)---------
transform = transforms.Compose([
    transforms.ToTensor(),  # convert the PIL image to a [0,1] float tensor
    transforms.Normalize((0.1307, ), (0.3081, ))  # standardize => x' = (x-μ)/σ, using the well-known MNIST mean/std
])

# -------------step3: Download and load the dataset------------------
# Download the datasets into "data_sets" (no-op if already cached)
train_set = datasets.MNIST("data_sets", train=True, download=True, transform=transform)
test_set = datasets.MNIST("data_sets", train=False, download=True, transform=transform)
# Wrap the datasets in batched loaders
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)  # NOTE(review): shuffling the test set is harmless but unnecessary


# -------------step4: 构建网络模型--------------------
class LeNet(nn.Module):
    """LeNet-5 for 28x28 single-channel MNIST digits.

    Architecture: two convolutions (each followed by ReLU and 2x2
    max-pooling) and three fully-connected layers producing 10 class
    logits, one per digit.
    """

    def __init__(self):
        super().__init__()
        # 1 input channel -> 6 feature maps; 5x5 kernel, stride 1, padding 2
        # keeps the spatial size at 28x28 (28 + 2*2 - 5 + 1 = 28).
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2)
        # 6 -> 16 feature maps; no padding, so 14x14 shrinks to 10x10.
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 16*5*5 = 400 flattened features -> 120
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)  # 10 output classes (digits 0-9)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10).

        BUG FIX: the original ended with F.softmax, but both the training
        and test code feed this output to F.cross_entropy, which applies
        log_softmax internally — softmax was effectively applied twice,
        flattening the loss surface and slowing learning.  Returning raw
        logits is the correct input for F.cross_entropy; predictions via
        argmax are unaffected because softmax is monotonic.
        """
        x = F.relu(self.conv1(x))   # batch x 6 x 28 x 28
        x = F.max_pool2d(x, 2, 2)   # batch x 6 x 14 x 14
        x = F.relu(self.conv2(x))   # batch x 16 x 10 x 10
        x = F.max_pool2d(x, 2, 2)   # batch x 16 x 5 x 5
        x = x.view(x.size(0), -1)   # flatten to batch x 400
        x = F.relu(self.fc1(x))     # batch x 120
        x = F.relu(self.fc2(x))     # batch x 84
        return self.fc3(x)          # batch x 10 raw logits


# ----------------step:5 Define the optimizer--------------------------
model = LeNet().to(DEVICE)  # instantiate the network on the chosen device
optimizer = optim.Adam(model.parameters())  # Adam with its default learning rate (1e-3)


# ----------------step6: 定义训练方法-----------------------
def train_model(my_model, device, trains_loader, optimizers, epoches):
    """Run one full training pass over *trains_loader*.

    Args:
        my_model: the network being trained.
        device: torch.device that each batch is moved to.
        trains_loader: iterable yielding (data, target) batches.
        optimizers: optimizer updating my_model's parameters.
        epoches: current epoch number (used only in the log line).
    """
    my_model.train()  # enable training-mode behavior (dropout, batch-norm updates)
    for step, batch in enumerate(trains_loader):
        # Move the batch onto the target device
        inputs, labels = (t.to(device) for t in batch)
        optimizers.zero_grad()  # clear gradients left over from the previous step
        loss = F.cross_entropy(my_model(inputs), labels)  # forward pass + loss
        loss.backward()     # backpropagate
        optimizers.step()   # apply the parameter update
        # Log once every 1000 batches (including the first)
        if step % 1000 == 0:
            print("Training Epoch:{} \t Loss:{:.5f}".format(epoches, loss.item()))


# ----------------step7: 定义测试方法------------------------
def test_model(my_model, device, test_loder):
    my_model.eval()  # 模型验证
    correct = 0.0    # 正确率
    test_loss = 0.0   # 测试损失
    with torch.no_grad():  # 测试时不计算梯度,也不进行反向传播
        for data, target in test_loder:
            # 将data和target部署到device上
            data, target = data.to(device), target.to(device)
            # 测试所得的结果
            output = my_model(data)
            # 计算交叉熵损失
            test_loss += F.cross_entropy(output, target).item()
            # 找到概率最大的下标
            predict = output.argmax(dim=1)
            # predict = torch.max(output, dim=1)
            correct += predict.eq(target.view_as(predict)).sum().item()  # 累计正确的值
        # 计算平均损失
        avg_loss = test_loss / len(test_loder.dataset)
        # 计算准确率
        correct_ratio = 100 * correct / len(test_loder.dataset)
        print("Average_loss in test:{:.5f}\t Accuracy:{:.5f}\n".format(
            avg_loss, correct_ratio
        ))


# --------------step8: Train the model----------------------------
# Alternate one training pass and one evaluation pass per epoch.
epoch = 1
while epoch <= EPOCHS:
    train_model(model, DEVICE, train_loader, optimizer, epoch)
    test_model(model, DEVICE, test_loader)
    epoch += 1


四、实验结果(部分)

利用 LeNet-5 实现 MNIST 手写数字识别(Pytorch)_第3张图片

你可能感兴趣的:(深度学习,计算机视觉,机器学习)