Implementing MNIST Handwritten Digit Recognition


I've recently been looking at a few networks that I wanted to implement by hand. That led me to the MNIST dataset, so I picked a couple of networks and tried implementing them on it myself.


Table of Contents

  • Implementing MNIST Handwritten Digit Recognition
  • Preface
  • I. Dataset Operations
  • II. Building the Networks
    • 1. The VGG16_net network
    • 2. The LeNet5 network
  • III. Training and Testing Function
  • IV. Complete Code
  • V. Visualization
  • Summary


Preface

The input images in this dataset are 28×28. The network chosen for training may expect a different input size, in which case a simple adjustment of the inputs is needed.
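
For example, if a network expects 32×32 inputs (as the original LeNet-5/VGG papers do), one option is to add a Resize step to the transform pipeline. A minimal sketch using torchvision's standard transforms (not used in the code below, which keeps the 28×28 size):

from torchvision import transforms

# Resize the 28x28 MNIST images to 32x32 before converting them to tensors
mnist_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])
# This could then be passed as transform=mnist_transform to torchvision.datasets.MNIST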


I. Dataset Operations

The code is as follows (example):

import torch
from torch import nn
import matplotlib.pyplot as plt
import torchvision
from torchvision.transforms import ToTensor
import torch.nn.functional as F
from torch.utils.data import DataLoader


# Dataset-related operations
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=ToTensor(),
                                           download=True
                                           )
test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=ToTensor(),
                                          download=True
                                          )

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)
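
As a quick sanity check (a small sketch, not part of the original script), one batch can be pulled from the train loader to confirm the tensor shapes:

# Each batch holds 32 images of shape [1, 28, 28] and 32 integer labels
images, labels = next(iter(train_dataloader))
print(images.shape)   # torch.Size([32, 1, 28, 28])
print(labels.shape)   # torch.Size([32])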

II. Building the Networks

1. The VGG16_net network

The code is as follows (example):

class VGG16_net(nn.Module):
    def __init__(self):
        super(VGG16_net, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=3),  # 32*32*64
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),  # 32*32*64
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 16*16*64
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),  # 16*16*128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),  # 16*16*128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 8*8*128
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),  # 8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),  # 8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),  # 4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        self.layer5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 1*1*512
        )
        self.conv = nn.Sequential(
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
            self.layer5
        )
        self.fc = nn.Sequential(
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(256, 10)
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(-1, 512)
        x = self.fc(x)
        return x
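
Note that the first convolution uses padding=3, which enlarges the 28×28 MNIST input to 32×32, so the five 2×2 poolings reduce the feature map to 1×1×512 before the fully connected head. A quick shape check (a small sketch, assuming the class above has been defined):

# Feed a dummy MNIST-sized batch through the network and check the output shape
dummy = torch.randn(2, 1, 28, 28)
print(VGG16_net()(dummy).shape)   # expected: torch.Size([2, 10])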

2. The LeNet5 network

The code is as follows (example):

class LeNets(nn.Module):
    def __init__(self):
        super(LeNets,self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.linear_1 = nn.Linear(16*5*5, 120)
        self.linear_2 = nn.Linear(120, 84)
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.c1(x))
        x = self.s2(x)
        x = F.relu(self.c3(x))
        x = self.s4(x)
        x = x.view(-1, 16*5*5)
        x = F.relu(self.linear_1(x))
        x = F.relu(self.linear_2(x))
        x = self.linear_3(x)
        return x
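
The 16*5*5 in the first linear layer follows from the spatial sizes: 28 → 28 after c1 (kernel 5, padding 2), → 14 after s2, → 10 after c3 (kernel 5, no padding), → 5 after s4, with 16 channels, i.e. 16*5*5 = 400 features. A quick check (a small sketch, assuming the class above has been defined):

# Verify that a 28x28 input produces 10 class scores
dummy = torch.randn(2, 1, 28, 28)
print(LeNets()(dummy).shape)   # expected: torch.Size([2, 10])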

III. Training and Testing Function

def fit(epoch, train_dataloader, test_dataloader, model, loss_fn, optimizer):
    correct, total = 0.0, 0.0
    running_loss = 0.0
    model.train()
    for x, y in train_dataloader:
        if torch.cuda.is_available():
            x, y = x.to('cuda'), y.to('cuda')
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            y_pred = torch.argmax(y_pred, dim=1)
            correct += (y_pred == y).sum().item()
            total += y.size(0)
            running_loss += loss.item()

    epoch_train_loss = running_loss / len(train_dataloader)
    epoch_train_acc = correct / total

    test_correct, test_total = 0.0, 0.0
    test_running_loss = 0.0
    model.eval()
    with torch.no_grad():
        for x, y in test_dataloader:
            if torch.cuda.is_available():
                x, y = x.to('cuda'), y.to('cuda')
            y_preds = model(x)
            loss = loss_fn(y_preds, y)
            y_preds = torch.argmax(y_preds, dim=1)
            test_correct += (y_preds == y).sum().item()
            test_total += y.size(0)
            test_running_loss += loss.item()

    epoch_test_loss = test_running_loss / len(test_dataloader)
    epoch_test_acc = test_correct / test_total

    print('epoch: ', epoch,
          'train_loss: ', round(epoch_train_loss, 3),
          'train_acc: ', round(epoch_train_acc, 3),
          'test_loss: ', round(epoch_test_loss, 3),
          'test_acc: ', round(epoch_test_acc, 3)
          )
    return epoch_train_loss, epoch_train_acc, epoch_test_loss, epoch_test_acc
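
One possible refinement (a sketch, not the author's final code): compute the device once and move both the model and each batch to it, so the same loop also runs on a CPU-only machine:

# Device-agnostic setup; the per-batch torch.cuda.is_available() checks then become unnecessary
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LeNets().to(device)

for x, y in train_dataloader:
    x, y = x.to(device), y.to(device)
    # ... forward / backward steps as in fit() above
    break  # only one batch shown in this sketch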


IV. Complete Code

# Provides LeNet, LeNets, VGG16_net and LeNet5 (plus the simple Net and Net_5 variants)
# Note how these networks differ in their handling of internal details
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
# from shenjing import VGG16_net                            # import the VGG16_net class from shenjing.py
from torch.optim import lr_scheduler                        # learning-rate decay strategies
import datetime

# Dataset-related operations
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=ToTensor(),
                                           download=True
                                           )

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=ToTensor(),
                                          download=True
                                          )


train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)


class VGG16_net(nn.Module):
    def __init__(self):
        super(VGG16_net, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=3),  # 32*32*64
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),  # 32*32*64
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 16*16*64
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),  # 16*16*128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),  # 16*16*128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 8*8*128
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),  # 8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),  # 8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),  # 4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        self.layer5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # 2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=2, stride=2)  # 1*1*512
        )
        self.conv = nn.Sequential(
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
            self.layer5
        )
        self.fc = nn.Sequential(
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(256, 10)
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(-1, 512)
        x = self.fc(x)
        return x


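# A simple fully connected baseline: flatten the 28*28 image and apply three linear layers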
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.linear_1 = nn.Linear(28*28, 120)
        self.linear_2 = nn.Linear(120, 84)
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, x):
        x = x.view(-1, 1*28*28)
        x = F.relu(self.linear_1(x))
        x = F.relu(self.linear_2(x))
        x = self.linear_3(x)
        return x

# Define the LeNet network; an activation follows each convolutional and fully connected layer, except the final fully connected layer
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet,self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)  # convolutional layer
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)                        # average pooling layer, downsampling
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.c5 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)   # convolutional layer
        self.flatten = nn.Flatten()                                            # flatten
        self.linear_1 = nn.Linear(120, 84)                                     # fully connected layer
        self.linear_2 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.c1(x))
        x = self.s2(x)
        x = F.relu(self.c3(x))
        x = self.s4(x)
        x = F.relu(self.c5(x))
        x = self.flatten(x)
        x = F.relu(self.linear_1(x))
        x = self.linear_2(x)
        return x


# Effectively the same as LeNet above; only the flattening is handled differently (view + Linear instead of a final conv + Flatten)
class LeNets(nn.Module):
    def __init__(self):
        super(LeNets,self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.linear_1 = nn.Linear(16*5*5, 120)
        self.linear_2 = nn.Linear(120, 84)
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.c1(x))
        x = self.s2(x)
        x = F.relu(self.c3(x))
        x = self.s4(x)
        x = x.view(-1, 16*5*5)
        x = F.relu(self.linear_1(x))
        x = F.relu(self.linear_2(x))
        x = self.linear_3(x)
        return x


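# A small LeNet-style variant: max pooling after the first conv only; the flattened size is 16*8*8 = 1024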
class Net_5(nn.Module):
    def __init__(self):
        super(Net_5,self).__init__()
        self.conv_1 = nn.Conv2d(1, 6, 5)             # convolutional layer  24*24*6
        self.pool_2 = nn.MaxPool2d((2, 2))           # pooling layer  12*12*6
        self.conv_3 = nn.Conv2d(6, 16, 5)            # convolutional layer  8*8*16
        self.linear_4 = nn.Linear(16*8*8, 256)
        self.linear_5 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv_1(x))
        x = self.pool_2(x)
        x = F.relu(self.conv_3(x))
        # print(x.size())                              # torch.Size([32, 16, 8, 8]) batch: 32  channels: 16  size: 8*8
        x = x.view(-1, 1024)
        x = F.relu(self.linear_4(x))
        x = self.linear_5(x)
        return x


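# LeNet5: same structure as LeNets above, with the flattened size 16*5*5 written out as 400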
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5,self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.linear_1 = nn.Linear(400, 120)
        self.linear_2 = nn.Linear(120, 84)
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.c1(x))
        x = self.s2(x)
        x = F.relu(self.c3(x))
        x = self.s4(x)
        x = x.view(-1, 400)
        x = F.relu(self.linear_1(x))
        x = F.relu(self.linear_2(x))
        x = self.linear_3(x)
        return x


# Set the device and initialize the model
model = VGG16_net()
if torch.cuda.is_available():
    model.to('cuda')

def printlog(info):
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("\n" + "=========="*8 + "%s"%nowtime)
    print(str(info)+"\n")

# Initialize the optimizer and the loss function
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()
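
# Note: lr_scheduler is imported above but not actually used in this script.
# A possible way to wire it in (hypothetical StepLR parameters, shown only as a sketch):
#     scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# followed by scheduler.step() once per epoch in the training loop below.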


# Define the training/testing function
def fit(epoch, train_dataloader, test_dataloader, model, loss_fn, optimizer):
    correct, total = 0.0, 0.0
    running_loss = 0.0
    model.train()
    for x, y in train_dataloader:
        if torch.cuda.is_available():
            x, y = x.to('cuda'), y.to('cuda')
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            y_pred = torch.argmax(y_pred, dim=1)
            correct += (y_pred == y).sum().item()
            total += y.size(0)
            running_loss += loss.item()

    epoch_train_loss = running_loss / len(train_dataloader)
    epoch_train_acc = correct / total

    test_correct, test_total = 0.0, 0.0
    test_running_loss = 0.0
    model.eval()
    with torch.no_grad():
        for x, y in test_dataloader:
            if torch.cuda.is_available():
                x, y = x.to('cuda'), y.to('cuda')
            y_preds = model(x)
            loss = loss_fn(y_preds, y)
            y_preds = torch.argmax(y_preds, dim=1)
            test_correct += (y_preds == y).sum().item()
            test_total += y.size(0)
            test_running_loss += loss.item()

    epoch_test_loss = test_running_loss / len(test_dataloader)
    epoch_test_acc = test_correct / test_total

    print('epoch: ', epoch,
          'train_loss: ', round(epoch_train_loss, 3),
          'train_acc: ', round(epoch_train_acc, 3),
          'test_loss: ', round(epoch_test_loss, 3),
          'test_acc: ', round(epoch_test_acc, 3)
          )
    return epoch_train_loss, epoch_train_acc, epoch_test_loss, epoch_test_acc


# Training loop: each epoch passes over the entire training set once
epochs = 30
train_loss = []
train_acc = []
test_loss = []
test_acc = []
for epoch in range(1, epochs+1):
    printlog("Epoch: {0} / {1}".format(epoch, epochs))
    epoch_train_loss, epoch_train_acc, epoch_test_loss, epoch_test_acc = fit(epoch,
                                                                             train_dataloader,
                                                                             test_dataloader,
                                                                             model,
                                                                             loss_fn,
                                                                             optimizer
                                                                             )
    train_loss.append(epoch_train_loss)
    train_acc.append(epoch_train_acc)
    test_loss.append(epoch_test_loss)
    test_acc.append(epoch_test_acc)

print('done')
plt.plot(range(1, epochs+1), train_loss, label='train_loss')
plt.plot(range(1, epochs+1), test_loss, label='test_loss')
plt.plot(range(1, epochs+1), train_acc, label='train_acc')
plt.plot(range(1, epochs+1), test_acc, label='test_acc')
plt.legend()
plt.show()


V. Visualization

[Figure 1: training/test loss and accuracy curves produced by the plotting code above]

Summary

Two different neural network models are presented above. To switch between them, only the model instantiation line needs to be changed, i.e. swap the network class in the statement model = VGG16_net(). The code of the training/testing function still needs further refinement.
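
For example, switching to the LeNet5 model from the complete code only requires changing that one line (a minimal sketch):

# Swap the network by changing only the instantiation line
model = LeNet5()            # instead of: model = VGG16_net()
if torch.cuda.is_available():
    model.to('cuda')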
