手写数字识别Minst(CNN)

文章目录

  • 手写数字识别
    • 网络结构
    • 加载数据集
    • 数据集可视化
    • CNN网络结构
    • 训练模型
    • 保存模型和加载模型
    • 测试模型

手写数字识别

网络结构

网上给出的基本网络结构:
在这里插入图片描述
然而在本数据集中,输入图不是1*32*32,是1*28*28。所以正确的网络结构应该是

level input stride output
1 1*28*28 6*5*5 1 6*24*24
MaxPool 6*24*24 MaxPool 2 6*12*12
2 6*12*12 16*5*5 1 16*8*8
MaxPool 16*8*8 MaxPool 2 16*4*4
Flatten 16*4*4 Flatten 256
3FC 256 FC 120
4FC 120 FC 84
5FC 84 FC 10

加载数据集

# -*-coding =utf-8 -*-
import torch
import matplotlib.pyplot as plt
import torchvision

# 定义数据转换
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,))
])

# 加载数据集
batch_size=32
path = r'05data'
train_dataset = torchvision.datasets.MNIST(root=path, train=True,transform=transform,download =False)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = torchvision.datasets.MNIST(root=path, train=True,transform=transform,download =False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
# loader.shape=1875*[32*1*28*28,32]

最后loader.shape是1875*[32*1*28*28,32],即 number*[batch(data)*height*width, batch(label)]

数据集可视化


from sklearn.preprocessing import MinMaxScaler
# 归一化转为[0,255]
transfer=MinMaxScaler(feature_range=(0, 255)) 
def visualize_loader(batch,predicted=''): 
    # batch=[32*1*28*28,32]
    imgs=batch[0].squeeze().numpy() # 消squeeze()一维
    fig, axes = plt.subplots(4, 8, figsize=(12, 6))
    labels=batch[1].numpy()
    if str(predicted)=='':
        predicted=labels
    for i, ax in enumerate(axes.flat):
        ax.imshow(imgs[i])
        ax.set_title(predicted[i],color='black' if predicted[i]==labels[i] else 'red')
        ax.axis('off')
    plt.tight_layout()
    plt.show()

# loader.shape=1875*[32*1*28*28,32]
for batch in train_loader:       
    break
visualize_loader(batch)

在这里插入图片描述
上图是对数据集的可视化。

CNN网络结构

在PyTorch的torch.nn模块中,卷积函数Conv2d的输入张量的形状应为[batch_size, channels, height, width]对应数据集,无需修改(在一些架构中,可能是[batch_size, height, width, channels])。

# 创建模型
import torch
import torch.nn as nn
import torch.optim as optim

# 定义模型
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1)
        self.flatten=nn.Flatten()
        self.fc3 = nn.Linear(256, 120)
        self.fc4 = nn.Linear(120, 84)
        self.fc5 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        x = self.relu(x)
        x = self.fc5(x)
        return x

打印模型结构

model = CNN()
print(model)
CNN(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc3): Linear(in_features=256, out_features=120, bias=True)
  (fc4): Linear(in_features=120, out_features=84, bias=True)
  (fc5): Linear(in_features=84, out_features=10, bias=True)
)

训练模型

import torch.optim as optim

num_epochs=1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        
        # 前向传播
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # 统计准确率
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        running_loss += loss.item()
    
    train_loss = running_loss / len(train_loader)
    train_accuracy = correct / total
    
    # 在测试集上评估模型
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            test_loss += loss.item()
    
    test_loss = test_loss / len(test_loader)
    test_accuracy = correct / total
    
    # 打印训练过程中的损失和准确率
    print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")
Epoch [1/1] - Train Loss: 0.0154, Train Accuracy: 0.9951, Test Loss: 0.0109, Test Accuracy: 0.9964

保存模型和加载模型


#torch.save(model.state_dict(), '05model.pth')

# 创建一个新的模型实例
model = CNN()
# 加载模型的参数
model.load_state_dict(torch.load('05model.pth'))

测试模型


for batch in test_loader:       
    break
imgs=batch[0]
outputs = model(imgs)
_, predicted = torch.max(outputs.data, 1)
predicted=predicted.numpy()

print(predicted)

visualize_loader(batch,predicted)

在这里插入图片描述

上图中可视化了其中的32次预测,只有第三行第四列的“8”被预测为“5”,其余均是正确。
在测试集的总体预测准确度为99.64%,正确率挺高的。

你可能感兴趣的:(深度学习,py,cnn,人工智能,神经网络)