LeNet

LeNet

  • 网络结构
  • 代码
    • model.py
    • train.py
    • predict.py

网络结构

LeNet_第1张图片

代码

LeNet_第2张图片

model.py

import torch.nn as nn
import torch.nn.functional as F


class LeNet(nn.Module):
    """LeNet-style CNN for 3-channel 32x32 images (e.g. CIFAR-10).

    Two conv + max-pool stages are followed by three fully connected
    layers. The output is raw logits (no softmax), which is what
    ``nn.CrossEntropyLoss`` expects.

    Args:
        num_classes: Number of output logits. Defaults to 10, which keeps
            the layer shapes identical to the original fixed-size network.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)        # input(3, 32, 32), output(16, 28, 28)
        self.pool1 = nn.MaxPool2d(2, 2)         # (16, 14, 14)
        self.conv2 = nn.Conv2d(16, 32, 5)       # (32, 10, 10)
        self.pool2 = nn.MaxPool2d(2, 2)         # (32, 5, 5)
        self.fc1 = nn.Linear(32 * 5 * 5, 120)   # (120)
        self.fc2 = nn.Linear(120, 84)           # (84)
        self.fc3 = nn.Linear(84, num_classes)   # (num_classes)

    def forward(self, x):
        """Return class logits for a batch of images.

        Args:
            x: Tensor of shape (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, num_classes).
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 32 * 5 * 5)`` this never folds extra spatial elements
        # into the batch axis, so a wrongly-sized input fails loudly in fc1
        # instead of silently producing a different batch size.
        x = x.flatten(1)                        # (N, 32 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
        

train.py

from model import LeNet
import torch
import torch.nn as nn
import torchvision as tv
import torchvision.transforms as transforms
# import numpy as np
# import matplotlib.pyplot as plt

# Data preprocessing
transform = transforms.Compose([
    # ToTensor: converts a PIL Image or numpy.ndarray (H x W x C) in the
    # range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the
    # range [0.0, 1.0].
    transforms.ToTensor(),
    # Normalize: given mean (M1,...,Mn) and std (S1,...,Sn) for n channels,
    # output[channel] = (input[channel] - mean[channel]) / std[channel].
    # The source images are 3-channel RGB, so mean and std each have three
    # values.
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Data loading
trainset = tv.datasets.CIFAR10(
    root='C:/Users/14251/Desktop/workspace/LeNet/data',
    train=True,
    transform=transform,
    download=False)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=36,
                                          shuffle=True,
                                          num_workers=0)

testset = tv.datasets.CIFAR10(
    root='C:/Users/14251/Desktop/workspace/LeNet/data',
    train=False,
    transform=transform,
    download=False)
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=10000,
                                         shuffle=True,
                                         num_workers=0)

# Pull a single batch from the test loader and reuse it for every evaluation.
# Use the built-in next(): DataLoader iterators follow the Python 3 iterator
# protocol and do not provide a public .next() method on current PyTorch.
testdata_iter = iter(testloader)
testdata, testlabel = next(testdata_iter)

# Optional visual sanity check (also requires the numpy / matplotlib imports).
# classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# def imshow(img):
#     img = img / 2 + 0.5   # output = (input - 0.5) / 0.5 -> input = output / 2 + 0.5, undo the normalization
#     npimg = img.numpy()
#     # After ToTensor() a single image is (C, H, W); plt.imshow wants (H, W, C),
#     # so permute the axes to (1, 2, 0).
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()

# print(' '.join('%5s' % classes[testlabel[j]] for j in range(4)))
# imshow(tv.utils.make_grid(testdata))

net = LeNet()
# print(net)

# Loss function and optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)

# Number of training steps between two evaluation reports. The same value is
# used for the trigger and for averaging the loss so the two cannot drift apart.
LOG_EVERY = 500

# Train the network
for epoch in range(5):
    running_loss = 0.0
    for step, data in enumerate(trainloader, start=0):
        traindata, trainlabel = data
        optimizer.zero_grad()
        output = net(traindata)
        loss = loss_func(output, trainlabel)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report after every LOG_EVERY-th step (steps 499, 999, ... 0-based),
        # i.e. exactly when LOG_EVERY losses have been accumulated.
        if step % LOG_EVERY == LOG_EVERY - 1:
            with torch.no_grad():
                output = net(testdata)
                pred = torch.max(output, dim=1)[1]
                accuracy = (pred == testlabel).sum().item() / testlabel.size(0)

            print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                  (epoch + 1, step + 1, running_loss / LOG_EVERY, accuracy))
            running_loss = 0.0

print('Finished Training')

# Save only the learned parameters (state_dict), not the whole module object.
torch.save(net.state_dict(),
           'C:/Users/14251/Desktop/workspace/LeNet/LeNet_dict.pth')
           

predict.py

from model import LeNet
import torch
import torchvision.transforms as transforms
from PIL import Image   # 打开单张图像,文件夹用torchvision.datasets.ImageFolder()

# Same preprocessing as training, plus a resize to the 32x32 input size the
# network was trained on.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 class names, indexed by the label id the network predicts.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
           'ship', 'truck')

# Force 3-channel RGB: a grayscale, RGBA or CMYK file would otherwise yield a
# tensor whose channel count does not match the 3-channel Normalize / conv1.
# The context manager closes the underlying file once the pixels are loaded.
with Image.open('C:/Users/14251/Desktop/workspace/LeNet/test2.jpg') as raw:
    img = raw.convert('RGB')
img = transform(img)    # (H, W, C) -> (C, H, W)
img = img.unsqueeze(dim=0)  # (C, H, W) -> (N, C, H, W)

net = LeNet()
net.load_state_dict(
    torch.load('C:/Users/14251/Desktop/workspace/LeNet/LeNet_dict.pth'))
# Switch to inference mode; the standard step before prediction.
net.eval()

with torch.no_grad():
    output = net(img)
    # Index of the largest logit; .item() already returns a Python int.
    predict = torch.max(output, dim=1)[1].item()

print(classes[predict])

你可能感兴趣的:(笔记,深度学习)