PyTorch 搭建神经网络详细注释

目录

  • 搭建神经网络步骤
  • 需要导入的包
  • 下载训练集
  • 搭建神经网络
  • 神经网络训练
  • 测试网络
  • 在GPU上训练
  • 完整代码

搭建神经网络步骤

  1. 训练一个图片分类器
  2. 通过 torchvision 加载 CIFAR10 里面的训练和测试数据集,并对数据进行标准化
  3. 定义卷积神经网络
  4. 定义损失函数
  5. 利用训练数据训练网络
  6. 利用测试数据测试网络

需要导入的包

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import ssl
import matplotlib.pyplot as plt
import numpy as np

下载训练集

该部分在首次运行时会自动下载 CIFAR10 数据集(download=True)。num_workers 是加载数据所用的子进程数,0 表示在主进程中加载;在 linux 环境下可以把 num_workers 设置为大于 0 的值

# Disable HTTPS certificate verification for the whole process so the dataset
# download does not fail on certificate errors.
# NOTE(review): this weakens TLS security for every HTTPS request made by this
# process; prefer fixing the local certificate store where possible.
ssl._create_default_https_context = ssl._create_unverified_context

# Convert each image to a tensor, then normalize every channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split: downloaded into ./data on first use, served in shuffled
# mini-batches of 4 images.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = data.DataLoader(
    dataset=trainset, batch_size=4, shuffle=True, num_workers=0)

# Test split: same preprocessing, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = data.DataLoader(
    dataset=testset, batch_size=4, shuffle=False, num_workers=0)

# Human-readable class names, indexed by the integer label.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

搭建神经网络

这里以 (3, 32, 32) 的图片为例,其中 3 指三通道的图片,即红绿蓝(RGB),像素大小为 32 × 32

卷积层输出尺寸计算公式(无填充时):(像素长宽 - 卷积核长宽) / 步长 + 1
self.conv1 = nn.Conv2d(3, 6, 5)
经过卷积层,输入 3 通道,输出 6 通道,步长 1 × 1,卷积核为 5 × 5
图像变成 (6, 28, 28) 的结构

self.pool = nn.MaxPool2d(2, 2)
经过池化层,步长 2 × 2,图像进行缩小
图像变成 (6, 14, 14) 的结构

self.conv2 = nn.Conv2d(6, 16, 5)
经过卷积层,输入 6 通道,输出 16 通道,卷积核为 5 × 5
图像变成 (16, 10, 10) 的结构

self.pool = nn.MaxPool2d(2, 2)
经过池化层,步长 2 × 2,图像进行缩小
图像变成 (16, 5, 5) 的结构

self.fc1 = nn.Linear(16 * 5 * 5, 120)
经过全连接层,后面要跟 relu 激活函数
空间从 16 × 5 × 5 压缩为 120

self.fc2 = nn.Linear(120, 84)
经过全连接层,后面要跟 relu 激活函数
空间从 120 压缩为 84

self.fc3 = nn.Linear(84, 10)
经过全连接层,后面不跟激活函数(损失函数 nn.CrossEntropyLoss 内部已包含 softmax 计算)
空间从 84 压缩为 10

# Visualize image tensors
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width) whose values
            were normalized with mean 0.5 and std 0.5 per channel.
    """
    # Undo the normalization: x / 2 + 0.5 is the inverse of (x - 0.5) / 0.5.
    img = img / 2 + 0.5
    # detach() and cpu() make the conversion work for tensors that track
    # gradients or live on a GPU; a plain .numpy() call raises for those.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects (height, width, channels), so move the channel axis last.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# 随机获取训练图片
# dataiter = iter(trainloader)
# images, labels = next(dataiter)
# 显示图片,make_grid 拼图像
# imshow((torchvision.utils.make_grid(images)))


# 打印图片标签;'%5s' 将每个类别名右对齐到 5 个字符宽,再用 ' ' 连接
# print(' '.join('%5s' % classes[labels[j]] for j in range(4)))


class Net(nn.Module):
    """Convolutional network mapping 3x32x32 images to 10 class scores.

    Per-sample shapes: (3, 32, 32) -> conv1 -> (6, 28, 28) -> pool ->
    (6, 14, 14) -> conv2 -> (16, 10, 10) -> pool -> (16, 5, 5) ->
    flatten (400) -> fc1 (120) -> fc2 (84) -> fc3 (10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        # One pooling layer is reused after both convolutions (it has no weights).
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images.

        Args:
            x: Float tensor of shape (batch, 3, 32, 32).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce
                16 * 5 * 5 features per sample.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 5 * 5), this never folds a wrong spatial size into the
        # batch dimension; a mismatch surfaces as an error in fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation after the last layer: the output is raw class scores.
        x = self.fc3(x)
        return x


# Instantiate the network and print its layer summary
net = Net()
print(net)

运行结果

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

神经网络训练

def train(num_epochs=3, log_interval=2000):
    """Train the global ``net`` on ``trainloader`` and save its weights to ``PATH``.

    Relies on the module-level ``net``, ``trainloader``, ``criterion``,
    ``optimizer`` and ``PATH`` being defined before it is called.

    Args:
        num_epochs: Number of full passes over ``trainloader``.
        log_interval: Print the average loss every this many mini-batches.
    """
    for epoch in range(num_epochs):
        running_loss = 0.0
        # Count steps from 1 so the logged step number is the number of batches seen.
        for step, (inputs, labels) in enumerate(trainloader, 1):
            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            # Back-propagate, then update the parameters
            loss.backward()
            optimizer.step()
            # Report the epoch, step and mean loss of the last `log_interval` batches
            running_loss += loss.item()
            if step % log_interval == 0:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, step, running_loss / log_interval))
                running_loss = 0.0
    print('Finished Training')
    torch.save(net.state_dict(), PATH)
    print('Finished Saving')


# Checkpoint file that train() writes the learned weights to
PATH = './cifar_net.pth'
# Loss function: cross-entropy
criterion = nn.CrossEntropyLoss()
# Optimizer: stochastic gradient descent over net.parameters() (the weights
# and biases); lr is the learning rate, momentum the momentum factor
optimizer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)
# Train the network; train() also saves the trained weights to PATH
train()

运行结果

[1,  2000] loss: 2.127
[1,  4000] loss: 1.800
[1,  6000] loss: 1.656
[1,  8000] loss: 1.566
[1, 10000] loss: 1.501
[1, 12000] loss: 1.442
[2,  2000] loss: 1.403
[2,  4000] loss: 1.364
[2,  6000] loss: 1.340
[2,  8000] loss: 1.323
[2, 10000] loss: 1.309
[2, 12000] loss: 1.293
[3,  2000] loss: 1.204
[3,  4000] loss: 1.225
[3,  6000] loss: 1.223
[3,  8000] loss: 1.200
[3, 10000] loss: 1.198
[3, 12000] loss: 1.207
Finished Training
Finished Saving

测试网络

从测试结果可以发现,网络对不同类别的识别效果不同,这与各类图像本身的特点以及训练的数量都有一定的关系,准确率未能达到 100% 是很正常的现象。

# Grab the first batch of test images
dataiter = iter(testloader)
# Use the built-in next(): DataLoader iterators in recent PyTorch releases
# no longer provide a .next() method.
images, labels = next(dataiter)
# Show the original images tiled into one grid
imshow(torchvision.utils.make_grid(images))
# Ground-truth labels
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))

# Rebuild the network and restore the weights saved during training
net = Net()
net.load_state_dict(torch.load(PATH))
# Inference only: switch to evaluation mode and skip gradient tracking
net.eval()
with torch.no_grad():
    outputs = net(images)
# There are 10 classes; the class with the highest score is the prediction.
# torch.max(outputs, 1) returns a (values, indices) pair per row: the values
# are discarded via `_`, the indices are the predicted class ids.
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(len(predicted))))

# Overall accuracy on the whole test set
correct = 0
total = 0
with torch.no_grad():
    # Unpack each batch directly instead of binding it to `data`, which would
    # overwrite the module alias `data` (torch.utils.data) at module level.
    for images, labels in testloader:
        outputs = net(images)
        # Index of the highest score per row is the predicted class
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

# Per-class accuracy: which classes does the network handle well or badly?
class_correct = [0.] * len(classes)
class_total = [0.] * len(classes)
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        # Walk the actual batch instead of assuming exactly 4 samples, so any
        # batch size (including a shorter final batch or a batch of 1) works.
        hits = (predicted == labels).tolist()
        for label, hit in zip(labels.tolist(), hits):
            class_correct[label] += hit
            class_total[label] += 1

for i in range(len(classes)):
    # Guard against a class that never occurs in the test data
    accuracy = 100 * class_correct[i] / class_total[i] if class_total[i] else 0.0
    print('Accuracy of %5s : %2d %%' % (
        classes[i], accuracy))

运行结果

GroundTruth:    cat  ship  ship plane
Predicted:   ship  ship  ship  ship
Accuracy of the network on the 10000 test images: 56 %
Accuracy of plane : 42 %
Accuracy of   car : 85 %
Accuracy of  bird : 47 %
Accuracy of   cat : 27 %
Accuracy of  deer : 42 %
Accuracy of   dog : 56 %
Accuracy of  frog : 80 %
Accuracy of horse : 59 %
Accuracy of  ship : 80 %
Accuracy of truck : 41 %

Pytorch 搭建神经网络详细注释_第1张图片

在GPU上训练

通过 to() 方法把网络和每个批次的数据移动到同一设备(device)上;device 设为 CPU 时,代码依然可以在 CPU 上运行

# Pick the first GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Move the network's parameters and buffers to the selected device
net.to(device)
# Inside the training/testing loops, move every batch to the same device as the network
inputs, labels = inputs.to(device), labels.to(device)

完整代码

# 训练一个图片分类器
# 通过torchvision加载CIFAR10里面的训练和测试数据集,并对数据进行标准化
# 定义卷积神经网络
# 定义损失函数
# 利用训练数据训练网络
# 利用测试数据测试网络
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import ssl
import matplotlib.pyplot as plt
import numpy as np

# Download the datasets
# Disable HTTPS certificate verification for the whole process so the dataset
# download does not fail on certificate errors.
# NOTE(review): this weakens TLS security for every HTTPS request made by this
# process; prefer fixing the local certificate store where possible.
ssl._create_default_https_context = ssl._create_unverified_context

# Convert each image to a tensor, then normalize every channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split: downloaded into ./data on first use, served in shuffled
# mini-batches of 4 images.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = data.DataLoader(
    dataset=trainset, batch_size=4, shuffle=True, num_workers=0)

# Test split: same preprocessing, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = data.DataLoader(
    dataset=testset, batch_size=4, shuffle=False, num_workers=0)

# Human-readable class names, indexed by the integer label.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


# Visualize image tensors
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width) whose values
            were normalized with mean 0.5 and std 0.5 per channel.
    """
    # Undo the normalization: x / 2 + 0.5 is the inverse of (x - 0.5) / 0.5.
    img = img / 2 + 0.5
    # detach() and cpu() make the conversion work for tensors that track
    # gradients or live on a GPU; a plain .numpy() call raises for those.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects (height, width, channels), so move the channel axis last.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# 随机获取训练图片
# dataiter = iter(trainloader)
# images, labels = next(dataiter)
# 显示图片,make_grid 拼图像
# imshow((torchvision.utils.make_grid(images)))


# 打印图片标签;'%5s' 将每个类别名右对齐到 5 个字符宽,再用 ' ' 连接
# print(' '.join('%5s' % classes[labels[j]] for j in range(4)))


class Net(nn.Module):
    """Convolutional network mapping 3x32x32 images to 10 class scores.

    Per-sample shapes: (3, 32, 32) -> conv1 -> (6, 28, 28) -> pool ->
    (6, 14, 14) -> conv2 -> (16, 10, 10) -> pool -> (16, 5, 5) ->
    flatten (400) -> fc1 (120) -> fc2 (84) -> fc3 (10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        # One pooling layer is reused after both convolutions (it has no weights).
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images.

        Args:
            x: Float tensor of shape (batch, 3, 32, 32).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce
                16 * 5 * 5 features per sample.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 5 * 5), this never folds a wrong spatial size into the
        # batch dimension; a mismatch surfaces as an error in fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation after the last layer: the output is raw class scores.
        x = self.fc3(x)
        return x


def train(num_epochs=3, log_interval=2000):
    """Train the global ``net`` on ``trainloader`` and save its weights to ``PATH``.

    Relies on the module-level ``net``, ``trainloader``, ``criterion``,
    ``optimizer`` and ``PATH`` being defined before it is called.

    Args:
        num_epochs: Number of full passes over ``trainloader``.
        log_interval: Print the average loss every this many mini-batches.
    """
    for epoch in range(num_epochs):
        running_loss = 0.0
        # Count steps from 1 so the logged step number is the number of batches seen.
        for step, (inputs, labels) in enumerate(trainloader, 1):
            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            # Back-propagate, then update the parameters
            loss.backward()
            optimizer.step()
            # Report the epoch, step and mean loss of the last `log_interval` batches
            running_loss += loss.item()
            if step % log_interval == 0:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, step, running_loss / log_interval))
                running_loss = 0.0
    print('Finished Training')
    torch.save(net.state_dict(), PATH)
    print('Finished Saving')


# Checkpoint file that train() writes the learned weights to
PATH = './cifar_net.pth'

# Instantiate the network and print its layer summary
net = Net()
print(net)

# Loss function: cross-entropy
criterion = nn.CrossEntropyLoss()
# Optimizer: stochastic gradient descent over net.parameters() (the weights
# and biases); lr is the learning rate, momentum the momentum factor
optimizer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)
# Train the network; train() also saves the trained weights to PATH
train()

# Grab the first batch of test images
dataiter = iter(testloader)
# Use the built-in next(): DataLoader iterators in recent PyTorch releases
# no longer provide a .next() method.
images, labels = next(dataiter)
# Show the original images tiled into one grid
imshow(torchvision.utils.make_grid(images))
# Ground-truth labels
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))

# Rebuild the network and restore the weights saved during training
net = Net()
net.load_state_dict(torch.load(PATH))
# Inference only: switch to evaluation mode and skip gradient tracking
net.eval()
with torch.no_grad():
    outputs = net(images)
# There are 10 classes; the class with the highest score is the prediction.
# torch.max(outputs, 1) returns a (values, indices) pair per row: the values
# are discarded via `_`, the indices are the predicted class ids.
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(len(predicted))))

# Overall accuracy on the whole test set
correct = 0
total = 0
with torch.no_grad():
    # Unpack each batch directly instead of binding it to `data`, which would
    # overwrite the module alias `data` (torch.utils.data) at module level.
    for images, labels in testloader:
        outputs = net(images)
        # Index of the highest score per row is the predicted class
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

# Per-class accuracy: which classes does the network handle well or badly?
class_correct = [0.] * len(classes)
class_total = [0.] * len(classes)
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        # Walk the actual batch instead of assuming exactly 4 samples, so any
        # batch size (including a shorter final batch or a batch of 1) works.
        hits = (predicted == labels).tolist()
        for label, hit in zip(labels.tolist(), hits):
            class_correct[label] += hit
            class_total[label] += 1

for i in range(len(classes)):
    # Guard against a class that never occurs in the test data
    accuracy = 100 * class_correct[i] / class_total[i] if class_total[i] else 0.0
    print('Accuracy of %5s : %2d %%' % (
        classes[i], accuracy))

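# 在GPU上训练
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# net.to(device)
# 在训练/测试循环内,把每个批次的数据移动到与网络相同的设备上:
# inputs, labels = inputs.to(device), labels.to(device)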

你可能感兴趣的:(#,神经网络,神经网络,pytorch,机器学习,python,人工智能)