1.pytorch官方的demo实例(搭建Lenet并基于CIFAR10训练)

搭建Lenet并基于CIFAR10训练

1.pytorch官方的demo实例(搭建Lenet并基于CIFAR10训练)_第1张图片

1.model.py

import torch.nn as nn
import torch.nn.functional as F

#详细解释:http://lihuaxi.xjx100.cn/news/105304.html

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()    #https://blog.csdn.net/qq_31244453/article/details/104657532
        self.conv1 = nn.Conv2d(3, 16, 5) # 1.in_channels输入通道数 2.卷积核个数 3,卷积核大小
        self.pool1 = nn.MaxPool2d(2, 2)  # (kernel_size池化核大小,stride or kernel_size 如果不指定就是池化核大小)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))    # input(3, 32, 32) output(16, 28, 28)
        x = self.pool1(x)            # output(16, 14, 14)
        x = F.relu(self.conv2(x))    # output(32, 10, 10)
        x = self.pool2(x)            # output(32, 5, 5)
        #print(x.shape)   #torch.Size([32, 32, 5, 5])
        x = x.view(-1,32*5*5)       # output(32*5*5)   # 展成一维向量  (x.shape[0],-1)
        # 这里-1表示一个不确定的数,就是你如果不确定你想要reshape成几行,但是你很肯定要reshape成4列,那不确定的地方就可以写成-1
        #print(x)
        #print(x.shape)  #torch.Size([32, 800])
        x = F.relu(self.fc1(x))      # output(120)
        x = F.relu(self.fc2(x))      # output(84)
        x = self.fc3(x)  # output(10)   #输出结果一般不使用relu激活
        #print(x.size())   #torch.Size([32, 10])
        return x



import torch
input1=torch.rand([32,3,32,32])
#[batch, channel, height, width],表示batch_size=8, 3通道(灰度图像为1),图片尺寸:224x224
#print(input1)
model=LeNet()
#print(model)
output=model(input1)   #model.forward(input1)
#print(output.size()   #torch.Size([32, 10])

2.train.py

import torch
import torchvision
import torch.nn as nn
from model import LeNet
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

# transforms.Compose 将预处理方法打包成一个整体
# 1.ToTensor 将pil图像或者np数据转化成tensor 2.normalize 使用均值和标准差 来标准化tensor
#def main():

transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
#https://blog.csdn.net/jzwong/article/details/104272600
#
#     # 50000张训练图片
#     # 第一次使用时要将download设置为True才会自动去下载数据集
## root='./' 把数据集下载到什么地方 一般放在当前目录的data文件夹下 train=True 会导入数据集中训练集  transform为图像预处理
train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=False, transform=transform)
## 将刚才的数据集导入 batch_size 每一批随机拿出36张拿来训练  shuffle打乱
train_loader = torch.utils.data.DataLoader(train_set, batch_size=36,
                                                shuffle=True, num_workers=0)

# 10000张验证图片
# 第一次使用时要将download设置为True才会自动去下载数据集
val_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=False, transform=transform)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=10000,
                                         shuffle=False, num_workers=0)

#print(val_loader) #
val_data_iter = iter(val_loader)  # iter 将testloader转化成可迭代的迭代器
val_image, val_label = val_data_iter.next()   # 通过next方法 获取一批数据包括测试的图像 以及图像的标签值  这个非常好用!

# print(val_image.size()) #torch.Size([10000, 3, 32, 32])
# print(val_label.size()) #torch.Size([10000])

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


# def imshow(img):
#     img = img / 2 + 0.5      # unnormalize  刚才标准化现在还原
#     npimg = img.numpy()         # 转化成numpy格式
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))  ## 刚才是channel height width 现在要变为 height width channel 所以从(0 1 2) 改成(1 2 0)
#     plt.show()    # 用show展示出来
#
# # print labels
# print(' '.join(f'{classes[val_label[j]]:5s}' for j in range(4)))
#
# # show images
# imshow(torchvision.utils.make_grid(val_image))
#

net = LeNet()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(5):  # loop over the dataset multiple times  # 训练集需要训练多少次

    running_loss = 0.0  # 累加loss
    for step, data in enumerate(train_loader, start=0):  # 遍历训练集样本 enumerate
        # 不仅能返回每一批的数据data 还会返回这一批data所对应的步数 也就是index
        # get the inputs; data is a list of [inputs, labels]
        #将第一个value 赋值给inputs(每一个样本的32*32) ,第二个value 赋值给 labels(10个分类)

        inputs, labels = data
        # print(inputs.size()) #torch.Size([36, 3, 32, 32])
        # print(labels.size()) #torch.Size([36])
        #print(inputs, labels)

        # zero the parameter gradients
        optimizer.zero_grad()  # 将历史的梯度清零 可以一次性计算多个小的batch

        # forward + backward + optimize
        outputs = net(inputs)
        #print(outputs.size()) #torch.Size([36, 10])
        #print(outputs)
        loss = loss_function(outputs, labels)
        #print(labels.size())  #torch.Size([36])
        #print(labels)
        loss.backward()  # 反向传播
        optimizer.step()  # 参数更新 也是权重更新

        # 打印耗时、损失、准确率等数据
        running_loss += loss.item()   # 每次计算完loss 累加到running_loss变量中
        if step % 500 == 499:    # print every 500 mini-batches
            with torch.no_grad():  # torch.no_grad 在接下来过程中 不要计算每个节点的误差损失梯度 少占用内存或者资源
                outputs = net(val_image)  # 正向传播  [batch, 10]
                #print("output",outputs.size()) #output torch.Size([10000, 10])
                predict_y = torch.max(outputs, dim=1)[1]  #torch.max 寻找输出的最大的index在哪个位置 网络预测最可能属于哪个类别
                # dim = 1 在维度1 上寻找最大的值 [batch, 10]   因为第0个维度对应的是batch 要在输出的十个节点中寻找
                # print(torch.max(outputs, dim=1)) # values=tensor([0.9726, 4.1877, 2.7064,  ..., 3.2531, 1.4753, 2.6609]),
                #                                     #indices=tensor([3, 8, 1,  ..., 5, 1, 7]))
                # print(predict_y) #tensor([3, 8, 1,  ..., 5, 1, 7])
                # print(predict_y.size())#torch.Size([10000])

                accuracy = torch.eq(predict_y, val_label).sum().item() / val_label.size(0)
                #print(val_label.size())  #torch.Size([10000])
                #val_label.size(0) 等价val_label.size()[0]=batch_size
                # 计算出来的是个tensor 要通过item得出数值
                # 将预测的标签类别和真实的标签类别进行比较 再求和 在本次测试中预测对了多少个样本

                print('[%d, %5d] train_loss: %.3f  test_accuracy: %.3f' %
                      (epoch + 1, step + 1, running_loss / 500, accuracy))
                running_loss = 0.0

print('Finished Training')

save_path = './Lenet.pth'
torch.save(net.state_dict(), save_path)

#
# if __name__ == '__main__':
#     main()

3.predict.py

import torch
import torchvision.transforms as transforms
from PIL import Image

from model import LeNet


def main():
    transform = transforms.Compose(
        [transforms.Resize((32, 32)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    classes = ('plane', 'car', 'bird', 'cat',
               'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    net = LeNet()
    net.load_state_dict(torch.load('Lenet.pth'))

    im = Image.open('1.jpg')
    im = transform(im)  # [C, H, W]
    im = torch.unsqueeze(im, dim=0)  # [N, C, H, W]

    with torch.no_grad():
        outputs = net(im)
        print(outputs)
        # print(outputs.size())
        predict = torch.max(outputs, dim=1)[1].numpy()  #索引
        print(predict)
        print(classes[int(predict)])


if __name__ == '__main__':
    main()

你可能感兴趣的:(图像,pytorch,深度学习,python)