基于CIFAR10的完整模型训练套路

首先准备数据集:

# Training split of CIFAR10, stored under ./dataset; images are converted to tensors.
train_data = torchvision.datasets.CIFAR10(
    root='./dataset',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# Test split of CIFAR10, same location and transform.
test_data = torchvision.datasets.CIFAR10(
    root='./dataset',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

如果想查看我们的数据集有多少张:

# Number of samples in the training split.
train_data_size = len(train_data)
print(train_data_size)

# Number of samples in the test split, printed with a label.
test_data_size = len(test_data)
print('测试集的长度:{}'.format(test_data_size))

接着利用dataloader加载数据:

# Wrap both splits in loaders that yield mini-batches of 64 samples.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

而后编写网络:

基于CIFAR10的完整模型训练套路_第1张图片

一般把网络结构写在单独的文件中,再在其他文件中导入。CIFAR10是一个10分类的数据集,因此网络最终输出10个值;本例没有添加激活函数:

from torch import nn

class Model(nn.Module):
    """Small convolutional classifier producing 10 output scores per image.

    The network is three (5x5 convolution with padding 2 -> 2x2 max-pool)
    stages followed by a flatten and two linear layers. No activation
    functions are used. The first linear layer expects 1024 features, which
    matches 64 channels of 4x4 maps, i.e. a 3x32x32 input image.
    """

    def __init__(self):
        super().__init__()
        # Layers are kept in a single Sequential stored as ``self.model`` so
        # parameter names (``model.0.weight`` ...) stay stable.
        layers = [
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(in_features=1024, out_features=64),
            nn.Linear(in_features=64, out_features=10),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the 10 raw scores."""
        return self.model(x)

若要测试网络的正确性,可以创建一个和输入尺寸相同的张量,看输出的形状是否符合预期:

import torch

if __name__ == '__main__':
    # Smoke test: feed a dummy batch through the network and print the
    # output shape to confirm the layer sizes line up.
    model = Model()
    # Named dummy_batch rather than `input` so the builtin is not shadowed.
    dummy_batch = torch.ones([64, 3, 32, 32])  # batch_size of 64
    print(model(dummy_batch).shape)

输出为:

torch.Size([64, 10])

创建网络、损失函数、优化器:

# Instantiate the network.
model = Model()

# Cross-entropy loss for the classification task.
loss_fn = nn.CrossEntropyLoss()

# Plain SGD over all model parameters.
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

先设置一些训练网络的参数:

# Counters: number of training steps taken so far, and number of
# evaluation passes run so far.
total_train_step = total_test_step = 0

# Number of passes over the training set.
epoch = 10

然后开始训练:

    # Training phase begins: one pass over the training loader.
    # (This fragment is the body of the per-epoch loop, which is not shown here.)
    for data in train_dataloader:
        # Each batch is a pair of images and their target labels.
        imgs, targets = data
        outputs = model(imgs)
        loss = loss_fn(outputs, targets)

        # Let the optimizer update the model: clear old gradients,
        # backpropagate the current loss, then apply the update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step = total_train_step+1

        # Report the loss only every 100 training steps.
        if total_train_step%100 == 0:
            print('训练次数:{},Loss:{}'.format(total_train_step, loss.item()))

每次训练完一轮之后,在测试数据集上运行,根据测试数据集的结果(整体的一个loss)来判断模型是否训练完毕:

    # Evaluation phase begins: sum the per-batch loss over the whole test set.
    # (This fragment is part of the per-epoch loop body, which is not shown here.)
    total_test_loss = 0
    with torch.no_grad():  # no gradients needed: the model is not tuned during testing
        for imgs, targets in test_dataloader:
            outputs = model(imgs)
            loss = loss_fn(outputs, targets)
            # Accumulate a plain Python number (as the training print does with
            # loss.item()) so the total prints as a number, not a tensor repr.
            total_test_loss += loss.item()
        print('整体测试集上的Loss:{}'.format(total_test_loss))

使用Tensorboard查看:

from torch.utils.tensorboard import SummaryWriter

# Create a TensorBoard writer that logs into ./logs.
writer = SummaryWriter('./logs')
...
        # Inside the training loop: every 100 steps, also record the training loss,
        # using the training step count as the x-axis value.
        if total_train_step%100 == 0:
            print('训练次数:{},Loss:{}'.format(total_train_step, loss.item()))
            writer.add_scalar('train_loss', loss, total_train_step)
...
        # After the test loop: record the summed test loss, then advance the
        # evaluation counter used as the x-axis value.
        print('整体测试集上的Loss:{}'.format(total_test_loss))
        writer.add_scalar('test_loss', total_test_loss, total_test_step)
        total_test_step += 1
...

在terminal中输入tensorboard --logdir=logs打开Tensorboard:

基于CIFAR10的完整模型训练套路_第2张图片

保存每一轮训练的网络:

for i in range(epoch):
    ...  # training and evaluation steps for this epoch are elided here
    # Save the whole model object after every epoch, one file per epoch index.
    torch.save(model, 'model_{}'.format(i))
    print('模型已保存')

优化:

分类问题中,可以使用正确率来衡量网络的好坏。本网络输入一张图片,输出10个数,分别代表10个类别的得分(本例没有经过softmax,严格来说并不是概率)。得分最大的位置即为预测的类别,若它和图片原本的target相同,则预测正确,否则错误。使用argmax方法可以得到这10个数中最大值所对应的位置。

输入target为3,2,预测target为4,2

运行  输入target == 预测target

得false,true

将结果相加(false=0,true=1)得1,就是有一个是正确的

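argmax()的参数为0时,沿第0维比较(对每一列取最大值的位置);参数为1时,沿第1维比较(对每一行取最大值的位置)。这里outputs的形状为[batch_size, 10],每一行对应一张图片,所以使用argmax(1)得到每张图片的预测类别。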

    # Count correct predictions over the whole test set.
    # (Fragment of the evaluation phase; total_test_loss, device, writer and
    # total_test_step are set up outside this excerpt.)
    total_accuracy = 0
    with torch.no_grad():
        for imgs, targets in test_dataloader:
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = model(imgs)
            loss = loss_fn(outputs, targets)
            # Accumulate plain Python numbers so the totals print as numbers,
            # not tensor reprs.
            total_test_loss = total_test_loss + loss.item()
            # argmax(1) picks the highest-scoring class per row; comparing with
            # the targets and summing counts the correct predictions in the batch.
            total_accuracy += (outputs.argmax(1) == targets).sum().item()
        print('整体数据集上的正确率:{}'.format(total_accuracy/len(test_data)))
        writer.add_scalar('total_accuracy', total_accuracy/len(test_data), total_test_step)

 其他:

在训练开始时,可以将网络设置为训练模式,同理测试模式:

    # Training phase begins: switch the network to training mode.
    model.train()
    # Evaluation phase begins: switch the network to evaluation mode.
    model.eval()

查看官方文档可知,train()和eval()只对部分网络层有作用(例如Dropout、BatchNorm);本例的网络不包含这些层,但养成调用的习惯更稳妥。

完整代码

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from CIFAR_10 import Model

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# CIFAR10 training and test splits, stored under ./dataset, images as tensors.
train_data = torchvision.datasets.CIFAR10('./dataset', train=True, transform=torchvision.transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10('./dataset', train=False, transform=torchvision.transforms.ToTensor(), download=True)

# Loaders yielding mini-batches of 64 samples.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Create the network
model = Model().to(device)
# Create the loss function
loss_fn = nn.CrossEntropyLoss().to(device)
# Create the optimizer
learning_rate = 1e-2
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# TensorBoard writer logging into ./logs
writer = SummaryWriter('./logs')

# Training bookkeeping
# Number of training steps taken so far
total_train_step = 0
# Number of evaluation passes run so far
total_test_step = 0
# Number of training epochs
epoch = 10

for i in range(epoch):
    print('-------第{}轮训练开始-------'.format(i+1))

    # Training phase
    model.train()
    for imgs, targets in train_dataloader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        loss = loss_fn(outputs, targets)

        # Optimizer step: clear old gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step += 1

        # Report and log the training loss every 100 steps.
        if total_train_step % 100 == 0:
            print('训练次数:{},Loss:{}'.format(total_train_step, loss.item()))
            writer.add_scalar('train_loss', loss.item(), total_train_step)

    # Evaluation phase: summed loss and accuracy over the whole test set.
    model.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients needed while evaluating
        for imgs, targets in test_dataloader:
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = model(imgs)
            loss = loss_fn(outputs, targets)
            # Accumulate plain Python numbers so the totals print and log as
            # numbers rather than tensor reprs.
            total_test_loss += loss.item()
            # argmax(1) is the predicted class per sample; count the matches.
            total_accuracy += (outputs.argmax(1) == targets).sum().item()
    print('整体数据集上的正确率:{}'.format(total_accuracy/len(test_data)))
    writer.add_scalar('total_accuracy', total_accuracy/len(test_data), total_test_step)
    print('整体测试集上的Loss:{}'.format(total_test_loss))
    writer.add_scalar('test_loss', total_test_loss, total_test_step)
    total_test_step += 1

    # Save the whole model object after every epoch, one file per epoch index.
    torch.save(model, 'model_{}'.format(i))
    print('模型已保存')

# Flush pending events and release the log file once training is done.
writer.close()

你可能感兴趣的:(神经网络,机器学习,Python,机器学习,深度学习,pytorch)