首先准备数据集:
# Training split of CIFAR-10; each image is converted to a tensor on load.
train_data = torchvision.datasets.CIFAR10(
    root='./dataset',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# Test split, loaded from the same directory with the same transform.
test_data = torchvision.datasets.CIFAR10(
    root='./dataset',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
如果想查看我们的数据集有多少张:
# Print the number of samples in the training split, then in the test split.
print(len(train_data))
test_data_size = len(test_data)
print(f'测试集的长度:{test_data_size}')
接着利用dataloader加载数据:
# Wrap both splits in loaders that yield mini-batches of 64 samples.
train_dataloader = DataLoader(dataset=train_data, batch_size=64)
test_dataloader = DataLoader(dataset=test_data, batch_size=64)
而后编写网络:
一般把网络结构写在单独的文件中,再在其他文件中导入该文件。CIFAR10是一个10分类数据集,因此网络最后输出10个数;本例没有添加激活函数:
from torch import nn
class Model(nn.Module):
    """Small convolutional classifier: 3x32x32 images in, 10 class scores out.

    Three 5x5 convolutions with padding 2 (which keeps the spatial size),
    each followed by 2x2 max pooling, turn a 32x32 input into 64 channels of
    4x4. Those 1024 values are flattened and passed through two linear
    layers. No activation functions are placed between the layers.
    """

    def __init__(self):
        super().__init__()
        # The attribute name and layer order are kept as-is so that the
        # state_dict keys ("model.0.weight", ...) do not change.
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),        # 3x32x32  -> 32x32x32
            nn.MaxPool2d(2),                  # 32x32x32 -> 32x16x16
            nn.Conv2d(32, 32, 5, padding=2),  # 32x16x16 -> 32x16x16
            nn.MaxPool2d(2),                  # 32x16x16 -> 32x8x8
            nn.Conv2d(32, 64, 5, padding=2),  # 32x8x8   -> 64x8x8
            nn.MaxPool2d(2),                  # 64x8x8   -> 64x4x4
            nn.Flatten(),                     # 64x4x4   -> 1024
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return the 10 raw class scores for every sample in batch ``x``."""
        return self.model(x)
若要测试网络的正确性,创建一个和输入大小相同的数据集,看输出是否是想要的:
import torch
if __name__ == '__main__':
    # Smoke test: feed a dummy batch shaped like the real input and check
    # that the output has one row of 10 scores per sample.
    model = Model()
    # Batch of 64 all-ones "images"; named to avoid shadowing builtin input().
    dummy_batch = torch.ones([64, 3, 32, 32])
    print(model(dummy_batch).shape)
输出为:
torch.Size([64, 10])
创建网络、损失函数、优化器:
# Network to be trained.
model = Model()
# Loss function: cross-entropy on the network outputs.
loss_fn = nn.CrossEntropyLoss()
# Optimizer: SGD over all model parameters with a fixed learning rate.
learning_rate = 1e-2
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
先设置一些训练网络的参数:
# Bookkeeping for the training run.
total_train_step = 0  # number of training steps (batches) processed so far
total_test_step = 0   # number of evaluation passes completed so far
epoch = 10            # number of training epochs to run
然后开始训练:
# Training step: one pass over the training loader.
for data in train_dataloader:
    imgs, targets = data
    outputs = model(imgs)
    loss = loss_fn(outputs, targets)

    # Clear old gradients, backpropagate this batch's loss, update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    total_train_step += 1
    # Report the current batch loss once every 100 training steps.
    if total_train_step % 100 == 0:
        print('训练次数:{},Loss:{}'.format(total_train_step, loss.item()))
每次训练完一轮之后,在测试数据集上运行,根据测试数据集的结果(整体的一个loss)来判断模型是否训练完毕:
# Evaluation step: sum the loss over every batch of the test loader.
total_test_loss = 0
with torch.no_grad():  # no parameter updates here, so gradients are not needed
    for data in test_dataloader:
        imgs, targets = data
        outputs = model(imgs)
        loss = loss_fn(outputs, targets)
        # .item() keeps the running total a plain Python float, so the print
        # below shows a number rather than a tensor repr.
        total_test_loss += loss.item()
print('整体测试集上的Loss:{}'.format(total_test_loss))
使用Tensorboard查看:
from torch.utils.tensorboard import SummaryWriter
# Writer that records scalars for TensorBoard under ./logs.
writer = SummaryWriter('./logs')
...
# (inside the training loop) every 100 steps, print and log the batch loss.
if total_train_step % 100 == 0:
    print('训练次数:{},Loss:{}'.format(total_train_step, loss.item()))
    # Log a plain float rather than a tensor still attached to the graph.
    writer.add_scalar('train_loss', loss.item(), total_train_step)
...
# (after each evaluation pass) print and log the summed test loss, then
# advance the counter used as the x-axis of the test curves.
print('整体测试集上的Loss:{}'.format(total_test_loss))
writer.add_scalar('test_loss', total_test_loss, total_test_step)
total_test_step += 1
...
在terminal中输入tensorboard --logdir=logs打开Tensorboard:
保存每一轮训练的网络:
for i in range(epoch):
    ...
    # At the end of every epoch, save the whole model object to a file
    # named after the zero-based epoch index.
    torch.save(model, 'model_{}'.format(i))
    print('模型以保存')
优化:
在分类问题中,可以使用正确率来表示网络的好坏。本网络输入一张图片,输出10个数,分别代表10个类别的得分(经过softmax后可视为概率);得分最大的位置所对应的类别若和图片原本的target相同,则预测正确,否则错误。使用argmax方法可以得到这10个数中最大值所对应的位置。
输入target为3,2,预测target为4,2
运行 输入target == 预测target
得false,true
将结果相加(false=0,true=1)得1,就是有一个是正确的
argmax()的参数为0时,在每一列内(跨行)比较,返回各列最大值所在的行号;参数为1时,在每一行内(跨列)比较,返回各行最大值所在的列号。
# Evaluation pass that also counts correct predictions.
total_test_loss = 0  # reset so the total covers only this pass
total_accuracy = 0   # number of correctly classified test samples
with torch.no_grad():
    for data in test_dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        loss = loss_fn(outputs, targets)
        total_test_loss += loss.item()
        # argmax(1) picks the highest-scoring class per sample; comparing with
        # the targets and summing gives the number of hits in this batch.
        # .item() keeps the counter a plain Python int.
        total_accuracy += (outputs.argmax(1) == targets).sum().item()
# Fraction of the test set that was classified correctly.
test_accuracy = total_accuracy / len(test_data)
print('整体数据集上的正确率:{}'.format(test_accuracy))
writer.add_scalar('total_accuracy', test_accuracy, total_test_step)
其他:
在训练开始时,可以将网络设置为训练模式,同理测试模式:
# Start of the training phase: put the network into training mode.
model.train()
# Start of the evaluation phase: put the network into evaluation mode.
model.eval()
查看官方文档可知,train()和eval()只对部分网络层(例如Dropout、BatchNorm)有作用。
完整代码
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from CIFAR_10 import Model
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# CIFAR-10 training and test splits; images are converted to tensors on load.
train_data = torchvision.datasets.CIFAR10('./dataset', train=True, transform=torchvision.transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10('./dataset', train=False, transform=torchvision.transforms.ToTensor(), download=True)

# Loaders that yield mini-batches of 64 samples.
train_dataloader = DataLoader(train_data, 64)
test_dataloader = DataLoader(test_data, 64)

# Network, moved to the selected device.
model = Model().to(device)
# Loss function.
loss_fn = nn.CrossEntropyLoss().to(device)
# Optimizer: SGD over all model parameters.
learning_rate = 1e-2
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# TensorBoard writer; view the curves with `tensorboard --logdir=logs`.
writer = SummaryWriter('./logs')

# Bookkeeping for the run.
total_train_step = 0  # number of training steps (batches) processed so far
total_test_step = 0   # number of evaluation passes completed so far
epoch = 10            # number of training epochs to run

try:
    for i in range(epoch):
        print('-------第{}轮训练开始-------'.format(i+1))

        # ---- Training phase ----
        model.train()
        for data in train_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = model(imgs)
            loss = loss_fn(outputs, targets)

            # Clear old gradients, backpropagate, update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_step += 1
            # Print and log the batch loss once every 100 training steps.
            if total_train_step % 100 == 0:
                print('训练次数:{},Loss:{}'.format(total_train_step, loss.item()))
                writer.add_scalar('train_loss', loss.item(), total_train_step)

        # ---- Evaluation phase ----
        model.eval()
        total_test_loss = 0  # summed loss over all test batches
        total_accuracy = 0   # number of correctly classified test samples
        with torch.no_grad():
            for data in test_dataloader:
                imgs, targets = data
                imgs = imgs.to(device)
                targets = targets.to(device)
                outputs = model(imgs)
                loss = loss_fn(outputs, targets)
                # .item() keeps both accumulators plain Python numbers, so
                # the prints below show numbers rather than tensor reprs.
                total_test_loss += loss.item()
                total_accuracy += (outputs.argmax(1) == targets).sum().item()

        # Fraction of the test set that was classified correctly.
        test_accuracy = total_accuracy / len(test_data)
        print('整体数据集上的正确率:{}'.format(test_accuracy))
        writer.add_scalar('total_accuracy', test_accuracy, total_test_step)
        print('整体测试集上的Loss:{}'.format(total_test_loss))
        writer.add_scalar('test_loss', total_test_loss, total_test_step)
        total_test_step += 1

        # Save the whole model object after every epoch.
        torch.save(model, 'model_{}'.format(i))
        print('模型以保存')
finally:
    # Flush pending events and release the log file, even if training is
    # interrupted part-way through.
    writer.close()