以CIFAR10数据集为例:
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import time
from Model import *
# 准备数据集
train_data = torchvision.datasets.CIFAR10("../data", train=True, transform=torchvision.transforms.ToTensor(),
download=True)
test_data = torchvision.datasets.CIFAR10("../data", train=False, transform=torchvision.transforms.ToTensor(),
download=True)
# length长度
train_data_len = len(train_data)
test_data_len = len(test_data)
print("训练集: {}".format(train_data_len))
print("测试集: {}".format(test_data_len))
# 利用DataLoader加载数据集
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
# 创建网络模型
liang = Liang()
# 损失函数
loss_fn = nn.CrossEntropyLoss()
# 优化器
# learning_rate = 0.01
learning_rate = 1e-2 # 1*(10)^(-2)=1/100
optimizer = torch.optim.SGD(liang.parameters(), lr=learning_rate)
# 设置训练网络的一些参数
total_train_step = 0 # 训练次数
total_test_step = 0 # 测试次数
epoch = 10 # 训练轮数
# 添加TensorBoard
writer = SummaryWriter("../logs")
start_time = time.time()
for i in range(epoch):
print("-----------第 {} 轮训练开始----------".format(i + 1))
# 训练步骤开始
liang.train()
for data in train_dataloader:
imgs, targets = data
outputs = liang(imgs)
loss = loss_fn(outputs, targets)
# 优化调优
optimizer.zero_grad()
loss.backward()
optimizer.step()
total_train_step += 1
if total_train_step % 100 == 0:
end_time = time.time()
print(end_time - start_time)
print("训练次数: {}, Loss: {}".format(total_train_step, loss.item()))
writer.add_scalar("train_loss", loss.item(), total_train_step)
# 测试步骤开始
liang.eval()
total_test_loss = 0
total_accuracy = 0
with torch.no_grad():
for data in test_dataloader:
imgs, targets = data
outputs = liang(imgs)
loss = loss_fn(outputs, targets)
total_test_loss += loss.item()
accuracy = (outputs.argmax(1) == targets).sum()
total_accuracy += accuracy
print("整体测试集上的Loss: {}".format(total_test_loss))
print("整体测试集上的正确率: {}".format(total_accuracy / test_data_len))
writer.add_scalar("test_loss", total_test_loss, total_test_step)
writer.add_scalar("test_accuracy", total_accuracy / test_data_len, total_test_step)
total_test_step += 1
torch.save(liang, "../model/liang_{}.pth".format(i))
print("模型已保存")
writer.close()
Files already downloaded and verified
Files already downloaded and verified
训练集: 50000
测试集: 10000
-----------第 1 轮训练开始----------
6.537519931793213
训练次数: 100, Loss: 2.288882255554199
13.001430749893188
训练次数: 200, Loss: 2.271170139312744
19.13790225982666
训练次数: 300, Loss: 2.247511148452759
25.20561981201172
训练次数: 400, Loss: 2.168041706085205
31.378580570220947
训练次数: 500, Loss: 2.049440383911133
37.541871309280396
训练次数: 600, Loss: 2.054497241973877
43.90901756286621
训练次数: 700, Loss: 1.9997793436050415
整体测试集上的Loss: 309.624484539032
整体测试集上的正确率: 0.2912999987602234
模型已保存
...
将 神经网络、损失函数、数据 转为 cuda(GPU型) 执行,我们可以发现,速度明显比CPU执行的快很多!
# 神经网络
liang = Liang()
if torch.cuda.is_available():
liang = liang.cuda()
# 损失函数
loss_fn = nn.CrossEntropyLoss()
if torch.cuda.is_available():
loss_fn = loss_fn.cuda()
# 数据
imgs, targets = data
if torch.cuda.is_available():
imgs = imgs.cuda()
targets = targets.cuda()
Files already downloaded and verified
Files already downloaded and verified
训练集: 50000
测试集: 10000
-----------第 1 轮训练开始----------
10.994545936584473
训练次数: 100, Loss: 2.2849647998809814
12.99094533920288
训练次数: 200, Loss: 2.2762258052825928
14.33635950088501
训练次数: 300, Loss: 2.230626106262207
16.00475764274597
训练次数: 400, Loss: 2.1230242252349854
17.964726209640503
训练次数: 500, Loss: 2.022688150405884
19.61249876022339
训练次数: 600, Loss: 2.01230788230896
20.96266460418701
训练次数: 700, Loss: 1.9741096496582031
整体测试集上的Loss: 305.68632411956787
整体测试集上的正确率: 0.29739999771118164
模型已保存
...
# 定义训练的设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# 神经网络
liang = Liang()
liang = liang.to(device)
# 损失函数
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)
# 数据
imgs, targets = data
imgs = imgs.to(device)
targets = targets.to(device)
Files already downloaded and verified
Files already downloaded and verified
cuda
训练集: 50000
测试集: 10000
-----------第 1 轮训练开始----------
9.563657283782959
训练次数: 100, Loss: 2.2956345081329346
10.768706560134888
训练次数: 200, Loss: 2.2770333290100098
11.968295335769653
训练次数: 300, Loss: 2.26665997505188
13.181000471115112
训练次数: 400, Loss: 2.2037200927734375
14.387518167495728
训练次数: 500, Loss: 2.0665152072906494
15.585152387619019
训练次数: 600, Loss: 2.0054214000701904
16.81506586074829
训练次数: 700, Loss: 2.0446667671203613
整体测试集上的Loss: 320.6275497674942
整体测试集上的正确率: 0.2667999863624573
模型已保存
...
我们可以借助Google提供的Colab来进行GPU训练:https://colab.research.google.com/ (需要VPN)
在Colab中如果想要使用GPU进行训练,需要在笔记本设置中选择GPU。
明显快好多!!!
import torch
import torchvision
from PIL import Image
from torch import nn
image_pth = "../images/dog.jpg"
image = Image.open(image_pth)
print(image)
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
torchvision.transforms.ToTensor()])
image = transform(image)
print(image.shape)
class Liang(nn.Module):
def __init__(self):
super(Liang, self).__init__()
self.module = nn.Sequential(
nn.Conv2d(3, 32, 5, 1, 2),
nn.MaxPool2d(2),
nn.Conv2d(32, 32, 5, 1, 2),
nn.MaxPool2d(2),
nn.Conv2d(32, 64, 5, 1, 2),
nn.MaxPool2d(2),
nn.Flatten(),
nn.Linear(64 * 4 * 4, 64),
nn.Linear(64, 10)
)
def forward(self, x):
x = self.module(x)
return x
model = torch.load("../model/liang_4.pth")
print(model)
image = torch.reshape(image, (1, 3, 32, 32))
print(image.shape)
model.eval()
image = image.cuda()
with torch.no_grad():
output = model(image)
print(output)
print(output.argmax(1))
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=300x200 at 0x19E66C68100>
torch.Size([3, 32, 32])
Liang(
(module): Sequential(
(0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Flatten(start_dim=1, end_dim=-1)
(7): Linear(in_features=1024, out_features=64, bias=True)
(8): Linear(in_features=64, out_features=10, bias=True)
)
)
torch.Size([1, 3, 32, 32])
tensor([[-0.8167, -2.1763, 1.3891, 0.7956, 1.2035, 1.8374, -0.7936, 1.7908,
-2.0639, -1.4441]], device='cuda:0')
tensor([5], device='cuda:0')