本章节内容将在CIFAR10数据集上训练一个简单的CNN网络:
CIFAR数据集可分为CIFAR10, CIFAR100。 CIFAR-10是指包含10个种类, CIFAR-100包含100个种类。
特点:32x32 彩色图像;10个类别;总共60000张图像;50000张训练样本 + 10000张测试样本;每个类别有6000张图像, 10 x 6000 = 60000;
10个类别:airplane,automobile,bird,cat,deer,dog,frog,horse,ship,truck;
Tips:不需要手动下载, 使用pytorch中的Dataset API自动下载即可
这一步骤在pytorch中非常方便,pytorch已经为我们准备好了常见的数据集合,只需要导入即可。
数据集在 torchvision.datasets 包里面:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
torchvision.datasets.CIFAR10
是一个类, 通过实例化该类的一个对象,就可以操作数据集。
参数:
root
-----数据集下载后保存的路径
train
-----训练or测试
download
----是否需要自动下载
transform
----对图像进行变换, 一般需要对原始图像进行ToTensor(), Normalize()
变换
之后,使用DataLoader
类对数据集进行包装,目的是为了方便读取和使用,比如可以按mini-batch读取, 并通过num_workers使用多个子进程并行加载数据。
# -------------------- Prepare the dataset ------------------
# Dataset, DataLoader
# ToTensor() converts each PIL image to a float tensor; Normalize() then
# applies (x - 0.5) / 0.5 on each of the three channels.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       transform=transform, download=True)

trainloader = DataLoader(dataset=trainset, batch_size=4, shuffle=True, num_workers=4)
testloader = DataLoader(dataset=testset, batch_size=4, shuffle=True, num_workers=4)

# CIFAR-10 class names, listed in label order (index == label id).
classes = ('airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')


def imshow(img):
    """Display a normalized CxHxW image tensor with matplotlib."""
    img = img / 2 + 0.5  # undo Normalize(mean=0.5, std=0.5)
    plt.imshow(np.transpose(img.numpy(), (1, 2, 0)))  # CxHxW -> HxWxC
    plt.show()


# Fetch one mini-batch and show the images together with their labels.
dataiter = iter(trainloader)
# Use the built-in next(); the iterator's .next() alias is not available in
# recent PyTorch releases.
images, labels = next(dataiter)
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
简单起见,采用LeNet网络,将第一个卷积层的输入通道改为3,因为CIFAR-10是彩色3通道图像。
# Define a simple network
# LeNet-5 with a 3-channel first convolution
class Net(nn.Module):
    """LeNet-style CNN that maps 3x32x32 images to 10 class scores."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # One pooling layer is shared by both convolution stages (it has no parameters).
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return raw class scores of shape (N, 10) for a batch of shape (N, 3, 32, 32)."""
        x = self.pool1(F.relu(self.conv1(x)))  # 3x32x32 -> 6x28x28 -> 6x14x14
        x = self.pool1(F.relu(self.conv2(x)))  # 6x14x14 -> 16x10x10 -> 16x5x5
        # Flatten per sample. Keeping the batch dimension fixed makes a wrongly
        # sized input fail in fc1 instead of being silently re-batched.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
CNN网络训练本质上就是对一个目标函数(损失函数)求最小的问题,在数学中,对于一般的凸函数,优化方法有梯度下降法、牛顿法等。(除此之外还有启发式搜索,比如遗传算法等)。 对于神经网络的训练,常用的优化方法为随机梯度下降法SGD。
# Define the loss function and the optimization method:
# cross-entropy loss, SGD with momentum
net = Net()  # the model must exist before its parameters are handed to the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
Iter------一次迭代,是指对一个mini-batch做一次forward+backward
Epoch------迭代完所有的训练数据(1次),称为一个epoch
这里总共跑20个epoch。
# Train the network
import os

# Iterate over epochs
for epoch in range(20):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the input
        inputs, labels = data

        # zero the parameter gradients left over from the previous step
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)  # compute the loss
        loss.backward()                    # back-propagate the loss
        optimizer.step()                   # update the parameters

        # print statistics
        running_loss += loss.item()  # tensor.item() extracts the Python number
        if i % 2000 == 1999:
            # report the mean loss over the last 2000 iterations
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# -------- Save the model -----------
# torch.save does not create missing directories, so make sure the target exists.
os.makedirs('./model', exist_ok=True)
torch.save(net, './model/model_cfair10_2.pth')  # saves the whole model; the file is comparatively large
# Alternative that stores only the parameters:
# torch.save(net.state_dict(), './model/model_cfair10.pth')
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
CIFAR-10总共包含10个类别:
# Class names indexed by the predicted class id ('frog' was misspelled as 'forg').
CFAIR10_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
载入一张RGB图像,图像内容需要属于上述10个类别之一,否则无法正确识别
# Load an image. Force 3-channel RGB so that grayscale or RGBA files still
# match the 3-channel Normalize() and the network's 3-channel input.
image = Image.open('/xxxx/image/dog.jpg').convert('RGB')
对图像进行相同的变换:
# Preprocessing pipeline: resize to 32x32, convert to a tensor, then
# normalize each channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

image_transformed = transform(image)
print(image_transformed.shape)
需要注意的地方
CNN网络的输入为4D Tensor (NxCxHxW), 转换之后的图像需要变换为4D
tensor.unsqueeze(0)
即可增加一个维度,这样输入的tensor为: 1x3x32x32
# Preprocess the loaded image: resize to 32x32, convert to a tensor and
# normalize each channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

image_transformed = transform(image)
print(image_transformed.shape)
class Net(nn.Module):
    """LeNet-style CNN that maps 3x32x32 images to 10 class scores."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # One pooling layer is shared by both convolution stages (it has no parameters).
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return raw class scores of shape (N, 10) for a batch of shape (N, 3, 32, 32)."""
        x = self.pool1(F.relu(self.conv1(x)))  # 3x32x32 -> 6x28x28 -> 6x14x14
        x = self.pool1(F.relu(self.conv2(x)))  # 6x14x14 -> 16x10x10 -> 16x5x5
        # Flatten per sample. Keeping the batch dimension fixed makes a wrongly
        # sized input fail in fc1 instead of being silently re-batched.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Load the complete pickled model (the Net class must be defined before this call).
# NOTE(review): make sure this path matches the file written by torch.save
# in the training script.
# NOTE(review): PyTorch 2.6+ defaults torch.load to weights_only=True, which
# rejects fully pickled models; pass weights_only=False there for files you trust.
net = torch.load('./model/model_cfair10.pth')
net.eval()  # switch to inference mode
# print(net)

image_transformed = image_transformed.unsqueeze(0)  # CxHxW -> 1xCxHxW
with torch.no_grad():  # no gradients are needed for prediction
    output = net(image_transformed)
# maximum along dim 1 (the class scores): returns the max value and its index
predict_value, predict_idx = torch.max(output, 1)

plt.figure()
plt.imshow(np.array(image))
plt.title(CFAIR10_names[predict_idx.item()])  # .item(): plain int for list indexing
plt.axis('off')
plt.show()
————————————————————————————————————————————————————————————————————————————————————————————————————————
问题:
首先,需要安装GPU版本的pytorch, 具体安装步骤pytorch官网有。使用GPU训练需要对代码做一些小调整。
**step1:**在代码中,首先使用pytorch中的函数判断是否支持GPU
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
is_support = torch.cuda.is_available()  # was misspelled as torch.cunda
if is_support:
    device = torch.device('cuda:0')  # use 'cuda:1' to select the second GPU instead
else:
    device = torch.device('cpu')
step2: 将CPU上的计算转移到GPU上
net = Net()
net.to(device)  # required for GPU mode: moves the model parameters to `device`

# Build the loss and the optimizer for THIS model instance. An optimizer that
# was created for a different Net object would keep updating that object's
# parameters and leave this model untrained.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train the network
# Iterate over epochs
for epoch in range(20):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the input
        inputs, labels = data
        inputs = inputs.to(device)  # move the batch to the compute device
        labels = labels.to(device)  # labels must live on the same device

        # zero the parameter gradients left over from the previous step
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)  # compute the loss
        loss.backward()                    # back-propagate the loss
        optimizer.step()                   # update the parameters

        # print statistics
        running_loss += loss.item()  # tensor.item() extracts the Python number
        if i % 2000 == 1999:
            # report the mean loss over the last 2000 iterations
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')
run, 会发现迭代速度飞起,10min左右就可以完成20个Epoch迭代,速度非常快。
上面的代码采用的是固定学习率lr=0.001。刚开始迭代的时候,学习率可以大一些,这样收敛速度快;随着迭代次数增加,学习率应该减小,以防止loss震荡。
简单起见,本人将学习率调整为lr=0.0001,然后在之前模型的基础上迭代20个Epoch。明显发现Loss变为0.3, 0.2, 0.1。
虽然采用GPU训练, lr减小为0.0001, loss也减少了(训练集loss),但在测试中,仍有1个horse被识别为deer, bird被识别为cat。因此,要训练出一个合适的模型,还需要其他策略,包括采用其他网络模型。
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
# Class names indexed by the predicted class id ('frog' was misspelled as 'forg').
CFAIR10_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# -------------- Test dataset ------------------------------
# Preprocessing: resize to 32x32, convert to a tensor, normalize each channel
# with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# train=False selects the test split; it is downloaded into ./data if missing.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Fixed order (no shuffling), four images per batch, four loader workers.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
)
# ----------------- Network model -------------------------------
class Net(nn.Module):
    """LeNet-style CNN that maps 3x32x32 images to 10 class scores."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # One pooling layer is shared by both convolution stages (it has no parameters).
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return raw class scores of shape (N, 10) for a batch of shape (N, 3, 32, 32)."""
        x = self.pool1(F.relu(self.conv1(x)))  # 3x32x32 -> 6x28x28 -> 6x14x14
        x = self.pool1(F.relu(self.conv2(x)))  # 6x14x14 -> 16x10x10 -> 16x5x5
        # Flatten per sample. Keeping the batch dimension fixed makes a wrongly
        # sized input fail in fc1 instead of being silently re-batched.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Load the trained model onto the CPU regardless of where it was trained.
net = torch.load('./model/model_cfair10_20.pth', map_location='cpu')

# ------------ Evaluate on the whole test set -------------------------------------------
correct = 0  # number of correctly classified images so far
total = 0    # number of images seen so far
count = 0    # number of batches processed so far

with torch.no_grad():
    for images, labels in testloader:
        count += 1

        # forward pass
        out = net(images)

        # predicted class = index of the largest score along dim 1
        _, pred = torch.max(out, 1)
        correct += (pred == labels).sum().item()
        total += labels.size(0)
        print('batch:{}'.format(count))

# Accuracy over everything that was evaluated
accuracy = correct / total
print('Acc = {:.5f}'.format(accuracy))
链接:https://www.jianshu.com/p/e704a6f6e8d3
来源:简书