目录
CIFAR-10:
实现思路:
加载数据集:
定义网络:
定义损失函数和优化器:
训练结果:
完整代码 :
参考:参考链接
一共包含 10 个类别的 RGB 彩色图片:飞机( airplane )、汽车( automobile )、鸟类( bird )、猫( cat )、鹿( deer )、狗( dog )、蛙类( frog )、马( horse )、船( ship )和卡车( truck )。图片的尺寸为 32×32 ,数据集中一共有 50000 张训练图片和 10000 张测试图片。
使用torchvision.datasets加载,该库中包含以下几种数据集
具体来说加载CIFAR10 时候,使用torchvision.datasets.CIFAR10加载,如:
# Load the CIFAR-10 training split from a local directory (download disabled,
# so the files must already exist under `root`).
train_set = tv.datasets.CIFAR10(
root='/ML_students_data/tan_xy_store/CIFAR-10Classification/data',
train=True,
download=False,
transform=transform)
其中,root表示数据集所在的路径,download表示是否从网上下载,得到数据集名称train_set
之后使用torch.utils.data.DataLoader数据集加载器加载数据。
# Wrap the dataset in a DataLoader that yields shuffled mini-batches.
train_loader = t.utils.data.DataLoader(
train_set,
batch_size=BATCH_SIZE,
shuffle=True,
num_workers=0) # number of worker processes used for loading
其中batch_size为每批次加载的图片数量 ,shuffle为True则表示每个epoch都打乱数据。注意在Windows下设置num_workers不等于0可能会报错,这是多进程数据加载在Windows平台上的已知问题。
测试集加载方式与训练集加载方式相同。
本次使用简单的LeNet网络,网络结构如下图
网络结构定义通过继承nn.Module实现
# LeNet-style CNN for 32x32 RGB images (CIFAR-10): two conv+BN+ReLU+maxpool
# stages followed by three fully-connected layers producing 10 class logits.
class Lenet(nn.Module):
def __init__(self):
super(Lenet, self).__init__()
# Conv layers
self.conv1 = nn.Conv2d(3, 6, 5)
self.bn1 = nn.BatchNorm2d(6)
self.conv2 = nn.Conv2d(6, 16, 5)
self.bn2 = nn.BatchNorm2d(16)
# Linear layers; 400 = 16 channels * 5 * 5 spatial after the conv/pool stages
self.fc1 = nn.Linear(400, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
# Maxpooling layers (2x2, stride 2: halves each spatial dimension)
self.pool1 = nn.MaxPool2d(2, 2)
self.pool2 = nn.MaxPool2d(2, 2)
# ReLU layer, shared by all stages (in-place to save memory)
self.relu = nn.ReLU(inplace=True)
# Forward pass: conv/BN/ReLU/pool twice, flatten, then the FC classifier head.
def forward(self, x):
x = self.relu(self.bn1(self.conv1(x)))
x = self.pool1(x)
x = self.relu(self.bn2(self.conv2(x)))
x = self.pool2(x)
# view flattens the feature maps to (batch, features), like numpy reshape
x = x.view(x.size()[0], -1)
x = self.relu(self.fc1(x))
x = self.relu(self.fc2(x))
x = self.fc3(x)
return x
按照步骤,先在 __init__(self)中定义好需要用到的网络操作,如卷积、池化、全连接等,之后在forward中调用这些方法,一步一步实现前向传播。
注意在连接全连接层之前要用x = x.view(x.size()[0], -1)调整参数大小。
在训练中,使用交叉熵作为损失函数,并采用随机梯度下降法(SGD)进行优化。
使用enumerate() 函数用于将一个可遍历的data数据对象组合为一个索引序列,同时列出数据和数据下标。
# Train the module-level `net` on loader A for `epochs` epochs using
# cross-entropy loss and plain SGD (lr=0.01); prints the average running
# loss every 2000 mini-batches. Relies on module-level `net` and `device`.
def train(A, epochs):
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01)
t.set_num_threads(8)
net.to(device)
for epoch in range(epochs):
running_loss = 0.0
for i, data in enumerate(A, 0):
input_data, label_data = data
# move the batch to the same device as the model
input_data = input_data.to(device)
label_data = label_data.to(device)
optimizer.zero_grad()
out = net(input_data)
loss = criterion(out, label_data)
loss.backward()
optimizer.step()
# .item() extracts the Python scalar so the graph is not retained
running_loss += loss.item()
if i % 2000 == 1999:
print('[%d, %5d] loss: %.3f' \
% (epoch + 1, i + 1, running_loss / 2000))
running_loss = 0.0
print('Finished Training')
训练结果如图所示,虽然训练集的loss降下来了,但是测试集的准确率并不高。网络可能过于简单,出现了过拟合。
总比瞎猜准,还要啥自行车??有时间把网络改成resnet50试试效果
import torch.nn as nn
import torch.nn.functional as F
import torch as t
import torch.optim as optim
import torchvision as tv
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage
# Run on the first CUDA GPU when available, otherwise fall back to CPU.
device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
class Lenet(nn.Module):
    """LeNet-style CNN for CIFAR-10: (N, 3, 32, 32) input -> (N, 10) logits."""

    def __init__(self):
        super(Lenet, self).__init__()
        # Two convolution stages, each followed by batch normalization.
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.bn1 = nn.BatchNorm2d(6)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.bn2 = nn.BatchNorm2d(16)
        # Classifier head: 16 channels * 5 * 5 spatial positions = 400 inputs.
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        # A 2x2 max pool after each conv stage halves the spatial size.
        self.pool1 = nn.MaxPool2d(2, 2)
        self.pool2 = nn.MaxPool2d(2, 2)
        # One shared in-place ReLU.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Run the forward pass and return raw (unsoftmaxed) class scores."""
        x = self.pool1(self.relu(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu(self.bn2(self.conv2(x))))
        # Collapse every feature map into one vector per sample
        # (equivalent to x.view(x.size()[0], -1)).
        x = x.flatten(1)
        x = self.relu(self.fc2(self.relu(self.fc1(x))))
        return self.fc3(x)
def data_load():
    """Build and return (train_loader, test_loader) for CIFAR-10.

    Reads the dataset from a fixed local directory (no download) and
    normalizes each RGB channel from [0, 1] to [-1, 1]. Batch size comes
    from the module-level BATCH_SIZE constant.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),  # PIL image -> float tensor in [0, 1]
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # -> [-1, 1]
    ])
    # NOTE: raw strings fix the invalid escape sequences ("\p", "\C", "\d")
    # in the original non-raw Windows path literals, which emit
    # SyntaxWarning/DeprecationWarning on modern Python.
    # Training split: reshuffled every epoch.
    train_set = tv.datasets.CIFAR10(
        root=r'H:\pytorch\CIFAR-10Classification\data',
        train=True,
        download=False,
        transform=transform)
    train_loader = t.utils.data.DataLoader(
        train_set,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=0)  # single-process loading (num_workers>0 is flaky on Windows)
    # Test split: fixed order so accuracy is reproducible.
    test_set = tv.datasets.CIFAR10(
        r'H:\pytorch\CIFAR-10Classification\data',
        train=False,
        download=False,
        transform=transform)
    test_loader = t.utils.data.DataLoader(
        test_set,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=0)
    return train_loader, test_loader
def train(A, epochs):
    """Train the module-level `net` on loader A for the given epoch count.

    Uses cross-entropy loss with SGD (lr=0.01) and reports the mean loss
    over every window of 2000 mini-batches. Relies on the module-level
    `net` and `device` globals.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01)
    t.set_num_threads(8)
    net.to(device)
    for epoch in range(epochs):
        window_loss = 0.0
        for step, batch in enumerate(A):
            inputs, targets = batch
            # Batch must live on the same device as the model.
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            logits = net(inputs)
            batch_loss = criterion(logits, targets)
            batch_loss.backward()
            optimizer.step()
            # .item() extracts a Python float so no graph is retained.
            window_loss += batch_loss.item()
            if (step + 1) % 2000 == 0:
                print('[%d, %5d] loss: %.3f'
                      % (epoch + 1, step + 1, window_loss / 2000))
                window_loss = 0.0
    print('Finished Training')
def test(B):
    """Report classification accuracy of the module-level `net` over loader B.

    Bug fix: each batch is now moved to `device` before inference. The
    original left the tensors on CPU while train() had moved `net` to
    `device`, which raises a device-mismatch RuntimeError under CUDA.
    """
    correct = 0  # number of correctly classified images
    total = 0  # total number of images evaluated
    # Inference only: disable autograd to save time and memory.
    with t.no_grad():
        for data in B:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            # Predicted class = index of the maximum logit along dim 1.
            _, predicted = t.max(outputs, 1)
            total += labels.size(0)
            # .item() turns the 0-d count tensor into a plain Python int.
            correct += (predicted == labels).sum().item()
    print('10000张测试集中的准确率为: %d %%' % (100 * correct / total))
# Script entry point: set hyperparameters, build the model, then train and
# evaluate. `BATCH_SIZE` and `net` are read as globals by data_load()/train()/test().
BATCH_SIZE = 4
k = 1  # number of training epochs
net = Lenet()
train_loader, test_loader = data_load()
train(train_loader, epochs=k)
test(test_loader)