任务是对10个类别的对象进行分类,使用cifar-10数据集。cifar-10数据集共有60000张彩色图像,大小为32 * 32 * 3,一共有10个类别,每个类别6000张。其中50000张用于训练,10000用于测试。
首先导入必要的包
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import collections
# Train and test on GPU: expose physical GPUs 0 and 1 to this process.
# NOTE: must be set before the first CUDA call to take effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1'
数据读入和加载
下载并使用PyTorch提供的内置数据集
# Convert PIL images to tensors and normalize each RGB channel from [0, 1]
# to roughly (-1, 1) via (x - 0.5) / 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Download (if needed) and wrap the built-in CIFAR-10 train/test splits.
train_data = datasets.CIFAR10(root='./', train=True, download=True, transform=transform)
test_data = datasets.CIFAR10(root='./', train=False, download=True, transform=transform)
# DataLoaders yield mini-batches of batch_size samples; training data is
# shuffled each epoch, test data keeps a fixed order.
train_loader = DataLoader(train_data, batch_size=4, shuffle=True, num_workers=4)
test_loader = DataLoader(test_data, batch_size=4, shuffle=False, num_workers=4)
# Human-readable class names, indexed by the integer labels (0-9).
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
可视化操作
def imshow(img):
    """Un-normalize a CHW image tensor and display it.

    NOTE(review): `plt` is not imported anywhere in this file — this
    function requires `import matplotlib.pyplot as plt` at the top of
    the file, otherwise it raises NameError at call time.
    """
    img = img / 2 + 0.5  # invert Normalize((0.5,)*3, (0.5,)*3): (-1, 1) -> (0, 1)
    npimg = img.numpy()
    # Transpose CHW -> HWC, the layout matplotlib expects.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# Pull one mini-batch (4 images) from the training loader and show it.
images, labels = next(iter(train_loader))
print(images.shape, labels.shape)  # expected: (4, 3, 32, 32) and (4,)
imshow(torchvision.utils.make_grid(images))
# Print the class name of each of the 4 images in the batch.
print(' '.join('%5s'%classes[labels[j]] for j in range(4)))
模型设计
由于任务较为简单,我们搭建一个CNN,模型构建完成后,将模型放在GPU上用于训练。
class Net(nn.Module):
    """LeNet-style CNN for 32x32x3 CIFAR-10 images (10 classes)."""

    def __init__(self):
        super(Net, self).__init__()
        # 3x32x32 -> conv(5) -> 6x28x28 -> pool -> 6x14x14
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        # 6x14x14 -> conv(5) -> 16x10x10 -> pool -> 16x5x5
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class logits of shape (batch, 10)."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten per sample. x.size(0) pins the batch dimension — safer
        # than view(-1, 16*5*5), which silently re-partitions the batch
        # if the spatial dimensions ever change.
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)  # logits; CrossEntropyLoss applies softmax itself
        return x
# Instantiate the model and move its parameters to the GPU.
net = Net()
net = net.cuda()
设定损失函数和优化器
# Cross-entropy over raw logits; Adam with L2 weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-7)
训练
def train(epoch):
    """Run one training epoch over train_loader and print loss/accuracy.

    Uses the module-level net, train_loader, optimizer and criterion.
    """
    net.train()  # enable train-mode behavior (dropout, batchnorm updates)
    train_loss = 0.0
    num_correct = 0
    for image, label in train_loader:
        image, label = image.cuda(), label.cuda()
        optimizer.zero_grad()
        output = net(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        # .item() extracts a Python float — accumulating the raw tensor
        # would keep every batch's autograd graph alive (memory leak)
        # and make train_loss print as a tensor.
        train_loss += loss.item() * image.size(0)
        preds = torch.argmax(output, 1)
        num_correct += (preds == label).sum().item()
    train_loss /= len(train_loader.dataset)
    accuracy = num_correct / len(train_loader.dataset)
    print('Epoch:{}\tTraining Loss:{:.6f}\tTraining Accuracy:{:.6f}'.format(epoch, train_loss, accuracy))
测试
def val(epoch):
    """Evaluate on test_loader; print overall and per-class accuracy.

    Uses the module-level net, test_loader, optimizer, criterion and classes.
    """
    print('current learning rate: ', optimizer.state_dict()["param_groups"][0]["lr"])
    net.eval()  # eval-mode: dropout off, batchnorm uses running stats
    class_correct = [0.0] * 10
    class_total = [0.0] * 10
    val_loss = 0.0
    gt_labels = []
    pred_labels = []
    with torch.no_grad():
        for data, label in test_loader:
            data, label = data.cuda(), label.cuda()
            output = net(data)
            preds = torch.argmax(output, 1)
            correct = (preds == label)
            # Iterate the actual batch length — the original hard-coded
            # range(4), which breaks for any other batch size or a short
            # final batch. (No .squeeze(): it would turn a 1-element
            # batch into a 0-dim tensor and make correct[i] fail.)
            for i in range(label.size(0)):
                cls = label[i].item()
                class_correct[cls] += correct[i].item()
                class_total[cls] += 1
            gt_labels.append(label.cpu().numpy())
            pred_labels.append(preds.cpu().numpy())
            loss = criterion(output, label)
            val_loss += loss.item() * data.size(0)
    val_loss = val_loss / len(test_loader.dataset)
    gt_labels, pred_labels = np.concatenate(gt_labels), np.concatenate(pred_labels)
    acc = np.sum(gt_labels == pred_labels) / len(pred_labels)
    print(F'Epoch:{epoch} \tValidation Loss: {val_loss:6f} , Accuracy: {acc:6f}')
    for i in range(10):
        # Guard against division by zero if a class never appears.
        if class_total[i] > 0:
            print("Accuracy of %5s : %2d %%" %(classes[i], 100*class_correct[i] / class_total[i]))
模型搭建完毕,开始训练和测试,先进行10个轮次。
# Train and evaluate for 10 epochs (reported as epochs 1..10).
for epoch in range(10):
    train(epoch+1)
    val(epoch+1)
结果
Epoch:10 Training Loss:0.855442 Accuracy:0.694800
crrent learning rate: 0.001
Epoch:10 Validation Loss:1.158405 Accuracy:0.618900
发现训练和测试精确率都不满意。
加深模型
由于测试精确率较低,我们考虑采用加深模型来提高精确率,将原本的两层卷积加深为四层。
加宽模型
class Net(nn.Module):
    """Deeper/wider 4-conv CNN for 32x32x3 CIFAR-10 images (10 classes)."""

    def __init__(self):
        super(Net, self).__init__()
        # Spatial sizes: 32 ->30 ->15 ->12 ->6 ->4 ->2 ->5 ->2
        self.conv1 = nn.Conv2d(3, 16, 3)
        self.conv2 = nn.Conv2d(16, 80, 4)
        self.conv3 = nn.Conv2d(80, 400, 3)
        self.conv4 = nn.Conv2d(400, 800, 2, padding=2)
        self.fc1 = nn.Linear(3200, 400)  # 800 channels * 2 * 2 after last pool
        self.fc2 = nn.Linear(400, 120)
        self.fc3 = nn.Linear(120, 84)
        self.fc4 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10)."""
        # Four identical conv stages: conv -> ReLU -> 2x2 max-pool.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self.pool(self.relu(conv(x)))
        x = x.view(x.size()[0], -1)
        # Three hidden FC layers with ReLU, then the linear output layer.
        for fc in (self.fc1, self.fc2, self.fc3):
            x = self.relu(fc(x))
        return self.fc4(x)

    def num_flat_features(self, x):
        """Return the number of features per sample (product of all
        dimensions except the batch dimension)."""
        count = 1
        for dim in x.size()[1:]:
            count *= dim
        return count
# Re-instantiate the (deeper) model and move it to the GPU.
net = Net()
net = net.cuda()
经过加深和加宽模型后结果为:
Epoch:10 Training Loss:0.447619 Accuracy:0.849860
crrent learning rate: 0.001
Epoch:10 Validation Loss:1.045875 Accuracy:0.713100
逐层归一化
如果不进行归一化,那么由于特征向量中不同特征的取值相差较大,会导致目标函数变“扁”。这样在进行梯度下降的时候,梯度的方向就会偏离最小值的方向,走很多弯路,即训练时间过长。
过拟合问题
对比训练精确度和测试精确率发现,训练精确率远大于测试精确率,即考虑出现过拟合问题。
增加dropout
为了解决过拟合问题,增加dropout层,但会增加训练时间。
正则化力度
将正则化惩罚系数(weight_decay)从1e-7改为1e-5。
调整学习率
def adjust_learning_rate(optimizer, epoch):
    """Step-decay schedule: divide the base LR (1e-3) by 10 every 4 epochs."""
    new_lr = 1e-3 * 0.1 ** (epoch // 4)
    for group in optimizer.param_groups:
        group['lr'] = new_lr
class Net(nn.Module):
    """Final CIFAR-10 CNN: four conv stages with batch-norm and dropout,
    followed by a single linear classifier."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 30, 3)
        self.conv2 = nn.Conv2d(30, 300, 4)
        self.conv3 = nn.Conv2d(300, 600, 3)
        self.conv4 = nn.Conv2d(600, 1200, 2, padding=2)
        self.fc1 = nn.Linear(4800, 10)  # 1200 channels * 2 * 2 after last pool
        self.fc2 = nn.Linear(400, 10)  # not used by forward
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2)
        self.dropout = nn.Dropout(0.1)
        self.batchnorm1 = nn.BatchNorm2d(30)
        self.batchnorm2 = nn.BatchNorm2d(300)
        self.batchnorm3 = nn.BatchNorm2d(600)
        self.batchnorm4 = nn.BatchNorm2d(1200)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10)."""
        # Four identical stages: conv -> ReLU -> pool -> batchnorm -> dropout.
        stages = (
            (self.conv1, self.batchnorm1),
            (self.conv2, self.batchnorm2),
            (self.conv3, self.batchnorm3),
            (self.conv4, self.batchnorm4),
        )
        for conv, bn in stages:
            x = self.dropout(bn(self.pool(self.relu(conv(x)))))
        x = x.view(x.size()[0], -1)
        return self.fc1(x)

    def num_flat_features(self, x):
        """Return the number of features per sample (product of all
        dimensions except the batch dimension)."""
        count = 1
        for dim in x.size()[1:]:
            count *= dim
        return count
# Instantiate the final model and move it to the GPU.
net = Net()
net = net.cuda()
最终结果
经过以上诸多处理手段,得到最终结果为:
Epoch:20 Training Loss:0.172814 Accuracy:0.941460
crrent learning rate: 1.0000000000000002e-07
Epoch:20 Validation Loss: 0.641045 Accuracy: 0.801200