Definition
The residual network (ResNet) is a convolutional neural network proposed by four researchers at Microsoft Research; it won the image classification and object detection tasks at the 2015 ImageNet Large Scale Visual Recognition Challenge (ILSVRC). Residual networks are easy to optimize and can gain accuracy from considerably increased depth. Their internal residual blocks use skip connections (shortcuts), which alleviate the vanishing-gradient problem that comes with adding depth to a deep neural network.
Apart from the initial convolution and pooling and the final pooling and fully connected layer, a deep residual network (ResNet) consists of many structurally similar units. What these repeated units have in common is a cross-layer shortcut, and each such unit is called a Residual Block. The structure of a Residual Block is shown below (the curve labeled "x identity" in the figure is the shortcut):
Network architecture diagram
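Formally, following the notation of the original ResNet paper: if x is the block input, the stacked convolutional layers learn a residual mapping F(x, {W_i}), and the shortcut adds the input back unchanged:

    y = F(x, {W_i}) + x

When F(x) and x differ in shape (channel count or spatial size), the shortcut applies a 1x1 convolution to match dimensions, exactly as the extra branch in the code below does.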
resnet.py
import torch
from torch import nn


class ResBlk(nn.Module):
    # basic residual block: two 3x3 convolutions plus a shortcut
    def __init__(self, ch_in, ch_out, stride=1):
        super(ResBlk, self).__init__()
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)
        # the shortcut must match the main path in channels and spatial size,
        # so project with a 1x1 convolution whenever either changes
        self.extra = nn.Sequential()
        if ch_out != ch_in or stride != 1:
            # [b, ch_in, h, w] -> [b, ch_out, h/stride, w/stride]
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out),
            )

    def forward(self, x):
        # x: [b, ch, h, w]
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # shortcut: add the (possibly projected) input to the residual branch
        out = self.extra(x) + out
        # the original ResNet applies ReLU after the addition
        out = self.relu(out)
        return out
# note: the stage layout below (3, 4, 6, 3 basic blocks) actually matches
# ResNet-34's configuration; the class keeps its original name ResNet18
class ResNet18(nn.Module):
    def __init__(self, block=ResBlk):
        super(ResNet18, self).__init__()
        # stem: [b, 3, 32, 32] -> [b, 64, 16, 16]
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        )
        # followed by 4 stages of residual blocks
        # [b, 64, 16, 16] -> [b, 64, 16, 16]
        self.blk1 = nn.Sequential(
            block(64, 64, 1),
            block(64, 64, 1),
            block(64, 64, 1)
        )
        # [b, 64, 16, 16] -> [b, 128, 8, 8]
        self.blk2 = nn.Sequential(
            block(64, 128, 2),
            block(128, 128, 1),
            block(128, 128, 1),
            block(128, 128, 1),
        )
        # [b, 128, 8, 8] -> [b, 256, 4, 4]
        self.blk3 = nn.Sequential(
            block(128, 256, 2),
            block(256, 256, 1),
            block(256, 256, 1),
            block(256, 256, 1),
            block(256, 256, 1),
            block(256, 256, 1),
        )
        # [b, 256, 4, 4] -> [b, 512, 2, 2]
        self.blk4 = nn.Sequential(
            block(256, 512, 2),
            block(512, 512, 1),
            block(512, 512, 1),
        )
        # [b, 512, 2, 2] -> [b, 512, 1, 1]
        self.avg_pool = nn.AvgPool2d(kernel_size=2)
        self.outlayer = nn.Linear(512, 10)
    def forward(self, x):
        # conv1 already ends with ReLU and max-pooling
        x = self.conv1(x)
        x = self.blk1(x)
        x = self.blk2(x)
        x = self.blk3(x)
        x = self.blk4(x)
        # [b, 512, 2, 2] -> [b, 512, 1, 1] -> [b, 512]
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.outlayer(x)
        return x
def main():
    # shape check for a single block: the 1x1 shortcut projects 64 -> 128 channels
    blk = ResBlk(64, 128, 1)
    tmp = torch.randn(2, 64, 32, 32)
    out = blk(tmp)
    print('blk:', out.shape)  # torch.Size([2, 128, 32, 32])

    # shape check for the full network on a CIFAR-10-sized input
    model = ResNet18(ResBlk)
    tmp = torch.randn(2, 3, 32, 32)
    out = model(tmp)
    print('resnet:', out.shape)  # torch.Size([2, 10])


if __name__ == '__main__':
    main()
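As an additional sanity check (a minimal sketch, assuming the resnet.py above is importable), counting trainable parameters makes the ResNet-34-style depth of this model concrete:

import torch
from resnet import ResNet18, ResBlk

model = ResNet18(ResBlk)
# sum the element counts of all trainable parameter tensors
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'trainable parameters: {n_params:,}')

# logits for a CIFAR-10-sized batch should come out as [batch, 10]
out = model(torch.randn(4, 3, 32, 32))
assert out.shape == (4, 10)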
CNN.py
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torch import nn, optim
from torchvision import transforms
# from lenet5 import Lenet5
from resnet import ResNet18, ResBlk
import matplotlib.pyplot as plt
def main():
    batchsz = 32

    # load the CIFAR10 dataset (downloaded on first run)
    cifar_train = datasets.CIFAR10('cifar', True, transform=transforms.Compose([
        transforms.Resize([32, 32]),
        transforms.ToTensor()
    ]), download=True)
    cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)
    cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
        transforms.Resize([32, 32]),
        transforms.ToTensor()
    ]), download=True)
    cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)

    # sanity check: inspect one batch
    x, label = next(iter(cifar_train))
    print('x:', x.shape, 'label:', label.shape)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # model = Lenet5().to(device)
    model = ResNet18(ResBlk).to(device)
    criteon = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)
    train_result = []
    test_result = []
    for epoch in range(100):
        model.train()
        for batchidx, (x, label) in enumerate(cifar_train):
            x, label = x.to(device), label.to(device)
            # logits: [b, 10], label: [b], loss: scalar tensor
            logits = model(x)
            loss = criteon(logits, label)
            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # end of one epoch: record the loss of the last batch
        print(epoch, "loss = ", loss.item())
        train_result.append(loss.item())

        model.eval()
        with torch.no_grad():
            # evaluate accuracy on the test set
            total_correct = 0
            total_num = 0
            for x, label in cifar_test:
                x, label = x.to(device), label.to(device)
                logits = model(x)
                # index of the maximum logit is the predicted class
                pred = logits.argmax(dim=1)
                # [b] vs [b] -> scalar tensor
                total_correct += torch.eq(pred, label).float().sum().item()
                total_num += x.size(0)
            acc = total_correct / total_num
            test_result.append(acc)
            print(epoch, acc)
    # plot the per-epoch training loss and test accuracy
    plt.plot(train_result, label="train_loss")
    plt.plot(test_result, label="test_acc")
    plt.legend()
    plt.savefig("picture.png", dpi=300)


if __name__ == '__main__':
    main()
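One refinement worth noting (a sketch, not part of the original experiment above): the loaders feed raw [0, 1] pixel values, and normalizing with per-channel statistics, optionally plus light augmentation, usually improves convergence and accuracy on CIFAR-10. The mean/std values below are the commonly quoted CIFAR-10 training-set statistics; recompute them if exact values matter.

from torchvision import transforms

# commonly quoted per-channel CIFAR-10 training-set statistics (assumed values)
normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                 std=[0.2470, 0.2435, 0.2616])

train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),   # light augmentation
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
# pass train_transform as the transform= argument of datasets.CIFAR10 above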
The experiment ran for 100 epochs in total. Compared with the LeNet-5 network, ResNet brings a large improvement in accuracy, reaching roughly 80% on the test set.