本次运用了 ResNet50
编号 | 类别 |
0 | airplane |
1 | automobile |
2 | bird |
3 | cat |
4 | deer |
5 | dog |
6 | frog |
7 | horse |
8 | ship |
9 | truck |
def apply(img, aug, num_rows=2, num_cols=4, scale=1.5):
    """Display a grid of independently augmented versions of ``img``.

    ``aug`` is called ``num_rows * num_cols`` times on the same input, and
    the results are handed to ``d2l.show_images`` for display.

    Args:
        img: The input passed unchanged to every ``aug`` call.
        aug: Callable applied to ``img``; one call per grid cell.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        scale: Forwarded to ``d2l.show_images`` as ``scale``.
    """
    augmented = [aug(img) for _ in range(num_rows * num_cols)]
    d2l.show_images(augmented, num_rows, num_cols, scale=scale)
# Demo of a random resized crop: output size 200x200, with the crop's
# ``scale`` range set to (0.1, 1) and its ``ratio`` range set to (0.5, 2).
shape_aug = torchvision.transforms.RandomResizedCrop(
    size=(200, 200),
    scale=(0.1, 1),
    ratio=(0.5, 2),
)
apply(img, shape_aug)
# Per-channel (mean, std) pairs for normalization; with 0.5 / 0.5 a value in
# [0, 1] is mapped to [-1, 1].
stats = ((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

# NOTE(review): both transform lists were truncated in the source (only the
# RandomCrop entry survived). The remaining entries are reconstructed from the
# original comments ("scale to -1..1", "random flip, default p=0.5") and from
# the `/ 2 + 0.5` de-normalization used when previewing images -- confirm.
# The original comment also mentions vertical flips; only the horizontal flip
# is restored here -- TODO confirm whether RandomVerticalFlip was intended.
train_transform = tt.Compose([
    tt.RandomCrop(32, padding=4, padding_mode="reflect"),
    tt.RandomHorizontalFlip(),  # flips with the default probability p=0.5
    tt.ToTensor(),
    tt.Normalize(*stats),
])
test_transform = tt.Compose([
    tt.ToTensor(),
    tt.Normalize(*stats),
])

# Normalized datasets and their loaders. Only the training loader shuffles.
train_data = CIFAR10(download=True, root="Data", transform=train_transform)
test_data = CIFAR10(root="Data", train=False, transform=test_transform)
train_dl = DataLoader(train_data, BATCH_SIZE, num_workers=4, pin_memory=True, shuffle=True)
test_dl = DataLoader(test_data, BATCH_SIZE, num_workers=4, pin_memory=True)
'frog': 5000 1000
'truck': 5000 1000
'deer': 5000 1000
'automobile': 5000 1000
'bird': 5000 1000
'horse': 5000 1000
'ship': 5000 1000
'cat': 5000 1000
'dog': 5000 1000
'airplane': 5000 1000
图像大小为 $32 \times 32$,分为10个类,每类6000张图。其中50000张用于训练,构成训练集;另外10000张用于测试,构成测试集。
# Preview one batch of 8 training images in a 2x4 grid.
train_8_samples = DataLoader(train_data, 8, num_workers=4, pin_memory=True, shuffle=True)
dataiter = iter(train_8_samples)
# NOTE(review): the right-hand side was missing in the source; taking the
# first batch from the iterator is the reconstruction -- confirm.
images, labels = next(dataiter)
fig, axs = plt.subplots(2, 4, figsize=(16, 6))
nums = 0
for i in range(2):
    for j in range(4):
        # Presumably undoes the mean=0.5 / std=0.5 normalization so values
        # are back in [0, 1] for display -- verify against the transforms.
        img = images[nums] / 2 + 0.5
        npimg = img.numpy()
        # Move axis 0 to the end before handing the array to imshow.
        axs[i][j].imshow(np.transpose(npimg, (1, 2, 0)))
        nums += 1
- 调整求解方法,比如更好的初始化、更好的梯度下降算法等
- 调整模型结构,让模型更易于优化——改变模型结构实际上是改变了error surface的形态
ResNet的作者从后者入手,探求更好的模型结构。将堆叠的几层layer称之为一个block,对于某个block,其可以拟合的函数为 $F(x)$,如果期望的潜在映射为 $H(x)$,与其让 $F(x)$ 直接学习潜在的映射,不如去学习残差 $H(x)-x$,即 $F(x)=H(x)-x$,这样原本的前向路径上就变成了 $F(x)+x$,用 $F(x)+x$ 来拟合 $H(x)$。作者认为这样可能更易于优化,因为相比于让 $F(x)$ 学习成恒等映射,让 $F(x)$ 学习成 $0$ 要更加容易——后者通过 $L_2$ 正则就可以轻松实现。这样,对于冗余的block,只需 $F(x) \to 0$ 就可以得到恒等映射,性能不减。
Instead of hoping each few stacked layers directly fit a desired underlying mapping, we explicitly let these layers fit a residual mapping. Formally, denoting the desired underlying mapping as H(x), we let the stacked nonlinear layers fit another mapping of F(x):=H(x)-x. The original mapping is recast into F(x)+x. We hypothesize that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers.
—— from Deep Residual Learning for Image Recognition
$F(x)+x$ 构成的block称之为Residual Block,即残差块,如下图所示,多个相似的Residual Block串联构成ResNet。
一个残差块有2条路径 $F(x)$ 和 $x$:$F(x)$ 路径拟合残差,不妨称之为残差路径;$x$ 路径为identity mapping(恒等映射),称之为“shortcut”。图中的 $\oplus$ 为element-wise addition,要求参与运算的 $F(x)$ 和 $x$ 的尺寸相同。
与plain net相比,ResNet多了很多“旁路”,即shortcut路径,其首尾圈出的layers构成一个Residual Block;
ResNet中,所有的Residual Block都没有pooling层,降采样是通过conv的stride实现的;
分别在conv3_1、conv4_1和conv5_1这三个Residual Block处降采样1倍,同时feature map数量增加1倍,如图中虚线划定的block;
通过Average Pooling得到最终的特征,而不是通过全连接层;
每个卷积层之后都紧接着BatchNorm layer,为了简化,图中并没有标出;
class Bottleneck(nn.Module):
    """Residual block built from a 1x1 -> 3x3 -> 1x1 convolution stack.

    Every convolution is followed by BatchNorm. The 3x3 convolution carries
    ``stride``, and the block outputs ``expansion * planes`` channels.

    The shortcut is the identity unless the stride or the channel count
    changes, in which case a strided 1x1 convolution followed by BatchNorm
    projects the input to the output shape.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        """Create the block.

        Args:
            in_planes: Number of input channels.
            planes: Channel width of the first two convolutions.
            stride: Stride of the 3x3 convolution and of the projection
                shortcut.
        """
        super(Bottleneck, self).__init__()
        out_planes = self.expansion * planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Empty Sequential acts as the identity shortcut.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != out_planes:
            # NOTE(review): the source was cut off after the Conv2d entry;
            # the BatchNorm2d entry is reconstructed -- confirm.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNet(nn.Module):
    """ResNet with a 3x3 stem convolution and four residual stages.

    The stem is a stride-1 3x3 convolution with 64 output channels followed
    by BatchNorm. The stages use widths 64/128/256/512, and stages 2-4 start
    with a stride-2 block. Features are average-pooled to 1x1 and classified
    by a single linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        """Build the network.

        Args:
            block: Residual block class. It must expose an ``expansion``
                class attribute and accept ``(in_planes, planes, stride)``.
            num_blocks: Sequence of four ints, the block count per stage.
            num_classes: Size of the output layer.
        """
        super(ResNet, self).__init__()
        # Running input width for the next block; updated by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        # Separate loop name so the ``stride`` argument is not shadowed.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the output of the final linear layer for a batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Adaptive pooling to 1x1 matches the former fixed 4x4 window on
        # 32x32 inputs and also supports other input resolutions.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.flatten(1)
        out = self.linear(out)
        return out
def ResNet50(num_classes=10):
    """Build a ResNet from ``Bottleneck`` blocks in a [3, 4, 6, 3] layout.

    Args:
        num_classes: Size of the output layer; defaults to 10.
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)
def train(epoch):
    """Run one optimization pass over ``train_dl`` and return its metrics.

    Uses the module-level ``net``, ``criterion``, ``optimizer``, ``device``
    and ``train_dl``.

    NOTE(review): this function was truncated in the source. The device
    transfer, the zero_grad/backward/step calls, the final averaging, the
    progress print and the return value are reconstructed from the sibling
    ``test`` function and the training log -- confirm.

    Args:
        epoch: Epoch index, used only in the progress message.

    Returns:
        dict with the sample-weighted mean ``'loss'`` and the ``'acc'``
        (fraction of correct predictions) for this epoch.
    """
    net.train()
    epoch_loss = 0
    correct = 0
    total = 0
    for inputs, targets in train_dl:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Weight by the batch size so the epoch mean is per sample.
        epoch_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    acc = correct / total
    loss = epoch_loss / total
    # No newline: test() and the caller continue on the same line.
    print('epoches: %d train_loss: %.4f train_acc: %.4f --> ' % (epoch, loss, acc), end=' ')
    return {'loss': loss, 'acc': acc}
def test(epoch):
    """Evaluate ``net`` on ``test_dl`` without gradients and return metrics.

    Uses the module-level ``net``, ``criterion``, ``device`` and ``test_dl``.
    The network is switched to eval mode for the evaluation.

    NOTE(review): the device-transfer line was truncated in the source and is
    reconstructed as ``.to(device)`` -- confirm. The unused
    ``global best_acc`` declaration was dropped.

    Args:
        epoch: Epoch index; accepted for symmetry with ``train`` and unused.

    Returns:
        dict with the sample-weighted mean ``'loss'`` and the ``'acc'``
        (fraction of correct predictions) over the test loader.
    """
    net.eval()
    epoch_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_dl:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            # Weight by the batch size so the mean is per sample.
            epoch_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    acc = correct / total
    loss = epoch_loss / total
    # No newline: the caller appends the elapsed time on the same line.
    print('test_loss: %.4f test_acc: %.4f ' % (loss, acc), end=' ')
    return {'loss': loss, 'acc': acc}
def plot(d, mode='train', best_acc_=None):
    """Plot loss and accuracy curves side by side and save them as a JPEG.

    The figure is written to ``resnet50_cifar10_<mode>.jpg``.

    Args:
        d: Mapping with ``'loss'`` and ``'acc'`` sequences, one entry per
            epoch.
        mode: Label used in the figure title and the output file name.
        best_acc_: Optional ``(x, y)`` pair marked with a red dot on the
            accuracy plot.
    """
    import matplotlib.pyplot as plt

    plt.figure(figsize=(10, 4))
    plt.suptitle('%s_curve' % mode)
    plt.subplots_adjust(wspace=0.2, hspace=0.2)
    epochs = len(d['acc'])

    # Left panel: loss per epoch.
    plt.subplot(1, 2, 1)
    plt.plot(np.arange(epochs), d['loss'], label='loss')
    plt.legend(loc='upper left')

    # Right panel: accuracy per epoch, optionally with the best point marked.
    plt.subplot(1, 2, 2)
    plt.plot(np.arange(epochs), d['acc'], label='acc')
    if best_acc_ is not None:
        plt.scatter(best_acc_[0], best_acc_[1], c='r')
    plt.legend(loc='upper left')

    plt.savefig('resnet50_cifar10_%s.jpg' % mode, bbox_inches='tight')
# Training configuration. An explicit empty argument list is parsed, so
# sys.argv is ignored and --lr always takes its default.
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
args = parser.parse_args(args=[])

device = 'cuda' if torch.cuda.is_available() else 'cpu'
net = ResNet50()
# NOTE(review): the right-hand side was missing in the source; moving the
# model to `device` is the reconstruction -- confirm.
net = net.to(device)

criterion = nn.CrossEntropyLoss()
# NOTE(review): the second SGD argument was missing in the source; `lr=args.lr`
# is reconstructed from the otherwise unused --lr option -- confirm.
optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=60)

# Per-epoch metric history.
train_info = {'loss': [], 'acc': []}
test_info = {'loss': [], 'acc': []}
for epoch in range(61):
    time1 = time.time()
    d_train = train(epoch)
    d_test = test(epoch)
    # NOTE(review): no scheduler step was visible in the truncated source;
    # without one the cosine schedule never changes the learning rate.
    scheduler.step()
    # "%.4s" keeps the first four characters of the elapsed seconds.
    print("%.4ss" % (time.time() - time1), end='\n')
    # NOTE(review): the loop body was cut off in the source; appending each
    # metric to its history list is the reconstruction -- confirm.
    for k in train_info.keys():
        train_info[k].append(d_train[k])
        test_info[k].append(d_test[k])
epoches: 50 train_loss: 0.1613 train_acc: 0.9437 --> test_loss: 0.3568 test_acc: 0.8886 73.6s
epoches: 51 train_loss: 0.1445 train_acc: 0.9496 --> test_loss: 0.3252 test_acc: 0.8972 73.6s
epoches: 52 train_loss: 0.1303 train_acc: 0.9549 --> test_loss: 0.3224 test_acc: 0.9002 73.5s
epoches: 53 train_loss: 0.1125 train_acc: 0.9614 --> test_loss: 0.3165 test_acc: 0.9013 73.5s
epoches: 54 train_loss: 0.0976 train_acc: 0.9667 --> test_loss: 0.3100 test_acc: 0.9073 73.5s
epoches: 55 train_loss: 0.0871 train_acc: 0.9709 --> test_loss: 0.3152 test_acc: 0.9072 73.5s
epoches: 56 train_loss: 0.0795 train_acc: 0.9733 --> test_loss: 0.3089 test_acc: 0.9092 73.5s
epoches: 57 train_loss: 0.0731 train_acc: 0.9754 --> test_loss: 0.3033 test_acc: 0.9114 73.5s
epoches: 58 train_loss: 0.0719 train_acc: 0.9759 --> test_loss: 0.3004 test_acc: 0.9106 73.5s
epoches: 59 train_loss: 0.0679 train_acc: 0.9784 --> test_loss: 0.3028 test_acc: 0.9111 73.5s
epoches: 60 train_loss: 0.0678 train_acc: 0.9781 --> test_loss: 0.3027 test_acc: 0.9113 73.5s
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions, each with BatchNorm.

    The first convolution carries ``stride``, and the block outputs
    ``expansion * planes`` channels. The shortcut is the identity unless the
    stride or the channel count changes, in which case a strided 1x1
    convolution followed by BatchNorm projects the input.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        """Create the block.

        Args:
            in_planes: Number of input channels.
            planes: Channel width of both convolutions.
            stride: Stride of the first convolution and of the projection
                shortcut.
        """
        super(BasicBlock, self).__init__()
        out_planes = self.expansion * planes
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Empty Sequential acts as the identity shortcut.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != out_planes:
            # NOTE(review): the source was cut off after the Conv2d entry;
            # the BatchNorm2d entry is reconstructed -- confirm.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class Bottleneck(nn.Module):
    """Residual block built from a 1x1 -> 3x3 -> 1x1 convolution stack.

    Every convolution is followed by BatchNorm. The 3x3 convolution carries
    ``stride``, and the block outputs ``expansion * planes`` channels.

    The shortcut is the identity unless the stride or the channel count
    changes, in which case a strided 1x1 convolution followed by BatchNorm
    projects the input to the output shape.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        """Create the block.

        Args:
            in_planes: Number of input channels.
            planes: Channel width of the first two convolutions.
            stride: Stride of the 3x3 convolution and of the projection
                shortcut.
        """
        super(Bottleneck, self).__init__()
        out_planes = self.expansion * planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Empty Sequential acts as the identity shortcut.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != out_planes:
            # NOTE(review): the source was cut off after the Conv2d entry;
            # the BatchNorm2d entry is reconstructed -- confirm.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNet(nn.Module):
    """ResNet with a 3x3 stem convolution and four residual stages.

    The stem is a stride-1 3x3 convolution with 64 output channels followed
    by BatchNorm. The stages use widths 64/128/256/512, and stages 2-4 start
    with a stride-2 block. Features are average-pooled to 1x1 and classified
    by a single linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        """Build the network.

        Args:
            block: Residual block class. It must expose an ``expansion``
                class attribute and accept ``(in_planes, planes, stride)``.
            num_blocks: Sequence of four ints, the block count per stage.
            num_classes: Size of the output layer.
        """
        super(ResNet, self).__init__()
        # Running input width for the next block; updated by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        # Separate loop name so the ``stride`` argument is not shadowed.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the output of the final linear layer for a batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Adaptive pooling to 1x1 matches the former fixed 4x4 window on
        # 32x32 inputs and also supports other input resolutions.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.flatten(1)
        out = self.linear(out)
        return out
# The factories below default to num_classes=10.
def ResNet18(num_classes=10):
    """Build a ResNet from ``BasicBlock`` blocks in a [2, 2, 2, 2] layout.

    Args:
        num_classes: Size of the output layer; defaults to 10.
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)
def ResNet34(num_classes=10):
    """Build a ResNet from ``BasicBlock`` blocks in a [3, 4, 6, 3] layout.

    Args:
        num_classes: Size of the output layer; defaults to 10.
    """
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes)
def ResNet50(num_classes=10):
    """Build a ResNet from ``Bottleneck`` blocks in a [3, 4, 6, 3] layout.

    Args:
        num_classes: Size of the output layer; defaults to 10.
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)
def ResNet101(num_classes=10):
    """Build a ResNet from ``Bottleneck`` blocks in a [3, 4, 23, 3] layout.

    Args:
        num_classes: Size of the output layer; defaults to 10.
    """
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes)
def ResNet152(num_classes=10):
    """Build a ResNet from ``Bottleneck`` blocks in a [3, 8, 36, 3] layout.

    Args:
        num_classes: Size of the output layer; defaults to 10.
    """
    return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes)