- This post is a learning record from the 365-day deep learning training camp
- Original author: K同学啊 | tutoring and project customization available
Week J2: ResNet50V2 Algorithm Practice and Analysis
**Note:** The camp's difficulty has been ramping up over the past few weeks; concretely, source code is no longer handed out directly. Each task instead offers some ideas/directions for improving the algorithm, and I hope everyone explores this part actively. (The exploration process matters a lot, and you will learn more from it.)
The original paper: in it, Kaiming He proposes a new kind of residual unit. We refer to the ResNet structure from this paper as ResNetV2:
Identity Mappings in Deep Residual Networks.pdf
**Improvement:** (a) original is the residual unit of the original ResNet; (b) proposed is the new residual unit. The main difference: structure (a) convolves first and then applies BN and the activation, with a final ReLU after the addition; structure (b) applies BN and the activation first and convolves afterwards, moving the post-addition ReLU inside the residual branch, so the addition result is passed on unchanged.
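The difference in ordering is easiest to see side by side. Here is a minimal sketch (PyTorch; the helper names and channel count `c` are illustrative assumptions, not the bottleneck blocks implemented later) of a plain two-conv residual branch under each scheme:

```python
import torch.nn as nn

def original_branch(c):
    # (a) original: weight -> BN -> ReLU inside the branch,
    # plus a ReLU applied AFTER the addition
    return nn.Sequential(
        nn.Conv2d(c, c, 3, padding=1), nn.BatchNorm2d(c), nn.ReLU(),
        nn.Conv2d(c, c, 3, padding=1), nn.BatchNorm2d(c),
    )  # unit output: relu(x + original_branch(x))

def preact_branch(c):
    # (b) proposed: BN -> ReLU -> weight inside the branch;
    # the addition result is passed on unchanged
    return nn.Sequential(
        nn.BatchNorm2d(c), nn.ReLU(), nn.Conv2d(c, c, 3, padding=1),
        nn.BatchNorm2d(c), nn.ReLU(), nn.Conv2d(c, c, 3, padding=1),
    )  # unit output: x + preact_branch(x)
```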
Result of the change: the authors tested the two structures on CIFAR-10 using a 1001-layer ResNet. From the figure we can see that (b) proposed reaches a clearly lower test error of 4.92%, versus 7.61% for (a) original.
In (b-f), the shortcut connection is impeded by different components. To simplify the illustration, the BN layers are not drawn; every unit here places BN after the weight layer. Panels (a-f) are the authors' different experiments on the shortcut path of the residual unit, with the results listed in the table below.
Running ResNet-110 with these different shortcut structures on CIFAR-10, the authors found the original (a) structure, i.e. the identity mapping, to be the best.
In a second set of experiments, on where to place BN and ReLU within the unit, the best result is (e) full pre-activation, followed by (a) original.
TensorFlow

```python
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import (Input, ZeroPadding2D, Conv2D,
                                     BatchNormalization, Activation,
                                     MaxPooling2D, GlobalAveragePooling2D,
                                     GlobalMaxPooling2D, Dense)

def block2(x, filters, kernel_size=3, stride=1, conv_shortcut=False, name=None):
    """A pre-activation residual block.

    Arguments:
        x: input tensor.
        filters: integer, filters of the bottleneck layer.
        kernel_size: default 3, kernel size of the bottleneck layer.
        stride: default 1, stride of the 3x3 conv and of the shortcut downsampling.
        conv_shortcut: default False, use a convolution shortcut if True,
            otherwise an identity shortcut.
        name: string, block label.

    Returns:
        Output tensor for the residual block.
    """
    preact = BatchNormalization(name=name + '_preact_bn')(x)
    preact = Activation('relu', name=name + '_preact_relu')(preact)

    if conv_shortcut:
        # Projection shortcut, applied to the pre-activated tensor
        shortcut = Conv2D(4 * filters, 1, strides=stride, name=name + '_0_conv')(preact)
    else:
        # Identity shortcut; a 1x1 max-pool downsamples it when stride > 1
        shortcut = MaxPooling2D(1, strides=stride)(x) if stride > 1 else x

    x = Conv2D(filters, 1, strides=1, use_bias=False, name=name + '_1_conv')(preact)
    x = BatchNormalization(name=name + '_1_bn')(x)
    x = Activation('relu', name=name + '_1_relu')(x)

    x = ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
    x = Conv2D(filters, kernel_size, strides=stride, use_bias=False, name=name + '_2_conv')(x)
    x = BatchNormalization(name=name + '_2_bn')(x)
    x = Activation('relu', name=name + '_2_relu')(x)

    x = Conv2D(4 * filters, 1, name=name + '_3_conv')(x)
    x = layers.Add(name=name + '_out')([shortcut, x])
    return x
```
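A quick shape check, as a sketch with assumed toy sizes: an identity-shortcut block preserves the input shape, while a strided block halves the spatial size and outputs `4*filters` channels (so the identity variant requires the input to already have `4*filters` channels).

```python
inputs = Input(shape=(56, 56, 256))
print(block2(inputs, 64, name='demo').shape)             # (None, 56, 56, 256)
print(block2(inputs, 64, stride=2, name='demo2').shape)  # (None, 28, 28, 256)
```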
PyTorch

```python
import torch
import torch.nn as nn

class Block2(nn.Module):
    """Pre-activation residual block."""
    def __init__(self, in_channel, filters, kernel_size=3, stride=1, conv_shortcut=False):
        super(Block2, self).__init__()
        # Pre-activation: BN + ReLU come before the convolutions
        self.preact = nn.Sequential(
            nn.BatchNorm2d(in_channel),
            nn.ReLU(True)
        )

        self.shortcut = conv_shortcut
        if self.shortcut:
            # Projection shortcut, fed with the pre-activated tensor
            self.short = nn.Conv2d(in_channel, 4 * filters, 1, stride=stride, padding=0, bias=False)
        elif stride > 1:
            # Identity shortcut with downsampling (1x1 max-pool), fed with the raw input
            self.short = nn.MaxPool2d(kernel_size=1, stride=stride, padding=0)
        else:
            self.short = nn.Identity()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channel, filters, 1, stride=1, bias=False),
            nn.BatchNorm2d(filters),
            nn.ReLU(True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(filters, filters, kernel_size, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(filters),
            nn.ReLU(True)
        )
        self.conv3 = nn.Conv2d(filters, 4 * filters, 1, stride=1, bias=False)

    def forward(self, x):
        x1 = self.preact(x)
        # The projection shortcut sees the pre-activated tensor;
        # the identity/pooling shortcut sees the raw input
        x2 = self.short(x1) if self.shortcut else self.short(x)
        x1 = self.conv1(x1)
        x1 = self.conv2(x1)
        x1 = self.conv3(x1)
        return x1 + x2
```
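The same sanity check for the PyTorch block (toy sizes assumed; again the identity shortcut requires `in_channel == 4*filters`):

```python
x = torch.randn(1, 256, 56, 56)
print(Block2(256, 64)(x).shape)            # torch.Size([1, 256, 56, 56])
print(Block2(256, 64, stride=2)(x).shape)  # torch.Size([1, 256, 28, 28])
```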
TensorFlow

```python
def stack2(x, filters, blocks, stride1=2, name=None):
    """A stack of pre-activation residual blocks.

    The first block uses a convolution (projection) shortcut; in ResNetV2
    the downsampling (strided) block comes last in the stack.
    """
    x = block2(x, filters, conv_shortcut=True, name=name + '_block1')
    for i in range(2, blocks):
        x = block2(x, filters, name=name + '_block' + str(i))
    x = block2(x, filters, stride=stride1, name=name + '_block' + str(blocks))
    return x
```
PyTorch

```python
class Stack2(nn.Module):
    """A stack of Block2 units; the strided (downsampling) block comes last."""
    def __init__(self, in_channel, filters, blocks, stride=2):
        super(Stack2, self).__init__()
        self.conv = nn.Sequential()
        # First block projects to 4*filters channels
        self.conv.add_module(str(0), Block2(in_channel, filters, conv_shortcut=True))
        for i in range(1, blocks - 1):
            self.conv.add_module(str(i), Block2(4 * filters, filters))
        # Last block downsamples
        self.conv.add_module(str(blocks - 1), Block2(4 * filters, filters, stride=stride))

    def forward(self, x):
        return self.conv(x)
```
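Whatever the input channel count, a stack outputs `4*filters` channels and, with the default stride of 2, halves the spatial size in its final block. A quick check with assumed toy sizes:

```python
x = torch.randn(1, 64, 56, 56)
print(Stack2(64, 64, 3)(x).shape)  # torch.Size([1, 256, 28, 28])
```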
TensorFlow

```python
''' Build ResNet50V2 '''
def ResNet50V2(include_top=True,       # whether to include the fully-connected layer at the top
               preact=True,            # whether to use pre-activation
               use_bias=True,          # whether to use a bias in the first conv layer
               weights='imagenet',     # kept for API parity; not used in this simplified builder
               input_tensor=None,      # optional Keras tensor to use as image input; unused here
               input_shape=None,
               pooling=None,           # optional pooling mode when include_top is False
               classes=1000,           # number of classes to classify images into
               classifier_activation='softmax'):  # activation of the classification layer
    img_input = Input(shape=input_shape)

    x = ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(img_input)
    x = Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x)

    if not preact:
        x = BatchNormalization(name='conv1_bn')(x)
        x = Activation('relu', name='conv1_relu')(x)

    x = ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
    x = MaxPooling2D(3, strides=2, name='pool1_pool')(x)

    x = stack2(x, 64, 3, name='conv2')
    x = stack2(x, 128, 4, name='conv3')
    x = stack2(x, 256, 6, name='conv4')
    x = stack2(x, 512, 3, stride1=1, name='conv5')  # no downsampling in the last stack

    if preact:
        x = BatchNormalization(name='post_bn')(x)
        x = Activation('relu', name='post_relu')(x)

    if include_top:
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        x = Dense(classes, activation=classifier_activation, name='predictions')(x)
    else:
        if pooling == 'avg':
            # GlobalAveragePooling2D averages each channel over the spatial
            # dimensions: width/height disappear and each image becomes a
            # vector of per-channel means
            x = GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D(name='max_pool')(x)

    model = Model(img_input, x, name='resnet50v2')
    return model
```
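A usage sketch, assuming 224x224 RGB inputs and the 4-class task used in the training run at the end:

```python
model = ResNet50V2(input_shape=(224, 224, 3), classes=4)
model.summary()
```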
PyTorch

```python
''' Build ResNet50V2 '''
class ResNet50V2(nn.Module):
    def __init__(self,
                 include_top=True,            # whether to include the fully-connected layer at the top
                 preact=True,                 # whether to use pre-activation
                 use_bias=True,               # whether to use a bias in the first conv layer
                 input_shape=[224, 224, 3],   # kept for API parity; not used here
                 classes=1000,                # number of classes to classify images into
                 pooling=None):               # optional pooling mode when include_top is False
        super(ResNet50V2, self).__init__()

        self.conv1 = nn.Sequential()
        self.conv1.add_module('conv', nn.Conv2d(3, 64, 7, stride=2, padding=3,
                                                bias=use_bias, padding_mode='zeros'))
        if not preact:
            self.conv1.add_module('bn', nn.BatchNorm2d(64))
            self.conv1.add_module('relu', nn.ReLU())
        self.conv1.add_module('max_pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

        self.conv2 = Stack2(64, 64, 3)
        self.conv3 = Stack2(256, 128, 4)
        self.conv4 = Stack2(512, 256, 6)
        self.conv5 = Stack2(1024, 512, 3, stride=1)  # no downsampling in the last stack

        self.post = nn.Sequential()
        if preact:
            self.post.add_module('bn', nn.BatchNorm2d(2048))
            self.post.add_module('relu', nn.ReLU())
        if include_top:
            self.post.add_module('avg_pool', nn.AdaptiveAvgPool2d((1, 1)))
            self.post.add_module('flatten', nn.Flatten())
            self.post.add_module('fc', nn.Linear(2048, classes))
        else:
            if pooling == 'avg':
                self.post.add_module('avg_pool', nn.AdaptiveAvgPool2d((1, 1)))
            elif pooling == 'max':
                self.post.add_module('max_pool', nn.AdaptiveMaxPool2d((1, 1)))

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.post(x)
        return x
```
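Instantiating the PyTorch version with `classes=4` and pushing through a dummy batch confirms the output shape; the structure printout below is in the style of the `torchsummary` package (its use here is an assumption):

```python
import torch
from torchsummary import summary  # assumes torchsummary is installed

model = ResNet50V2(classes=4)
print(model(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 4])
summary(model, (3, 224, 224))
```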
Printed network structure

```
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 112, 112] 9,472
MaxPool2d-2 [-1, 64, 56, 56] 0
BatchNorm2d-3 [-1, 64, 56, 56] 128
ReLU-4 [-1, 64, 56, 56] 0
Conv2d-5 [-1, 256, 56, 56] 16,384
Conv2d-6 [-1, 64, 56, 56] 4,096
BatchNorm2d-7 [-1, 64, 56, 56] 128
ReLU-8 [-1, 64, 56, 56] 0
Conv2d-9 [-1, 64, 56, 56] 36,864
BatchNorm2d-10 [-1, 64, 56, 56] 128
ReLU-11 [-1, 64, 56, 56] 0
Conv2d-12 [-1, 256, 56, 56] 16,384
Block2-13 [-1, 256, 56, 56] 0
BatchNorm2d-14 [-1, 256, 56, 56] 512
ReLU-15 [-1, 256, 56, 56] 0
Identity-16 [-1, 256, 56, 56] 0
Conv2d-17 [-1, 64, 56, 56] 16,384
BatchNorm2d-18 [-1, 64, 56, 56] 128
ReLU-19 [-1, 64, 56, 56] 0
Conv2d-20 [-1, 64, 56, 56] 36,864
BatchNorm2d-21 [-1, 64, 56, 56] 128
ReLU-22 [-1, 64, 56, 56] 0
Conv2d-23 [-1, 256, 56, 56] 16,384
Block2-24 [-1, 256, 56, 56] 0
BatchNorm2d-25 [-1, 256, 56, 56] 512
ReLU-26 [-1, 256, 56, 56] 0
MaxPool2d-27 [-1, 256, 28, 28] 0
Conv2d-28 [-1, 64, 56, 56] 16,384
BatchNorm2d-29 [-1, 64, 56, 56] 128
ReLU-30 [-1, 64, 56, 56] 0
Conv2d-31 [-1, 64, 28, 28] 36,864
BatchNorm2d-32 [-1, 64, 28, 28] 128
ReLU-33 [-1, 64, 28, 28] 0
Conv2d-34 [-1, 256, 28, 28] 16,384
Block2-35 [-1, 256, 28, 28] 0
Stack2-36 [-1, 256, 28, 28] 0
BatchNorm2d-37 [-1, 256, 28, 28] 512
ReLU-38 [-1, 256, 28, 28] 0
Conv2d-39 [-1, 512, 28, 28] 131,072
Conv2d-40 [-1, 128, 28, 28] 32,768
BatchNorm2d-41 [-1, 128, 28, 28] 256
ReLU-42 [-1, 128, 28, 28] 0
Conv2d-43 [-1, 128, 28, 28] 147,456
BatchNorm2d-44 [-1, 128, 28, 28] 256
ReLU-45 [-1, 128, 28, 28] 0
Conv2d-46 [-1, 512, 28, 28] 65,536
Block2-47 [-1, 512, 28, 28] 0
BatchNorm2d-48 [-1, 512, 28, 28] 1,024
ReLU-49 [-1, 512, 28, 28] 0
Identity-50 [-1, 512, 28, 28] 0
Conv2d-51 [-1, 128, 28, 28] 65,536
BatchNorm2d-52 [-1, 128, 28, 28] 256
ReLU-53 [-1, 128, 28, 28] 0
Conv2d-54 [-1, 128, 28, 28] 147,456
BatchNorm2d-55 [-1, 128, 28, 28] 256
ReLU-56 [-1, 128, 28, 28] 0
Conv2d-57 [-1, 512, 28, 28] 65,536
Block2-58 [-1, 512, 28, 28] 0
BatchNorm2d-59 [-1, 512, 28, 28] 1,024
ReLU-60 [-1, 512, 28, 28] 0
Identity-61 [-1, 512, 28, 28] 0
Conv2d-62 [-1, 128, 28, 28] 65,536
BatchNorm2d-63 [-1, 128, 28, 28] 256
ReLU-64 [-1, 128, 28, 28] 0
Conv2d-65 [-1, 128, 28, 28] 147,456
BatchNorm2d-66 [-1, 128, 28, 28] 256
ReLU-67 [-1, 128, 28, 28] 0
Conv2d-68 [-1, 512, 28, 28] 65,536
Block2-69 [-1, 512, 28, 28] 0
BatchNorm2d-70 [-1, 512, 28, 28] 1,024
ReLU-71 [-1, 512, 28, 28] 0
MaxPool2d-72 [-1, 512, 14, 14] 0
Conv2d-73 [-1, 128, 28, 28] 65,536
BatchNorm2d-74 [-1, 128, 28, 28] 256
ReLU-75 [-1, 128, 28, 28] 0
Conv2d-76 [-1, 128, 14, 14] 147,456
BatchNorm2d-77 [-1, 128, 14, 14] 256
ReLU-78 [-1, 128, 14, 14] 0
Conv2d-79 [-1, 512, 14, 14] 65,536
Block2-80 [-1, 512, 14, 14] 0
Stack2-81 [-1, 512, 14, 14] 0
BatchNorm2d-82 [-1, 512, 14, 14] 1,024
ReLU-83 [-1, 512, 14, 14] 0
Conv2d-84 [-1, 1024, 14, 14] 524,288
Conv2d-85 [-1, 256, 14, 14] 131,072
BatchNorm2d-86 [-1, 256, 14, 14] 512
ReLU-87 [-1, 256, 14, 14] 0
Conv2d-88 [-1, 256, 14, 14] 589,824
BatchNorm2d-89 [-1, 256, 14, 14] 512
ReLU-90 [-1, 256, 14, 14] 0
Conv2d-91 [-1, 1024, 14, 14] 262,144
Block2-92 [-1, 1024, 14, 14] 0
BatchNorm2d-93 [-1, 1024, 14, 14] 2,048
ReLU-94 [-1, 1024, 14, 14] 0
Identity-95 [-1, 1024, 14, 14] 0
Conv2d-96 [-1, 256, 14, 14] 262,144
BatchNorm2d-97 [-1, 256, 14, 14] 512
ReLU-98 [-1, 256, 14, 14] 0
Conv2d-99 [-1, 256, 14, 14] 589,824
BatchNorm2d-100 [-1, 256, 14, 14] 512
ReLU-101 [-1, 256, 14, 14] 0
Conv2d-102 [-1, 1024, 14, 14] 262,144
Block2-103 [-1, 1024, 14, 14] 0
BatchNorm2d-104 [-1, 1024, 14, 14] 2,048
ReLU-105 [-1, 1024, 14, 14] 0
Identity-106 [-1, 1024, 14, 14] 0
Conv2d-107 [-1, 256, 14, 14] 262,144
BatchNorm2d-108 [-1, 256, 14, 14] 512
ReLU-109 [-1, 256, 14, 14] 0
Conv2d-110 [-1, 256, 14, 14] 589,824
BatchNorm2d-111 [-1, 256, 14, 14] 512
ReLU-112 [-1, 256, 14, 14] 0
Conv2d-113 [-1, 1024, 14, 14] 262,144
Block2-114 [-1, 1024, 14, 14] 0
BatchNorm2d-115 [-1, 1024, 14, 14] 2,048
ReLU-116 [-1, 1024, 14, 14] 0
Identity-117 [-1, 1024, 14, 14] 0
Conv2d-118 [-1, 256, 14, 14] 262,144
BatchNorm2d-119 [-1, 256, 14, 14] 512
ReLU-120 [-1, 256, 14, 14] 0
Conv2d-121 [-1, 256, 14, 14] 589,824
BatchNorm2d-122 [-1, 256, 14, 14] 512
ReLU-123 [-1, 256, 14, 14] 0
Conv2d-124 [-1, 1024, 14, 14] 262,144
Block2-125 [-1, 1024, 14, 14] 0
BatchNorm2d-126 [-1, 1024, 14, 14] 2,048
ReLU-127 [-1, 1024, 14, 14] 0
Identity-128 [-1, 1024, 14, 14] 0
Conv2d-129 [-1, 256, 14, 14] 262,144
BatchNorm2d-130 [-1, 256, 14, 14] 512
ReLU-131 [-1, 256, 14, 14] 0
Conv2d-132 [-1, 256, 14, 14] 589,824
BatchNorm2d-133 [-1, 256, 14, 14] 512
ReLU-134 [-1, 256, 14, 14] 0
Conv2d-135 [-1, 1024, 14, 14] 262,144
Block2-136 [-1, 1024, 14, 14] 0
BatchNorm2d-137 [-1, 1024, 14, 14] 2,048
ReLU-138 [-1, 1024, 14, 14] 0
MaxPool2d-139 [-1, 1024, 7, 7] 0
Conv2d-140 [-1, 256, 14, 14] 262,144
BatchNorm2d-141 [-1, 256, 14, 14] 512
ReLU-142 [-1, 256, 14, 14] 0
Conv2d-143 [-1, 256, 7, 7] 589,824
BatchNorm2d-144 [-1, 256, 7, 7] 512
ReLU-145 [-1, 256, 7, 7] 0
Conv2d-146 [-1, 1024, 7, 7] 262,144
Block2-147 [-1, 1024, 7, 7] 0
Stack2-148 [-1, 1024, 7, 7] 0
BatchNorm2d-149 [-1, 1024, 7, 7] 2,048
ReLU-150 [-1, 1024, 7, 7] 0
Conv2d-151 [-1, 2048, 7, 7] 2,097,152
Conv2d-152 [-1, 512, 7, 7] 524,288
BatchNorm2d-153 [-1, 512, 7, 7] 1,024
ReLU-154 [-1, 512, 7, 7] 0
Conv2d-155 [-1, 512, 7, 7] 2,359,296
BatchNorm2d-156 [-1, 512, 7, 7] 1,024
ReLU-157 [-1, 512, 7, 7] 0
Conv2d-158 [-1, 2048, 7, 7] 1,048,576
Block2-159 [-1, 2048, 7, 7] 0
BatchNorm2d-160 [-1, 2048, 7, 7] 4,096
ReLU-161 [-1, 2048, 7, 7] 0
Identity-162 [-1, 2048, 7, 7] 0
Conv2d-163 [-1, 512, 7, 7] 1,048,576
BatchNorm2d-164 [-1, 512, 7, 7] 1,024
ReLU-165 [-1, 512, 7, 7] 0
Conv2d-166 [-1, 512, 7, 7] 2,359,296
BatchNorm2d-167 [-1, 512, 7, 7] 1,024
ReLU-168 [-1, 512, 7, 7] 0
Conv2d-169 [-1, 2048, 7, 7] 1,048,576
Block2-170 [-1, 2048, 7, 7] 0
BatchNorm2d-171 [-1, 2048, 7, 7] 4,096
ReLU-172 [-1, 2048, 7, 7] 0
Identity-173 [-1, 2048, 7, 7] 0
Conv2d-174 [-1, 512, 7, 7] 1,048,576
BatchNorm2d-175 [-1, 512, 7, 7] 1,024
ReLU-176 [-1, 512, 7, 7] 0
Conv2d-177 [-1, 512, 7, 7] 2,359,296
BatchNorm2d-178 [-1, 512, 7, 7] 1,024
ReLU-179 [-1, 512, 7, 7] 0
Conv2d-180 [-1, 2048, 7, 7] 1,048,576
Block2-181 [-1, 2048, 7, 7] 0
Stack2-182 [-1, 2048, 7, 7] 0
BatchNorm2d-183 [-1, 2048, 7, 7] 4,096
ReLU-184 [-1, 2048, 7, 7] 0
AdaptiveAvgPool2d-185 [-1, 2048, 1, 1] 0
Flatten-186 [-1, 2048] 0
Linear-187 [-1, 4] 8,196
================================================================
Total params: 23,508,612
Trainable params: 23,508,612
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 241.68
Params size (MB): 89.68
Estimated Total Size (MB): 331.93
----------------------------------------------------------------
```

```
ResNet50V2(
(conv1): Sequential(
(conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
(max_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)
(conv2): Stack2(
(conv): Sequential(
(0): Block2(
(preact): Sequential(
(0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(conv1): Sequential(
(0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(1): Block2(
(preact): Sequential(
(0): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Identity()
(conv1): Sequential(
(0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(2): Block2(
(preact): Sequential(
(0): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): MaxPool2d(kernel_size=1, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv1): Sequential(
(0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv3): Stack2(
(conv): Sequential(
(0): Block2(
(preact): Sequential(
(0): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(conv1): Sequential(
(0): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(1): Block2(
(preact): Sequential(
(0): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Identity()
(conv1): Sequential(
(0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(2): Block2(
(preact): Sequential(
(0): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Identity()
(conv1): Sequential(
(0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(3): Block2(
(preact): Sequential(
(0): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): MaxPool2d(kernel_size=1, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv1): Sequential(
(0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4): Stack2(
(conv): Sequential(
(0): Block2(
(preact): Sequential(
(0): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(conv1): Sequential(
(0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(1): Block2(
(preact): Sequential(
(0): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Identity()
(conv1): Sequential(
(0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(2): Block2(
(preact): Sequential(
(0): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Identity()
(conv1): Sequential(
(0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(3): Block2(
(preact): Sequential(
(0): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Identity()
(conv1): Sequential(
(0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(4): Block2(
(preact): Sequential(
(0): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Identity()
(conv1): Sequential(
(0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(5): Block2(
(preact): Sequential(
(0): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): MaxPool2d(kernel_size=1, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv1): Sequential(
(0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv5): Stack2(
(conv): Sequential(
(0): Block2(
(preact): Sequential(
(0): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(conv1): Sequential(
(0): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(1): Block2(
(preact): Sequential(
(0): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Identity()
(conv1): Sequential(
(0): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(2): Block2(
(preact): Sequential(
(0): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU(inplace=True)
)
(short): Identity()
(conv1): Sequential(
(0): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv2): Sequential(
(0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(post): Sequential(
(bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(avg_pool): AdaptiveAvgPool2d(output_size=(1, 1))
(flatten): Flatten(start_dim=1, end_dim=-1)
(fc): Linear(in_features=2048, out_features=4, bias=True)
)
)
```

Training results

```
Start training...
[2023-02-16 16:13:40] Epoch: 1, Train_acc:21.2%, Train_loss:1.390, Test_acc:19.5%, Test_loss:1.403, Lr:1.00E-07
acc = 19.5%, saving model to best.pkl
[2023-02-16 16:13:52] Epoch: 2, Train_acc:21.7%, Train_loss:1.389, Test_acc:20.4%, Test_loss:1.419, Lr:1.00E-07
acc = 20.4%, saving model to best.pkl
[2023-02-16 16:14:03] Epoch: 3, Train_acc:22.1%, Train_loss:1.384, Test_acc:20.4%, Test_loss:1.412, Lr:1.00E-07
[2023-02-16 16:14:12] Epoch: 4, Train_acc:22.1%, Train_loss:1.386, Test_acc:18.6%, Test_loss:1.398, Lr:1.00E-07
[2023-02-16 16:14:23] Epoch: 5, Train_acc:22.6%, Train_loss:1.384, Test_acc:21.2%, Test_loss:1.407, Lr:1.00E-07
acc = 21.2%, saving model to best.pkl
[2023-02-16 16:14:34] Epoch: 6, Train_acc:25.7%, Train_loss:1.381, Test_acc:17.7%, Test_loss:1.412, Lr:1.00E-07
[2023-02-16 16:14:44] Epoch: 7, Train_acc:23.7%, Train_loss:1.381, Test_acc:18.6%, Test_loss:1.395, Lr:1.00E-07
[2023-02-16 16:14:55] Epoch: 8, Train_acc:25.7%, Train_loss:1.375, Test_acc:16.8%, Test_loss:1.400, Lr:1.00E-07
[2023-02-16 16:15:06] Epoch: 9, Train_acc:24.3%, Train_loss:1.379, Test_acc:21.2%, Test_loss:1.394, Lr:1.00E-07
[2023-02-16 16:15:16] Epoch:10, Train_acc:25.9%, Train_loss:1.376, Test_acc:23.0%, Test_loss:1.411, Lr:1.00E-07
acc = 23.0%, saving model to best.pkl
Done
EVAL 0.23009, 1.41133
```