Residual Networks (ResNet): Notes on the Paper "Deep Residual Learning for Image Recognition"
[Figure: the ResNet architecture diagram from the paper.]
In practice, the backbone of ResNet differs little from that of VGGNet; it is simply much deeper, and it adds identity mappings (shortcut connections) on top.
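To make the shortcut idea concrete before the full implementation, here is a minimal, purely illustrative sketch (a toy block, not the paper's exact design): rather than fitting a target mapping H(x) directly, a residual block fits the residual F(x) = H(x) - x, and the shortcut adds the input back to the output.

```python
import torch
from torch import nn

class TinyResidual(nn.Module):
    """Toy residual block: output = F(x) + x, with an identity shortcut."""
    def __init__(self, channels):
        super().__init__()
        # F(x): any shape-preserving transform will do for this sketch
        self.f = nn.Conv2d(channels, channels, kernel_size=3, padding=1)

    def forward(self, x):
        # The addition is the identity mapping: gradients flow to x unchanged
        return self.f(x) + x
```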
When I reproduced VGG16, I built the network layer by layer. That is clearly impractical here: there are 34 layers, each needing batch normalization and an activation function, so the workload would be considerable. Instead, the network is assembled from reusable blocks.
First, we divide the 34 layers into three parts: the initial 7x7 convolution stem with max pooling, the main residual body, and the final fully connected layer.
The main body can in turn be divided into 4 stages according to the changes in channel count:

- Stage 1: 3 blocks, 64 channels (no downsampling; the stem's max pooling has already reduced the resolution)
- Stage 2: 4 blocks, 128 channels
- Stage 3: 6 blocks, 256 channels
- Stage 4: 3 blocks, 512 channels
Every two 3x3 convolution layers form one residual block. (Strictly speaking, the paper calls this two-layer design a "basic block"; the term "bottleneck" refers to the three-layer 1x1-3x3-1x1 block used in ResNet-50 and deeper. The class below keeps the name BottleNeck anyway.)
```python
class BottleNeck(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=False):
        super(BottleNeck, self).__init__()
        # First 3x3 convolution; stride > 1 halves the spatial resolution.
        # bias=False because a BatchNorm layer follows (the bias is redundant)
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               padding=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # Second 3x3 convolution keeps both resolution and channel count.
        # Each conv gets its own BatchNorm: sharing one module would also
        # share its weights and running statistics, which is a bug
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               padding=1, stride=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        # Projection shortcut (1x1 conv + BN); only built when the shapes of
        # the identity branch and the main branch differ
        self.wi = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1,
                      padding=0, stride=stride, bias=False),
            nn.BatchNorm2d(out_channels)
        ) if downsample else None

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            identity = self.wi(x)
        out = out + identity  # residual addition
        out = self.relu(out)
        return out
```
Note that when a block's input and output shapes do not match, the identity mapping must be transformed to match (this is the projection shortcut from the paper); the downsample parameter controls whether this transformation is applied.
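As a quick, illustrative sanity check (the shapes correspond to the stage-2 transition of ResNet-34): a block going from 64 to 128 channels with stride 2 needs the projection shortcut, while a shape-preserving block can use the plain identity.

```python
import torch

# Transition block: channels 64 -> 128, resolution halved by stride 2
block = BottleNeck(64, 128, stride=2, downsample=True)
x = torch.rand(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 128, 28, 28]): projection matched the shapes

# Shape-preserving block: the plain identity shortcut suffices
plain = BottleNeck(64, 64)
print(plain(x).shape)  # torch.Size([1, 64, 56, 56])
```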
```python
import torch
from torch import nn


class BottleNeck(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=False):
        super(BottleNeck, self).__init__()
        # First 3x3 convolution; stride > 1 halves the spatial resolution.
        # bias=False because a BatchNorm layer follows (the bias is redundant)
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               padding=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # Second 3x3 convolution keeps both resolution and channel count
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               padding=1, stride=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        # Projection shortcut (1x1 conv + BN); only built when the shapes of
        # the identity branch and the main branch differ
        self.wi = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1,
                      padding=0, stride=stride, bias=False),
            nn.BatchNorm2d(out_channels)
        ) if downsample else None

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            identity = self.wi(x)
        out = out + identity  # residual addition
        out = self.relu(out)
        return out


class ResNet34(nn.Module):
    def __init__(self, num_classes):
        super(ResNet34, self).__init__()
        # Stem: the initial 7x7 convolution and max pooling (layer 1)
        self.start = nn.Sequential(
            # 7x7 kernel with stride 2 halves the resolution; padding of 3
            # follows from the output-size formula
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # 3x3 max pooling with stride 2 halves the resolution again;
            # padding of 1 follows from the same formula
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        # Convolutional layers 2-33: the four residual stages
        self.layers = nn.Sequential(
            self._make_layer(64, 64, False, 3),
            self._make_layer(64, 128, True, 4),
            self._make_layer(128, 256, True, 6),
            self._make_layer(256, 512, True, 3)
        )
        # Layer 34: the fully connected classifier
        self.fc = nn.Sequential(
            # Adaptive average pooling to (1, 1) turns the feature map into 512x1x1
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(start_dim=1, end_dim=-1),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        return self.fc(self.layers(self.start(x)))

    def _make_layer(self, in_channels, out_channels, downsample, num_blocks):
        layers = []
        # The first block of a stage changes the channel count and, in every
        # stage except the first, halves the resolution with stride 2
        layers.append(BottleNeck(in_channels, out_channels,
                                 stride=2 if downsample else 1,
                                 downsample=downsample))
        # The remaining blocks preserve shape, so identity shortcuts suffice
        for _ in range(1, num_blocks):
            layers.append(BottleNeck(out_channels, out_channels))
        return nn.Sequential(*layers)


if __name__ == '__main__':
    inputs = torch.rand((8, 3, 224, 224)).cpu()
    model = ResNet34(num_classes=1000).cpu().train()
    outputs = model(inputs)
    print(outputs.shape)  # expected: torch.Size([8, 1000])
```
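As one more rough sanity check, appending the lines below to the `__main__` block should print a parameter count of roughly 21.8M, the figure usually quoted for ResNet-34 (the exact number depends on details such as whether the convolutions carry biases).

```python
    # Total learnable parameters; ResNet-34 is usually quoted at ~21.8M
    num_params = sum(p.numel() for p in model.parameters())
    print(f'{num_params:,}')
```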