Name: Bi Xiaopeng
Student ID: 19021210824
【Introduction】Some PyTorch training tricks I have accumulated in daily work, shared here.
【Keywords】Deep learning, classification networks
【Question】Are you familiar with other networks?
【Main Text】
resnet
ResNet is often used as a backbone. Its structure is clean, which makes it easy to pull out a single stage and work on it separately.
BasicBlock uses two 3x3 convolutions.
Bottleneck first reduces the channel dimension with a 1x1 convolution, then applies a 3x3 convolution, and finally restores the dimension with another 1x1 convolution, so the input and output channel counts of the residual block stay the same.
ResNet-50 layer count: with block numbers [3, 4, 6, 3], (3 + 4 + 6 + 3) * 3 + 2 = 50.
[3, 4, 6, 3] are the numbers of residual blocks in the four stages; each Bottleneck block contains three conv layers, and adding the first 7x7 conv layer and the final classification (fc) layer gives 50 layers in total.
Roles of the 1x1 convolution:
Dimension reduction: it shrinks the number of channels, which cuts the parameter count; the following operation then needs far fewer kernel weights.
Dimension expansion: it combines information across channels (the reduction step does this too), and it can add a great deal of non-linearity while keeping the feature-map size unchanged (i.e. without losing resolution). A small parameter-count comparison is sketched below.
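To make the saving concrete, here is a minimal sketch (the 256/64 channel numbers are illustrative, not taken from the article) comparing a plain 3x3 convolution with a 1x1-3x3-1x1 bottleneck of the same input/output width:

from torch import nn

def n_params(module):
    return sum(p.numel() for p in module.parameters())

# plain 3x3 convolution on 256 channels
plain = nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False)

# bottleneck: 1x1 reduce to 64 channels, 3x3 at 64, 1x1 expand back to 256
bottleneck = nn.Sequential(
    nn.Conv2d(256, 64, kernel_size=1, bias=False),
    nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),
    nn.Conv2d(64, 256, kernel_size=1, bias=False),
)

print(n_params(plain))       # 589824
print(n_params(bottleneck))  # 69632, roughly 8x fewer parameters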
The overall structure of the ResNet network:
# encoding: utf-8
"""
@author: liaoxingyu
@contact: [email protected]
"""
import math
import torch
from torch import nn


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


# input has inplanes channels: first compress the channels, then expand back to planes * 4
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, last_stride=1, block=Bottleneck, layers=[3, 4, 6, 3]):
        # number of input channels to the first residual stage
        self.inplanes = 64
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=last_stride)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        # adjust the shortcut to planes * expansion channels when the shape changes
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        # blocks: number of residual blocks in this stage
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        # conv1: 7x7, 64 channels, stride 2
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # max pool: 3x3, stride 2
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def load_param(self, model_path):
        # load pretrained weights, skipping the fc layer
        param_dict = torch.load(model_path)
        # param_dict = torch.load("/home/bi/Downloads/resnet50-19c8e357.pth")
        print("load pretrain success!")
        for i in param_dict:
            if 'fc' in i:
                continue
            self.state_dict()[i].copy_(param_dict[i])

    def random_init(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()


def resnet50(last_stride):
    return ResNet(last_stride=last_stride, block=Bottleneck, layers=[3, 4, 6, 3])
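As a quick sanity check, the backbone can be built and run on a dummy batch like this (a minimal sketch; the weight-file path is just an example):

import torch

model = resnet50(last_stride=1)   # last_stride=1 keeps a larger final feature map
model.random_init()
# model.load_param("/path/to/resnet50-19c8e357.pth")  # optionally load ImageNet weights

x = torch.randn(2, 3, 224, 224)   # dummy batch of two RGB images
feat = model(x)
print(feat.shape)                 # torch.Size([2, 2048, 14, 14]) with last_stride=1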
resnext
ResNeXt uses grouped convolutions to improve the network's performance.
Why grouped convolution works well:
Different groups can learn different kinds of information; AlexNet, which split its convolutions into two groups, is an early reference.
In code, only the residual block differs (the snippet below assumes `math` and `torch.nn as nn` are imported, and that an `IBN` normalization module is defined elsewhere in the repository):
class Bottleneck(nn.Module):
"""
    ResNeXt bottleneck type C
"""
expansion = 4
def __init__(self, inplanes, planes, with_ibn, baseWidth, cardinality, stride=1, downsample=None):
""" Constructor
Args:
inplanes: input channel dimensionality
planes: output channel dimensionality
baseWidth: base width. default 4
cardinality: num of convolution groups. default 32
stride: conv stride. Replaces pooling layer.
"""
super(Bottleneck, self).__init__()
D = int(math.floor(planes * (baseWidth / 64)))
C = cardinality
self.conv1 = nn.Conv2d(inplanes, D * C, kernel_size=1, stride=1, padding=0, bias=False)
if with_ibn:
self.bn1 = IBN(D * C)
else:
self.bn1 = nn.BatchNorm2d(D * C)
self.conv2 = nn.Conv2d(D * C, D * C, kernel_size=3, stride=stride, padding=1, groups=C, bias=False)
self.bn2 = nn.BatchNorm2d(D * C)
self.conv3 = nn.Conv2d(D * C, planes * 4, kernel_size=1, stride=1, padding=0, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
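A minimal usage sketch with the common ResNeXt-50 32x4d setting (cardinality=32, baseWidth=4), which also shows how much the grouped 3x3 convolution saves:

import torch

# D = floor(64 * 4 / 64) = 4, so the grouped 3x3 conv runs on D * C = 128 channels
block = Bottleneck(inplanes=256, planes=64, with_ibn=False,
                   baseWidth=4, cardinality=32)
x = torch.randn(2, 256, 56, 56)
print(block(x).shape)   # torch.Size([2, 256, 56, 56])

grouped = sum(p.numel() for p in block.conv2.parameters())
print(grouped)          # 4608; the same 3x3 conv without groups would need 128*128*9 = 147456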
senet
Operations of the SENet module:
First is the Squeeze operation: features are compressed along the spatial dimensions, turning each 2D feature channel into a single real number. This number has, in some sense, a global receptive field, and the output dimensionality matches the number of input feature channels. It characterizes the global distribution of responses over the feature channels, and it lets layers close to the input also obtain a global receptive field, which is useful in many tasks.
Next is the Excitation operation, a mechanism similar to the gates in recurrent neural networks. Learned parameters generate a weight for each feature channel, explicitly modeling the correlations between the feature channels.
Finally there is a Reweight operation: the weights output by Excitation are treated as the importance of each feature channel after feature selection, and are multiplied channel-wise onto the original features, recalibrating them along the channel dimension.
Implementation:
from torch import nn
class SELayer(nn.Module):
def __init__(self, channel, reduction=16):
super(SELayer, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channel, int(channel / reduction), bias=False),
nn.ReLU(inplace=True),
nn.Linear(int(channel / reduction), channel, bias=False),
nn.Sigmoid()
)
def forward(self, x):
b, c, _, _ = x.size()
y = self.avg_pool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y.expand_as(x)
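A minimal usage sketch: in practice the layer is usually inserted into a residual block right before the shortcut addition; here it is simply applied to a random feature map to show the shapes.

import torch

se = SELayer(channel=256, reduction=16)
x = torch.randn(2, 256, 56, 56)   # (batch, channels, H, W)
y = se(x)                         # each channel is rescaled by a learned weight in (0, 1)
print(y.shape)                    # torch.Size([2, 256, 56, 56])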
densenet
Characteristics of DenseNet:
Feature reuse: dense connections make use of features from different levels. A typical classification network only uses the highest-level features, whereas DenseNet keeps the low-level features as well, so feature utilization is high.
Few parameters, because each layer adds only a small number of channels (the growth rate).
High GPU memory usage, because the forward pass needs not only the current layer's features but also all the earlier ones. A minimal sketch of a dense block follows.
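To illustrate the dense-connection pattern, here is a minimal sketch (not the torchvision implementation; it omits bottleneck and transition layers): each layer receives the concatenation of all previous feature maps and contributes only growth_rate new channels.

import torch
from torch import nn

class DenseLayer(nn.Module):
    # BN -> ReLU -> 3x3 conv, producing growth_rate new channels
    def __init__(self, in_channels, growth_rate):
        super().__init__()
        self.bn = nn.BatchNorm2d(in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv = nn.Conv2d(in_channels, growth_rate, kernel_size=3, padding=1, bias=False)
    def forward(self, x):
        return self.conv(self.relu(self.bn(x)))

class DenseBlock(nn.Module):
    # each layer sees the block input plus all earlier layer outputs, concatenated on channels
    def __init__(self, in_channels, growth_rate, num_layers):
        super().__init__()
        self.layers = nn.ModuleList([
            DenseLayer(in_channels + i * growth_rate, growth_rate)
            for i in range(num_layers)
        ])
    def forward(self, x):
        features = [x]
        for layer in self.layers:
            new_feat = layer(torch.cat(features, dim=1))  # feature reuse across all earlier layers
            features.append(new_feat)
        return torch.cat(features, dim=1)

block = DenseBlock(in_channels=64, growth_rate=32, num_layers=4)
print(block(torch.randn(2, 64, 56, 56)).shape)   # torch.Size([2, 192, 56, 56]): 64 + 4 * 32 channels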
res2net
Res2Net replaces the original single serial 3x3 convolution by splitting the channels into groups, extracting features at multiple scales in a hierarchical way, and then fusing them, which increases the range of receptive fields and the representational power of the network. In the figure from the paper (omitted here), a block contains 4 scales.
The horizontal arrows in that figure are element-wise additions, which fuse feature maps with different receptive fields.
The residual block is implemented as follows (assuming `math`, `torch`, and `torch.nn as nn` are imported):
class Bottle2neck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, baseWidth=26, scale=4, stype='normal'):
""" Constructor
Args:
inplanes: input channel dimensionality
planes: output channel dimensionality
stride: conv stride. Replaces pooling layer.
downsample: None when stride = 1
baseWidth: basic width of conv3x3
scale: number of scale.
type: 'normal': normal set. 'stage': first block of a new stage.
"""
super(Bottle2neck, self).__init__()
width = int(math.floor(planes * (baseWidth / 64.0)))
self.conv1 = nn.Conv2d(inplanes, width * scale, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(width * scale)
if scale == 1:
self.nums = 1
else:
self.nums = scale - 1
if stype == 'stage':
self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1)
convs = []
bns = []
for i in range(self.nums):
convs.append(nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, bias=False))
bns.append(nn.BatchNorm2d(width))
self.convs = nn.ModuleList(convs)
self.bns = nn.ModuleList(bns)
self.conv3 = nn.Conv2d(width * scale, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stype = stype
self.scale = scale
self.width = width
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
spx = torch.split(out, self.width, 1)
for i in range(self.nums):
if i == 0 or self.stype == 'stage':
sp = spx[i]
else:
sp = sp + spx[i]
sp = self.convs[i](sp)
sp = self.relu(self.bns[i](sp))
if i == 0:
out = sp
else:
out = torch.cat((out, sp), 1)
if self.scale != 1 and self.stype == 'normal':
out = torch.cat((out, spx[self.nums]), 1)
elif self.scale != 1 and self.stype == 'stage':
out = torch.cat((out, self.pool(spx[self.nums])), 1)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
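A minimal usage sketch with the paper's default setting (baseWidth=26, scale=4); the first block of a new stage would instead use stype='stage' together with a downsample module:

import torch

# width = floor(64 * 26 / 64) = 26, so conv1 outputs width * scale = 104 channels,
# split into 4 groups of 26 channels for the hierarchical 3x3 convolutions
block = Bottle2neck(inplanes=256, planes=64, baseWidth=26, scale=4, stype='normal')
x = torch.randn(2, 256, 56, 56)
print(block(x).shape)   # torch.Size([2, 256, 56, 56])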
resnest
The full name of ResNeSt is Split-Attention Networks; its key contribution is the Split-Attention module.
The paper draws on:
GoogLeNet, which uses a multi-path mechanism in which each network block is composed of different convolution kernels.
ResNeXt, which uses grouped convolution in the ResNet bottleneck module, turning the multi-path structure into a unified operation.
SE-Net, which introduces a channel-attention mechanism by adaptively recalibrating channel-wise feature responses.
SK-Net, which introduces feature-map attention through two network branches.
The paper gives side-by-side diagrams of ResNeSt, SE-Net and SK-Net, each built around the Split-Attention module (figures omitted here).
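To make the idea concrete, here is a simplified sketch of Split-Attention (not the official implementation: it keeps only the radix dimension and omits the cardinality grouping). The radix feature splits are summed, globally pooled, passed through two 1x1 convs, and re-weighted with a softmax across the splits.

import torch
from torch import nn
import torch.nn.functional as F

class SplitAttention(nn.Module):
    # simplified Split-Attention: radix splits re-weighted by a learned softmax
    def __init__(self, channels, radix=2, reduction=4):
        super().__init__()
        self.radix = radix
        inter_channels = max(channels // reduction, 32)
        # one grouped 3x3 conv produces all radix splits at once
        self.conv = nn.Conv2d(channels, channels * radix, kernel_size=3,
                              padding=1, groups=radix, bias=False)
        self.bn = nn.BatchNorm2d(channels * radix)
        self.relu = nn.ReLU(inplace=True)
        # attention branch: pooled sum -> fc1 -> fc2 -> softmax over the radix splits
        self.fc1 = nn.Conv2d(channels, inter_channels, kernel_size=1)
        self.fc2 = nn.Conv2d(inter_channels, channels * radix, kernel_size=1)
    def forward(self, x):
        b, c = x.shape[0], x.shape[1]
        out = self.relu(self.bn(self.conv(x)))                # (b, c * radix, h, w)
        splits = out.view(b, self.radix, c, *out.shape[2:])   # (b, radix, c, h, w)
        gap = F.adaptive_avg_pool2d(splits.sum(dim=1), 1)     # fuse splits, then global pooling
        attn = self.fc2(self.relu(self.fc1(gap)))             # (b, c * radix, 1, 1)
        attn = F.softmax(attn.view(b, self.radix, c, 1, 1), dim=1)
        return (attn * splits).sum(dim=1)                     # weighted fusion -> (b, c, h, w)

sa = SplitAttention(channels=64, radix=2)
print(sa(torch.randn(2, 64, 56, 56)).shape)   # torch.Size([2, 64, 56, 56])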