To start, I recommend an article: a detailed walkthrough of YOLOv3 and YOLOv4.
That article is already very thorough. I stumbled on it one day through a public-account push while I was reading the YOLOv4 paper, and after reading it the whole network structure finally clicked; from the paper alone I had only a rough picture of the architecture. Once it clicked, I itched to reproduce the whole network myself, so I started writing code. Before the code, let me recommend a tool: netron, which can render the entire YOLOv4 network structure as a detailed graph. Following the article above and the structure shown by netron, I wrote the code below. It is fairly long; read it if you are interested. For me it also serves as a record of the process.
'''this code was designed by nike hu in 2020 5.30'''
import torch
import torch.nn as nn
# PyTorch did not ship a Mish activation at the time, so we write our own
class Mish(nn.Module):
    def __init__(self):
        super(Mish, self).__init__()

    def forward(self, x):  # Mish = x * tanh(ln(1 + e^x))
        x = x * torch.tanh(nn.functional.softplus(x))
        return x
# CBL block: Conv -> BatchNorm -> LeakyReLU
class CBL(nn.Module):
    def __init__(self, input, output, kernel=1, stride=1, padding=0):
        super(CBL, self).__init__()
        self.cbl = nn.Sequential(
            nn.Conv2d(input, output, kernel_size=kernel, stride=stride, padding=padding),
            nn.BatchNorm2d(output),
            nn.LeakyReLU()
            # With inplace=True, LeakyReLU overwrites its input tensor instead of allocating a new one.
            # Some people report that this can interfere with the backward pass: it is fine at inference
            # time, but during training it may break gradient propagation and make the loss blow up
            # (this claim is debatable; it is only something others have raised).
        )

    def forward(self, x):
        x = self.cbl(x)
        return x
# CBM block: Conv -> BatchNorm -> Mish
class CBM(nn.Module):
    def __init__(self, input, output, kernel=1, stride=1, padding=0):
        super(CBM, self).__init__()
        self.cbl = nn.Sequential(
            nn.Conv2d(input, output, kernel_size=kernel, stride=stride, padding=padding),
            nn.BatchNorm2d(output),
            Mish()
        )

    def forward(self, x):
        x = self.cbl(x)
        return x
# ResUnit: two CBM blocks plus a shortcut connection
class ResUnit(nn.Module):
    def __init__(self, input, middle):
        super(ResUnit, self).__init__()
        self.block = nn.Sequential(
            CBM(input, middle, kernel=1, stride=1, padding=0),
            CBM(middle, input, kernel=3, stride=1, padding=1)
        )

    def forward(self, x):
        x1 = self.block(x)
        x = x + x1  # residual addition
        return x
class CSPResNet(nn.Module):
    def __init__(self, cspin, resin, resmiddle, x):
        # cspin: input channels of the first conv in the CSP block
        # resin: channels fed into (and out of) each residual unit
        # resmiddle: hidden channels inside each residual unit
        #            (they only differ from resin in the first CSP block, where x == 1)
        # x: number of residual units inside this CSP block
        super(CSPResNet, self).__init__()
        self.conv = CBM(cspin, resin, kernel=1, stride=1, padding=0)
        res_units = [ResUnit(resin, resmiddle) for _ in range(x)]
        self.block = nn.Sequential(*res_units)
        self.conv1 = CBM(resin, resin, kernel=1, stride=1, padding=0)  # conv right after the residual units
        self.conv2 = CBM(cspin, resin, kernel=1, stride=1, padding=0)  # conv on the other (shortcut) branch of the CSP block

    def forward(self, x):
        x1 = self.conv(x)
        x1 = self.block(x1)
        x1 = self.conv1(x1)
        x2 = self.conv2(x)
        x = torch.cat((x1, x2), dim=1)
        return x
class CSPDarknet(nn.Module):
    def __init__(self):
        super(CSPDarknet, self).__init__()
        self.con1_1 = CBM(3, 32, kernel=3, stride=1, padding=1)
        self.con1_2 = CBM(32, 64, kernel=3, stride=2, padding=1)
        self.csp1 = CSPResNet(64, 64, 32, 1)
        # second CSP stage
        self.con2_1 = CBM(128, 64, kernel=1, stride=1, padding=0)
        self.con2_2 = CBM(64, 128, kernel=3, stride=2, padding=1)
        self.csp2 = CSPResNet(128, 64, 64, 2)
        # third CSP stage
        self.con3_1 = CBM(128, 128, kernel=1, stride=1, padding=0)
        self.con3_2 = CBM(128, 256, kernel=3, stride=2, padding=1)
        self.csp3 = CSPResNet(256, 128, 128, 8)
        # fourth CSP stage; from here on, branches are taken out for the neck
        self.con4_1 = CBM(256, 256, kernel=1, stride=1, padding=0)
        self.con4_2 = CBM(256, 512, kernel=3, stride=2, padding=1)
        self.csp4 = CSPResNet(512, 256, 256, 8)
        # fifth CSP stage
        self.con5_1 = CBM(512, 512, kernel=1, stride=1, padding=0)
        self.con5_2 = CBM(512, 1024, kernel=3, stride=2, padding=1)
        self.csp5 = CSPResNet(1024, 512, 512, 4)
        self.con5_3 = CBM(1024, 1024, kernel=1, stride=1, padding=0)
        # backbone finished; now build the SPP + FPN part of the neck
        self.neck1 = nn.Sequential(
            CBL(1024, 512, kernel=1, stride=1, padding=0),
            CBL(512, 1024, kernel=3, stride=1, padding=1),
            CBL(1024, 512, kernel=1, stride=1, padding=0)
        )
        # SPP: three max-pool branches with different kernel sizes but the same output size
        self.neck_spp1_1 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
        self.neck_spp1_2 = nn.MaxPool2d(kernel_size=9, stride=1, padding=4)
        self.neck_spp1_3 = nn.MaxPool2d(kernel_size=13, stride=1, padding=6)
        self.neck2 = nn.Sequential(
            CBL(2048, 512, kernel=1, stride=1, padding=0),
            CBL(512, 1024, kernel=3, stride=1, padding=1),
            CBL(1024, 512, kernel=1, stride=1, padding=0)
        )
        self.neck3 = CBL(512, 256, kernel=1, stride=1, padding=0)
        self.upsample = nn.Upsample(scale_factor=2)
        self.fpn1 = CBL(512, 256, kernel=1, stride=1, padding=0)  # used on the second branch
        self.neck4 = nn.Sequential(
            CBL(512, 256, kernel=1, stride=1, padding=0),
            CBL(256, 512, kernel=3, stride=1, padding=1),
            CBL(512, 256, kernel=1, stride=1, padding=0),
            CBL(256, 512, kernel=3, stride=1, padding=1),
            CBL(512, 256, kernel=1, stride=1, padding=0)
        )
        self.neck5 = nn.Sequential(
            CBL(256, 128, kernel=1, stride=1, padding=0),
            nn.Upsample(scale_factor=2)
        )
        self.fpn2 = CBL(256, 128, kernel=1, stride=1, padding=0)
        # build the PAN part
        self.neck6 = nn.Sequential(
            CBL(256, 128, kernel=1, stride=1, padding=0),
            CBL(128, 256, kernel=3, stride=1, padding=1),
            CBL(256, 128, kernel=1, stride=1, padding=0),
            CBL(128, 256, kernel=3, stride=1, padding=1),
            CBL(256, 128, kernel=1, stride=1, padding=0)
        )
        # first output head
        self.out1 = nn.Sequential(
            CBL(128, 256, kernel=3, stride=1, padding=1),
            nn.Conv2d(256, 255, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(255),
        )  # 255 here changes with the number of classes
        # second output head
        self.out2_conv1 = CBL(128, 256, kernel=3, stride=2, padding=1)
        self.out2_conv2 = nn.Sequential(
            CBL(512, 256, kernel=1, stride=1, padding=0),
            CBL(256, 512, kernel=3, stride=1, padding=1),
            CBL(512, 256, kernel=1, stride=1, padding=0),
            CBL(256, 512, kernel=3, stride=1, padding=1),
            CBL(512, 256, kernel=1, stride=1, padding=0),
        )
        self.out2 = nn.Sequential(
            CBL(256, 512, kernel=3, stride=1, padding=1),
            nn.Conv2d(512, 255, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(255),
        )
        # third output head
        self.out3_conv1 = CBL(256, 512, kernel=3, stride=2, padding=1)
        self.out3_conv2 = nn.Sequential(
            CBL(1024, 512, kernel=1, stride=1, padding=0),
            CBL(512, 1024, kernel=3, stride=1, padding=1),
            CBL(1024, 512, kernel=1, stride=1, padding=0),
            CBL(512, 1024, kernel=3, stride=1, padding=1),
            CBL(1024, 512, kernel=1, stride=1, padding=0),
        )
        self.out3 = nn.Sequential(
            CBL(512, 1024, kernel=3, stride=1, padding=1),
            nn.Conv2d(1024, 255, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(255),
        )
    def forward(self, x):
        x = self.con1_1(x)
        x = self.con1_2(x)
        x = self.csp1(x)
        # second CSP stage
        x = self.con2_1(x)
        x = self.con2_2(x)
        x = self.csp2(x)
        # third CSP stage
        x = self.con3_1(x)
        x = self.con3_2(x)
        x = self.csp3(x)
        # fourth CSP stage
        x = self.con4_1(x)
        x1 = x  # first branch (76x76 for a 608x608 input)
        x = self.con4_2(x)
        x = self.csp4(x)
        # fifth CSP stage
        x = self.con5_1(x)
        x2 = x  # second branch (38x38)
        x = self.con5_2(x)
        x = self.csp5(x)
        x = self.con5_3(x)
        # neck: SPP followed by FPN
        x = self.neck1(x)
        y1 = self.neck_spp1_1(x)
        y2 = self.neck_spp1_2(x)
        y3 = self.neck_spp1_3(x)
        x = torch.cat((x, y1, y2, y3), dim=1)
        x = self.neck2(x)
        x3 = x  # third branch (19x19)
        x = self.neck3(x)
        # merge with the second branch
        x = self.upsample(x)
        y4 = self.fpn1(x2)
        x = torch.cat((x, y4), dim=1)
        # merge with the first branch and keep a fourth branch
        x = self.neck4(x)
        x4 = x  # fourth branch
        x = self.neck5(x)
        y5 = self.fpn2(x1)
        x = torch.cat((y5, x), dim=1)
        # PAN and the output heads
        x = self.neck6(x)
        x5 = x  # fifth branch
        out1 = self.out1(x)
        # second output head
        x5 = self.out2_conv1(x5)
        out2 = torch.cat((x5, x4), dim=1)
        out2 = self.out2_conv2(out2)
        x6 = out2  # sixth branch
        out2 = self.out2(out2)
        # third output head
        out3 = self.out3_conv1(x6)
        out3 = torch.cat((out3, x3), dim=1)
        out3 = self.out3_conv2(out3)
        out3 = self.out3(out3)
        print(out1.shape)
        print(out2.shape)
        print(out3.shape)
        return out1, out2, out3
if __name__ == '__main__':
    x = torch.randn(1, 3, 608, 608)
    net = CSPDarknet()
    # fall back to the CPU when no GPU is available
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)
    x = x.to(device)
    x = net(x)
Then take a look at the output:
torch.Size([1, 255, 76, 76])
torch.Size([1, 255, 38, 38])
torch.Size([1, 255, 19, 19])
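For reference, the 255 output channels in each head come from the standard YOLO layout: 3 anchors per scale, each predicting 4 box offsets, 1 objectness score and 80 COCO class scores. A quick sanity check (with a different dataset, only num_classes and the final Conv2d layers would change):

num_anchors = 3
num_classes = 80  # COCO
out_channels = num_anchors * (4 + 1 + num_classes)  # box offsets + objectness + class scores
print(out_channels)  # 255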
Some of the comments in the code are admittedly a bit cryptic. I wrote them off the cuff, so only I really know what they mean; my apologies, but since I doubt many people will read the code line by line, I will not go back and rework them. So, the reproduction is about 90% done. Why only 90%? Because the paper also calls for Cross mini-Batch Normalization (CmBN) and DropBlock regularization, and PyTorch currently provides no built-in interface for either. I glanced at the open-source YOLOv4 implementations on GitHub and did not see how they handle CmBN and DropBlock either. For DropBlock, I did find an interface someone published, screenshot below:
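As a rough stand-in for that interface, here is a minimal DropBlock sketch; the class name, default drop_prob and block_size are my own choices for illustration, not the exact code referenced above:

import torch
import torch.nn as nn

class DropBlock2d(nn.Module):
    '''Randomly zeroes contiguous block_size x block_size regions of the feature map.'''
    def __init__(self, drop_prob=0.1, block_size=7):
        super(DropBlock2d, self).__init__()
        self.drop_prob = drop_prob
        self.block_size = block_size

    def forward(self, x):
        if not self.training or self.drop_prob == 0.:
            return x
        n, _, h, w = x.shape
        # gamma: probability of sampling a block centre, chosen so that roughly
        # drop_prob of all activations end up inside a dropped block
        gamma = (self.drop_prob / (self.block_size ** 2)) * (h * w) / \
                ((h - self.block_size + 1) * (w - self.block_size + 1))
        # sample block centres, then grow each centre into a block via max pooling
        centres = (torch.rand(n, 1, h, w, device=x.device) < gamma).float()
        mask = nn.functional.max_pool2d(centres, kernel_size=self.block_size,
                                        stride=1, padding=self.block_size // 2)
        mask = 1.0 - mask  # 1 = keep, 0 = drop
        # rescale so the expected magnitude of the activations is preserved
        return x * mask * mask.numel() / mask.sum().clamp(min=1.0)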
With that interface you can then apply DropBlock. However, the paper only says that DropBlock is used; where exactly to apply it leaves me puzzled: after every conv, or only after certain ones? The same goes for CmBN: should it replace every BatchNorm in PyTorch, or only some of them? The paper does not spell this out in much detail, or perhaps I simply have not read it thoroughly enough; I will dig into it again when I have time.
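Purely as an illustration (not a placement prescribed by the paper), one way to try the sketch above on a backbone feature map would be:

block = DropBlock2d(drop_prob=0.1, block_size=7)  # hypothetical placement, for illustration only
feat = CBM(256, 512, kernel=3, stride=1, padding=1)(torch.randn(1, 256, 76, 76))
feat = block(feat)  # drops contiguous regions only while block.training is True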
2020 5.30