import torch
import torch.nn as nn

def conv_bn(inp, oup, stride=1):
    # Standard 3x3 convolution + BatchNorm + ReLU6
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),  # 3x3 kernel
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )

def conv_dw(inp, oup, stride=1):
    return nn.Sequential(
        # Part 1: depthwise conv; output channels equal input channels
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),  # 3x3 kernel
        nn.BatchNorm2d(inp),
        nn.ReLU6(inplace=True),
        # Part 2: plain 1x1 (pointwise) conv to adjust the channel count
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    )
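The point of conv_dw is the parameter saving: a 3x3 depthwise convolution plus a 1x1 pointwise convolution carries far fewer weights than one dense 3x3 convolution. A quick sanity check of my own (not from the original post):

def count_params(m):
    # Total number of learnable parameters in a module
    return sum(p.numel() for p in m.parameters())

print(count_params(conv_bn(256, 512)))  # 1,180,672 (3*3*256*512 conv + BN)
print(count_params(conv_dw(256, 512)))  # 134,912 (2,304 dw + 131,072 pw + BNs), roughly 8.7x fewer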
# stage1's final output is 52,52,256 and serves as a prediction feature layer
# stage2's final output is 26,26,512 and serves as a prediction feature layer
# stage3's final output is 13,13,1024 and serves as a prediction feature layer
class MobileNetV1(nn.Module):
    def __init__(self):
        super(MobileNetV1, self).__init__()
        self.stage1 = nn.Sequential(
            # 416,416,3 -> 208,208,32
            conv_bn(3, 32, 2),    # inp=3, oup=32, stride=2
            # 208,208,32 -> 208,208,64
            conv_dw(32, 64, 1),   # inp=32, oup=64, stride=1
            # 208,208,64 -> 104,104,128
            conv_dw(64, 128, 2),
            # 104,104,128 -> 104,104,128
            conv_dw(128, 128, 1),
            # 104,104,128 -> 52,52,256
            conv_dw(128, 256, 2),
            # 52,52,256 -> 52,52,256
            conv_dw(256, 256, 1),
        )
        # 52,52,256 -> 26,26,512
        self.stage2 = nn.Sequential(
            # 52,52,256 -> 26,26,512
            conv_dw(256, 512, 2),
            # 26,26,512 -> 26,26,512
            conv_dw(512, 512, 1),
            # 26,26,512 -> 26,26,512
            conv_dw(512, 512, 1),
            # 26,26,512 -> 26,26,512
            conv_dw(512, 512, 1),
            # 26,26,512 -> 26,26,512
            conv_dw(512, 512, 1),
            # 26,26,512 -> 26,26,512
            conv_dw(512, 512, 1),
        )
        # 26,26,512 -> 13,13,1024
        self.stage3 = nn.Sequential(
            # 26,26,512 -> 13,13,1024
            conv_dw(512, 1024, 2),
            # 13,13,1024 -> 13,13,1024
            conv_dw(1024, 1024, 1),
        )

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        return x
def mobilenet_v1(pretrained=False, progress=True):
    model = MobileNetV1()
    if pretrained:
        state_dict = torch.load('./model_data/mobilenet_v1_weights.pth')
        model.load_state_dict(state_dict, strict=True)
    return model
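A quick forward pass (my own check, not part of the original code) confirms the shape annotations above:

model = mobilenet_v1()
x = torch.randn(1, 3, 416, 416)
print(model(x).shape)  # torch.Size([1, 1024, 13, 13])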
To use it, simply substitute this MobileNetV1 structure for the CSPDarknet53 backbone in the original YOLOv4; the Neck and Head that follow stay exactly the same. We also need to define a variable, in_filters, holding the output channel counts of the three prediction feature layers, since these counts differ from backbone to backbone. One more point: to cut the parameter count further, we replace every 3x3 convolution in the Neck and Head with a depthwise separable convolution (one in make_three_conv, two in make_five_conv, one in yolo_head, plus the convolutions in down_sample1 and down_sample2, six places in all); a sketch of this substitution follows the wrapper class below.
class MobileNetV1(nn.Module):
    def __init__(self, pretrained=False):
        super(MobileNetV1, self).__init__()
        self.model = mobilenet_v1(pretrained=pretrained)

    def forward(self, x):
        out3 = self.model.stage1(x)     # 52,52,256
        out4 = self.model.stage2(out3)  # 26,26,512
        out5 = self.model.stage3(out4)  # 13,13,1024
        return out3, out4, out5
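A minimal sketch of the two changes just described. in_filters lists the wrapper's output channels; conv1x1 is a hypothetical stand-in for the 1x1 helper the full YOLOv4 code already has, and make_three_conv shows the middle 3x3 convolution swapped for the depthwise separable conv_dw (the other five replacement sites follow the same pattern; the exact signatures in the full code may differ):

# Output channels of the three prediction feature layers for this backbone
in_filters = [256, 512, 1024]

def conv1x1(inp, oup):
    # Hypothetical 1x1 conv helper; the real YOLOv4 code has its own version
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    )

def make_three_conv(filters_list, in_filters):
    # 1x1 -> 3x3 -> 1x1, with the middle 3x3 replaced by the separable conv_dw
    return nn.Sequential(
        conv1x1(in_filters, filters_list[0]),
        conv_dw(filters_list[0], filters_list[1]),  # was a plain 3x3 convolution
        conv1x1(filters_list[1], filters_list[0]),
    )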
See also: YOLOV4网络结构搭建, m0_56247038's blog on CSDN.
class ConvBNReLU(nn.Sequential):
    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        # groups=1 gives a normal convolution; groups=in_channel gives a depthwise convolution
        padding = (kernel_size - 1) // 2  # kernel_size=3 -> padding=1; kernel_size=1 -> padding=0
        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU6(inplace=True)
        )
class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio):  # expand_ratio is the t in the paper
        super(InvertedResidual, self).__init__()
        self.stride = stride
        hidden_dim = inp * expand_ratio  # number of kernels in the first 1x1 conv of the inverted residual
        # Decide whether the forward pass uses a shortcut: only when stride == 1
        # and the input and output feature maps have the same shape
        self.use_res_connect = self.stride == 1 and inp == oup
        layers = []
        # The first 1x1 conv in the inverted residual adjusts the channel count.
        # If expand_ratio == 1, that conv's input and output channel counts would be equal, so it can be dropped.
        # When expand_ratio != 1, the first 1x1 conv expands to hidden_dim = inp * expand_ratio channels.
        if expand_ratio != 1:
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))  # add the first 1x1 conv layer
        layers.extend([
            # groups is set here, so this is a depthwise conv; output channels equal input channels
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
            # This 1x1 conv uses a linear activation rather than ReLU6, so ConvBNReLU cannot be reused
            nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(oup),
            # An activation would normally go here, but a linear activation is just y = x,
            # so nothing needs to be added
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):  # forward pass
        if self.use_res_connect:
            return x + self.conv(x)  # with shortcut
        else:
            return self.conv(x)      # without shortcut
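The MobileNetV2 class below calls _make_divisible, which this excerpt never defines. It is the channel-rounding helper from torchvision's reference implementation, reproduced here so the snippet runs standalone:

def _make_divisible(ch, divisor=8, min_ch=None):
    # Round the channel count to the nearest multiple of divisor,
    # without dropping more than 10% below the original value
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch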
class MobileNetV2(nn.Module):  # define the MobileNetV2 structure
    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):
        # alpha is a hyperparameter: the width multiplier on the number of kernels
        super(MobileNetV2, self).__init__()
        block = InvertedResidual  # the inverted residual block
        input_channel = 32
        last_channel = 1280
        # Inverted residual parameter list:
        # t is the expansion factor of the 1x1 conv inside the Inverted Residuals block (relative to its input channels)
        # c is the output channel count of the feature map
        # n is how many times the bottleneck (the Inverted Residuals block) repeats
        # s is the stride, but only of the DW conv in the first bottleneck; the repeated bottlenecks all use stride 1
        inverted_residual_setting = [
            # t, c, n, s
            # 208,208,32 -> 208,208,16
            [1, 16, 1, 1],
            # 208,208,16 -> 104,104,24
            [6, 24, 2, 2],
            # 104,104,24 -> 52,52,32   serves as a prediction feature layer
            [6, 32, 3, 2],
            # 52,52,32 -> 26,26,64
            [6, 64, 4, 2],
            # 26,26,64 -> 26,26,96    serves as a prediction feature layer
            [6, 96, 3, 1],
            # 26,26,96 -> 13,13,160
            [6, 160, 3, 2],
            # 13,13,160 -> 13,13,320  serves as a prediction feature layer
            [6, 320, 1, 1],
        ]
        features = []
        # First conv layer: RGB input (3 channels) to input_channel
        features.append(ConvBNReLU(3, input_channel, stride=2))
        # Build the series of bottleneck layers
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * alpha, round_nearest)
            # output_channel = [16, 24, 32, 64, 96, 160, 320]
            for i in range(n):
                # The first bottleneck's DW conv has stride s; the repeated bottlenecks all use stride 1
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel  # this layer's output channels feed the next layer as its input channels
        # The conv layer third from the end
        features.append(ConvBNReLU(input_channel, last_channel, kernel_size=1))  # last_channel = 1280
        # Combine the feature layers into one module
        self.features = nn.Sequential(*features)
        # ----------------------- everything above is the feature extractor -----------------------
        # Weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):  # forward pass
        x = self.features(x)
        # x = self.avgpool(x)
        # x = torch.flatten(x, 1)
        # x = self.classifier(x)
        return x
from torch.hub import load_state_dict_from_url

# torchvision's pretrained MobileNetV2 weights
model_urls = {'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'}

def mobilenet_v2(pretrained=False, progress=True):
    model = MobileNetV2()
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], model_dir="model_data",
                                              progress=progress)
        # strict=False: the checkpoint also holds classifier weights, which this feature-extractor-only excerpt omits
        model.load_state_dict(state_dict, strict=False)
    return model
As with MobileNetV1, simply substitute this MobileNetV2 structure for the CSPDarknet53 backbone in the original YOLOv4; the Neck and Head stay the same. Again we define in_filters for the output channel counts of the three prediction feature layers (for this backbone they are 32, 96, and 320), and again every 3x3 convolution in the Neck and Head is replaced with a depthwise separable convolution (one in make_three_conv, two in make_five_conv, one in yolo_head, plus the convolutions in down_sample1 and down_sample2, six places in all).
class MobileNetV2(nn.Module):
    def __init__(self, pretrained=False):
        super(MobileNetV2, self).__init__()
        self.model = mobilenet_v2(pretrained=pretrained)

    def forward(self, x):
        out3 = self.model.features[:7](x)       # 52,52,32;  layers 0-6 of features (zero-indexed)
        out4 = self.model.features[7:14](out3)  # 26,26,96;  layers 7-13
        out5 = self.model.features[14:18](out4) # 13,13,320; layers 14-17
        return out3, out4, out5
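A quick check of my own (not from the original post) that the feature slices line up with the annotated shapes:

backbone = MobileNetV2(pretrained=False)
x = torch.randn(1, 3, 416, 416)
out3, out4, out5 = backbone(x)
print(out3.shape)  # torch.Size([1, 32, 52, 52])
print(out4.shape)  # torch.Size([1, 96, 26, 26])
print(out5.shape)  # torch.Size([1, 320, 13, 13])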
Video reference: Pytorch 搭建自己的Mobilenet-YoloV4目标检测平台 (Bubbliiiing 深度学习 教程), on bilibili.