import torch
import torch.nn as nn
import math
from collections import OrderedDict
# Basic darknet residual block
class BasicBlock(nn.Module):
    def __init__(self, inplanes, planes):  # as in a ResNet block: a 1×1 conv followed by a 3×3 conv
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes[0], kernel_size=1,  # the 1×1 conv reduces the number of channels
                               stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(planes[0])
        self.relu1 = nn.LeakyReLU(0.1)
        self.conv2 = nn.Conv2d(planes[0], planes[1], kernel_size=3,  # the 3×3 conv expands the channels again; note it does NOT shrink the feature map
                               stride=1, padding=1, bias=False)      # this bottleneck design helps keep the parameter count down
        self.bn2 = nn.BatchNorm2d(planes[1])
        self.relu2 = nn.LeakyReLU(0.1)
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu2(out)
        out += residual  # residual connection
        return out
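
# A minimal shape check (hypothetical usage, not part of the original file):
#   block = BasicBlock(64, [32, 64])
#   block(torch.randn(1, 64, 52, 52)).shape  # -> torch.Size([1, 64, 52, 52])
# The block narrows to planes[0] channels, widens back to planes[1], and adds the input,
# so the spatial size and channel count are unchanged.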
class DarkNet(nn.Module):
    def __init__(self, layers):
        super(DarkNet, self).__init__()
        self.inplanes = 32
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)  # first convolution: 3 -> 32 channels
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.relu1 = nn.LeakyReLU(0.1)
        self.layer1 = self._make_layer([32, 64], layers[0])
        self.layer2 = self._make_layer([64, 128], layers[1])
        self.layer3 = self._make_layer([128, 256], layers[2])
        self.layer4 = self._make_layer([256, 512], layers[3])
        self.layer5 = self._make_layer([512, 1024], layers[4])
        self.layers_out_filters = [64, 128, 256, 512, 1024]
        # weight initialization (Kaiming-style normal for conv layers)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
    def _make_layer(self, planes, blocks):  # downsample once, then stack residual blocks
        layers = []
        # downsampling: a stride-2, 3×3 convolution that halves the feature-map size
        layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3,
                                            stride=2, padding=1, bias=False)))
        layers.append(("ds_bn", nn.BatchNorm2d(planes[1])))
        layers.append(("ds_relu", nn.LeakyReLU(0.1)))
        # stack the darknet residual blocks
        self.inplanes = planes[1]
        for i in range(0, blocks):
            layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes)))
        return nn.Sequential(OrderedDict(layers))
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        out3 = self.layer3(x)
        out4 = self.layer4(out3)
        out5 = self.layer5(out4)
        return out3, out4, out5
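
# Hypothetical shape check for a 416×416 input (illustration only, not part of the original file):
#   net = DarkNet([1, 2, 8, 8, 4])
#   o3, o4, o5 = net(torch.randn(1, 3, 416, 416))
#   o3: (1, 256, 52, 52), o4: (1, 512, 26, 26), o5: (1, 1024, 13, 13)
# Each _make_layer stage halves the spatial size, so 416 -> 208 -> 104 -> 52 -> 26 -> 13.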
# `pretrained` is the path to a weight file (pass a falsy value to skip loading)
def darknet53(pretrained, **kwargs):
    model = DarkNet([1, 2, 8, 8, 4])
    if pretrained:
        if isinstance(pretrained, str):
            model.load_state_dict(torch.load(pretrained))
        else:
            raise Exception("darknet requires a pretrained path, got [{}]".format(pretrained))
    return model
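
# Example usage (the weight-file path below is a placeholder, not shipped with this code):
#   backbone = darknet53("model_data/darknet53_weights.pth")  # load pretrained weights
#   backbone = darknet53(None)                                # random init, as used by YoloBody below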
# Conv2d + BatchNorm + LeakyReLU helper; the padding keeps the spatial size unchanged at stride 1
def conv2d(filter_in, filter_out, kernel_size):
    pad = (kernel_size - 1) // 2 if kernel_size else 0
    return nn.Sequential(OrderedDict([
        ("conv", nn.Conv2d(filter_in, filter_out, kernel_size=kernel_size, stride=1, padding=pad, bias=False)),
        ("bn", nn.BatchNorm2d(filter_out)),
        ("relu", nn.LeakyReLU(0.1)),
    ]))
def make_last_layers(filters_list, in_filters, out_filter):
    # seven consecutive convolutions
    m = nn.ModuleList([
        conv2d(in_filters, filters_list[0], 1),
        conv2d(filters_list[0], filters_list[1], 3),
        conv2d(filters_list[1], filters_list[0], 1),
        conv2d(filters_list[0], filters_list[1], 3),
        conv2d(filters_list[1], filters_list[0], 1),
        # the last two layers produce the detection prediction
        conv2d(filters_list[0], filters_list[1], 3),
        nn.Conv2d(filters_list[1], out_filter, kernel_size=1,
                  stride=1, padding=0, bias=True)
    ])
    return m
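
# Channel flow through make_last_layers([512, 1024], 1024, 75), for illustration:
#   1024 -> 512 -> 1024 -> 512 -> 1024 -> 512 (kept as the branch after layer index 4)
#   then 512 -> 1024 -> 75 (the final detection map)
# The 512-channel branch is what the forward pass below feeds into the next pyramid level.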
class YoloBody(nn.Module):
    def __init__(self, config):
        super(YoloBody, self).__init__()
        self.config = config
        # backbone
        self.backbone = darknet53(None)
        out_filters = self.backbone.layers_out_filters  # anchors.shape = [3, 3, 2]
        # last_layer0: e.g. 3 * (5 + 20) = 75 output channels for 20 classes
        final_out_filter0 = len(config["yolo"]["anchors"][0]) * (5 + config["yolo"]["classes"])
        # filters_list = [512, 1024], in_filters = 1024, out_filter = 75
        self.last_layer0 = make_last_layers([512, 1024], out_filters[-1], final_out_filter0)
        # embedding1
        final_out_filter1 = len(config["yolo"]["anchors"][1]) * (5 + config["yolo"]["classes"])
        self.last_layer1_conv = conv2d(512, 256, 1)
        self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')  # upsample to 26×26×256
        self.last_layer1 = make_last_layers([256, 512], out_filters[-2] + 256, final_out_filter1)
        # embedding2
        final_out_filter2 = len(config["yolo"]["anchors"][2]) * (5 + config["yolo"]["classes"])
        self.last_layer2_conv = conv2d(256, 128, 1)
        self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')  # upsample to 52×52×128
        self.last_layer2 = make_last_layers([128, 256], out_filters[-3] + 128, final_out_filter2)
    def forward(self, x):
        def _branch(last_layer, layer_in):
            for i, e in enumerate(last_layer):
                layer_in = e(layer_in)
                if i == 4:
                    out_branch = layer_in  # keep the 5th-layer output as the branch fed to the next level
            return layer_in, out_branch
        # backbone
        # out3 -> 52×52×256, out4 -> 26×26×512, out5 -> 13×13×1024
        x2, x1, x0 = self.backbone(x)
        # yolo branch 0
        out0, out0_branch = _branch(self.last_layer0, x0)
        # yolo branch 1: 1×1 conv followed by upsampling
        x1_in = self.last_layer1_conv(out0_branch)
        x1_in = self.last_layer1_upsample(x1_in)
        x1_in = torch.cat([x1_in, x1], 1)  # concatenate along the channel dimension
        out1, out1_branch = _branch(self.last_layer1, x1_in)
        # yolo branch 2
        x2_in = self.last_layer2_conv(out1_branch)
        x2_in = self.last_layer2_upsample(x2_in)
        x2_in = torch.cat([x2_in, x2], 1)
        out2, _ = _branch(self.last_layer2, x2_in)
        # the outputs are the three pyramid-level predictions: out2 -> 52×52×75, out1 -> 26×26×75, out0 -> 13×13×75
        # each prediction encodes objectness, class scores, and the offsets used to adjust the anchor boxes
        return out0, out1, out2
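
if __name__ == "__main__":
    # Minimal smoke test, assuming a config dict shaped the way YoloBody reads it above
    # (config["yolo"]["anchors"] with 3 anchors per scale and config["yolo"]["classes"]);
    # the anchor values below are the standard YOLOv3 anchors and 20 classes is a VOC-style setting.
    config = {
        "yolo": {
            "anchors": [[[116, 90], [156, 198], [373, 326]],
                        [[30, 61], [62, 45], [59, 119]],
                        [[10, 13], [16, 30], [33, 23]]],
            "classes": 20,
        }
    }
    net = YoloBody(config)
    out0, out1, out2 = net(torch.randn(1, 3, 416, 416))
    # Expected shapes: (1, 75, 13, 13), (1, 75, 26, 26), (1, 75, 52, 52)
    print(out0.shape, out1.shape, out2.shape)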