这张图片转自此篇博客.
观察我们可以发现Darknet53本身由大量的残差连接构成,所以首先我们实现基础卷积模块
########################
# Basic convolution module: DBL (Conv2d -> BatchNorm2d -> LeakyReLU)
########################
class DBl(Module):
    """Convolution followed by batch normalisation and a LeakyReLU activation.

    Args:
        inplane: Number of input channels of the convolution.
        plane: Number of output channels (also the BatchNorm feature count).
        kernel_size: Convolution kernel size.
        padding: Padding passed to the convolution.
        stride: Stride passed to the convolution.
    """

    def __init__(self, inplane, plane, kernel_size=1, padding=0, stride=1):
        super().__init__()
        conv_options = dict(kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv = Conv2d(inplane, plane, **conv_options)
        self.bn = BatchNorm2d(plane)
        # NOTE(review): LeakyReLU() uses PyTorch's default negative_slope
        # (0.01); the reference Darknet configuration uses 0.1 -- confirm
        # which value is intended.
        self.act = LeakyReLU()

    def forward(self, x):
        """Apply the convolution, the batch norm and the activation in order."""
        return self.act(self.bn(self.conv(x)))
########################
# Residual unit: ResUnit
########################
class ResidualBlock(Module):
    """Two stacked DBl modules wrapped by an identity skip connection.

    Args:
        inplanes: Number of channels of the block input.
        planes: Indexable pair; ``planes[0]`` is the channel count after the
            first (1x1) DBl and ``planes[1]`` the channel count after the
            second (3x3) DBl. ``planes[1]`` has to equal ``inplanes`` for the
            element-wise addition in ``forward`` to be valid.
    """

    def __init__(self, inplanes, planes):
        super().__init__()
        reduced_channels = planes[0]
        restored_channels = planes[1]
        self.dbl1 = DBl(inplanes, reduced_channels)
        self.dbl2 = DBl(reduced_channels, restored_channels,
                        kernel_size=3, padding=1, stride=1)

    def forward(self, x):
        """Return ``dbl2(dbl1(x)) + x``."""
        return self.dbl2(self.dbl1(x)) + x
Darknet53 的网络除了前面两层之外, 其余的结构都可以认为是由 [1, 2, 8, 8, 4] 个残差连接块依次堆叠而成。创建残差连接块之后, 我们可以传入一个 blocks 列表用来创建 Darknet53。需要注意的是, 由于 Yolov3 是多尺度预测, 所以我们需要 3 个输出, 分别对应下采样步长 [32, 16, 8], 用于之后的 Concatenate 连接以及 YoloHead 的预测。
class Darknet(Module):
    """Darknet-53 style backbone that returns three feature maps.

    The network is a stride-1 stem convolution followed by five stages. Each
    stage starts with a stride-2 3x3 DBl (so the resolution halves per stage)
    and is followed by ``blocks[i]`` residual blocks.

    Args:
        blocks: Indexable with at least five integers giving the number of
            residual blocks per stage. Defaults to the Darknet-53 layout
            ``(1, 2, 8, 8, 4)``.
    """

    def __init__(self, blocks=(1, 2, 8, 8, 4)):
        super().__init__()
        self.inplanes = 3
        # Stem convolution. Darknet-53 begins with a 3x3 convolution; padding=1
        # keeps the spatial size unchanged at stride 1.
        self.conv1 = DBl(inplane=self.inplanes, plane=32, kernel_size=3, padding=1, stride=1)
        self.layers_1 = self._make_layer(block=blocks[0], inplanes=64, planes=[32, 64])
        self.layers_2 = self._make_layer(block=blocks[1], inplanes=128, planes=[64, 128])
        self.layers_3 = self._make_layer(block=blocks[2], inplanes=256, planes=[128, 256])
        self.layers_4 = self._make_layer(block=blocks[3], inplanes=512, planes=[256, 512])
        self.layers_5 = self._make_layer(block=blocks[4], inplanes=1024, planes=[512, 1024])

    def forward(self, x):
        """Run the backbone.

        Returns:
            Tuple ``(output1, output2, output3)`` taken after stage 5, stage 4
            and stage 3 respectively, i.e. with 1024, 512 and 256 channels and
            total down-sampling factors of 32, 16 and 8.
        """
        x = self.conv1(x)
        x = self.layers_1(x)
        x = self.layers_2(x)
        output3 = self.layers_3(x)
        output2 = self.layers_4(output3)
        output1 = self.layers_5(output2)
        return output1, output2, output3

    @staticmethod
    def _make_layer(block, inplanes, planes):
        """Build one stage: a stride-2 DBl followed by ``block`` residual blocks.

        Args:
            block: Number of residual blocks in the stage.
            inplanes: Channel count fed to every residual block.
            planes: ``[in_channels, out_channels]`` of the down-sampling DBl;
                also forwarded to each residual block.
        """
        # 3x3 convolution, padding 1, stride 2: halves the spatial resolution.
        layers = [DBl(planes[0], planes[1], 3, 1, 2)]
        layers.extend(ResidualBlock(inplanes=inplanes, planes=planes) for _ in range(block))
        return Sequential(*layers)
def get_Darknet53(blocks=(1, 2, 8, 8, 4), pretrained=False, device="cuda:0", checkpoint_path=""):
    """Build a Darknet backbone and optionally load weights from a checkpoint.

    Args:
        blocks: Number of residual blocks per stage, forwarded to ``Darknet``.
        pretrained: When true, load weights from ``checkpoint_path``.
        device: Passed as ``map_location`` when the checkpoint is loaded. The
            model itself is not moved to this device.
        checkpoint_path: Path of the checkpoint file. The file must contain a
            mapping whose ``"weight"`` entry is the model state dict.

    Returns:
        The constructed ``Darknet`` model.

    Raises:
        ValueError: If ``pretrained`` is true but no ``checkpoint_path`` is given.
    """
    print("获取Darknet53")
    model = Darknet(blocks)
    if pretrained:
        if not checkpoint_path:
            raise ValueError("pretrained=True requires a non-empty checkpoint_path")
        print("需要预训练, 正在加载权重.............")
        # NOTE: torch.load unpickles the file; only load checkpoints from a
        # trusted source.
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint["weight"])
        print("加载权重完成........................")
    return model
class YoloNeck(Module):
    """Top-down neck that merges three backbone feature maps.

    The first input is processed, reduced by a 1x1 DBl, upsampled by 2 and
    concatenated (channel axis) with the second input; the same is repeated
    from the second to the third input.

    NOTE(review): ``ConvSets`` is defined outside this block; it is used here
    as a module whose call returns a pair ``(branch_output, carried_features)``
    -- confirm against its definition.
    """

    def __init__(self):
        super().__init__()
        self.convSets_1 = ConvSets(inplane=1024, plane=512)
        # 768 = 256 (upsampled path) + 512 (second input).
        self.convSets_2 = ConvSets(inplane=768, plane=256)
        # 384 = 128 (upsampled path) + 256 (third input).
        self.convSets_3 = ConvSets(inplane=384, plane=128)
        self.conv_1 = DBl(512, 256)
        self.upsample_1 = Upsample(scale_factor=2, mode="nearest")
        self.conv_2 = DBl(256, 128)
        self.upsample_2 = Upsample(scale_factor=2, mode="nearest")

    def forward(self, x):
        """Fuse the three feature maps.

        Args:
            x: Iterable of exactly three tensors, lowest resolution first.

        Returns:
            Tuple of three tensors, one per scale, in the same order as ``x``.
        """
        deep, middle, shallow = x

        branch_1, carried = self.convSets_1(deep)
        carried = self.upsample_1(self.conv_1(carried))

        branch_2, carried = self.convSets_2(torch.cat((carried, middle), dim=1))
        carried = self.upsample_2(self.conv_2(carried))

        # The second value returned by the last ConvSets is not needed.
        branch_3, _ = self.convSets_3(torch.cat((carried, shallow), dim=1))
        return branch_1, branch_2, branch_3
class YoloHead(Module):
    """Prediction heads, one per scale.

    Every head is a 3x3 DBl that doubles the channel count followed by a 1x1
    convolution producing ``anchor_nums * (5 + class_nums)`` channels.

    Args:
        class_nums: Number of object classes.
        anchor_nums: Number of anchors predicted per grid cell.
    """

    def __init__(self, class_nums, anchor_nums):
        super().__init__()
        pred_channels = anchor_nums * (5 + class_nums)
        self.head_1 = self._build_head(512, pred_channels)
        self.head_2 = self._build_head(256, pred_channels)
        self.head_3 = self._build_head(128, pred_channels)

    @staticmethod
    def _build_head(inplane, pred_channels):
        """Return ``Sequential(DBl 3x3 inplane -> 2*inplane, Conv2d 1x1 -> pred_channels)``."""
        hidden = inplane * 2
        return Sequential(
            DBl(inplane, hidden, kernel_size=3, padding=1),
            Conv2d(hidden, pred_channels, kernel_size=1, bias=True),
        )

    def forward(self, x):
        """Apply head_1, head_2 and head_3 to the three inputs, in order.

        Args:
            x: Iterable of exactly three tensors with 512, 256 and 128 channels.

        Returns:
            Tuple of the three head outputs.
        """
        first, second, third = x
        return self.head_1(first), self.head_2(second), self.head_3(third)
class YoloV3(Module):
    """YOLOv3 detector: Darknet backbone, YoloNeck, YoloHead and decoding.

    Args:
        class_nums: Number of object classes.
        anchor_nums: Number of anchors per grid cell.
        train: When true, ``forward`` returns the per-scale outputs of
            ``decode_pred`` in training form and skips NMS.
        anchor: Anchor sizes, indexable by a list of indices (one row per
            anchor).
        anchor_mask: Per-scale collections of indices into ``anchor``; entry 0
            is used for the stride-32 output, entry 1 for stride 16 and
            entry 2 for stride 8.
    """

    def __init__(self, class_nums=class_nums, anchor_nums=anchor_nums, train=False, anchor=anchor, anchor_mask=masks):
        super().__init__()
        # Darknet needs the residual-block counts per stage; use the
        # Darknet-53 layout.
        self.back_bone = Darknet([1, 2, 8, 8, 4])
        self.neck = YoloNeck()
        self.head = YoloHead(class_nums=class_nums, anchor_nums=anchor_nums)
        # Stored as ``train_mode`` rather than ``train``: assigning to
        # ``self.train`` would shadow ``nn.Module.train`` and make
        # ``model.train()`` / ``model.eval()`` raise
        # "'bool' object is not callable".
        self.train_mode = train
        self.class_nums = class_nums
        self.anchors_nums = anchor_nums
        self.anchor = anchor
        self.anchor_mask = anchor_mask

    def forward(self, x):
        """Run detection on a batch of images.

        Returns:
            In training form (``train=True`` at construction) a tuple with the
            three per-scale results of ``decode_pred``; otherwise the result
            of ``yolo_nms`` applied to those three results.
        """
        features = self.head(self.neck(self.back_bone(x)))
        # Output order is largest receptive field first: strides 32, 16, 8.
        strides = (32.0, 16.0, 8.0)
        decoded = tuple(
            decode_pred(feature, self.anchor[list(mask)], self.class_nums, stride, self.train_mode)
            for feature, mask, stride in zip(features, self.anchor_mask, strides)
        )
        if self.train_mode:
            return decoded
        return yolo_nms(decoded)
# Split the raw head output into bbox / objectness / class predictions.
def decode_pred(pred, anchor, class_nums, stride, train=False):
    """Decode one scale of raw YOLO head output.

    Args:
        pred: Tensor of shape ``(batch, num_anchors * (class_nums + 5), H, W)``.
        anchor: Anchor sizes for this scale, ``num_anchors`` rows of
            ``(width, height)``. Only used when ``train`` is false.
        class_nums: Number of object classes.
        stride: Down-sampling factor of this scale; anchors are divided by it
            so they are expressed in grid cells.
        train: When true, return the activated predictions without decoding
            them into boxes.

    Returns:
        ``train=True``: ``(pred_xy, pred_wh, pred_conf, pred_cls)``, each of
        shape ``(batch, num_anchors, H, W, k)`` with ``k`` = 2, 2, 1 and
        ``class_nums``; all but ``pred_wh`` have a sigmoid applied.
        ``train=False``: ``(bbox, pred_conf, pred_cls)`` where ``bbox`` has
        shape ``(batch, num_anchors, H, W, 4)`` holding
        ``(center_x, center_y, width, height)`` in grid-cell units.
    """
    batch_size, channels, grid_h, grid_w = pred.shape
    attrs_per_anchor = class_nums + 5
    # Derive the anchor count from the channel dimension instead of assuming 3.
    num_anchors = channels // attrs_per_anchor

    # (B, A*(C+5), H, W) -> (B, A, H, W, C+5)
    pred = pred.view(batch_size, num_anchors, attrs_per_anchor, grid_h, grid_w)
    pred = pred.permute(0, 1, 3, 4, 2).contiguous()

    pred_xy, pred_wh, pred_conf, pred_cls = torch.split(pred, (2, 2, 1, class_nums), dim=-1)
    # As in the paper: sigmoid on the centre offsets, objectness and classes.
    pred_xy = torch.sigmoid(pred_xy)
    pred_conf = torch.sigmoid(pred_conf)
    pred_cls = torch.sigmoid(pred_cls)
    if train:
        return pred_xy, pred_wh, pred_conf, pred_cls

    # Express the anchors in grid cells and make them broadcastable; keep them
    # on the same device / dtype as the predictions.
    anchor = torch.as_tensor(anchor, dtype=pred.dtype, device=pred.device) / stride
    anchor = anchor.reshape(1, num_anchors, 1, 1, 2)

    # Cell offsets. After the permute above the spatial axes are (row, col),
    # i.e. (y, x), while the last axis stores (x, y): the x offset therefore
    # has to vary along the column axis and the y offset along the row axis.
    cols = torch.arange(grid_w, dtype=pred.dtype, device=pred.device)
    rows = torch.arange(grid_h, dtype=pred.dtype, device=pred.device)
    grid_x = cols.view(1, grid_w).expand(grid_h, grid_w)
    grid_y = rows.view(grid_h, 1).expand(grid_h, grid_w)
    grid = torch.stack((grid_x, grid_y), dim=-1).view(1, 1, grid_h, grid_w, 2)

    bbox_xy = pred_xy + grid
    bbox_wh = anchor * torch.exp(pred_wh)
    # Concatenate along the last axis: bbox[..., (x, y, w, h)].
    bbox = torch.cat((bbox_xy, bbox_wh), dim=-1)
    return bbox, pred_conf, pred_cls
# Non-maximum suppression
def yolo_nms(output, iou_thresh=0.5):
    """Merge the decoded predictions of all scales and run NMS per image.

    Args:
        output: Iterable of per-scale triples ``(bbox, conf, cls)`` where
            ``bbox[..., :4]`` is ``(center_x, center_y, width, height)``,
            ``conf`` is the objectness and ``cls`` the class probabilities.
        iou_thresh: IoU threshold forwarded to ``nms``.

    Returns:
        ``(bbox, conf, cls, idxs)``: the concatenated boxes
        ``(batch, N, 4)``, objectness ``(batch, N, 1)``, class probabilities
        ``(batch, N, num_classes)`` and a list with one tensor of kept box
        indices per image.
    """
    boxes, confidences, class_probs = [], [], []
    for scale in output:
        scale_bbox, scale_conf, scale_cls = scale[0], scale[1], scale[2]
        boxes.append(scale_bbox.view(scale_bbox.size(0), -1, 4))
        confidences.append(scale_conf.view(scale_conf.size(0), -1, 1))
        class_probs.append(scale_cls.view(scale_cls.size(0), -1, scale_cls.size(-1)))
    bbox = torch.cat(boxes, dim=1)
    conf = torch.cat(confidences, dim=1)
    cls = torch.cat(class_probs, dim=1)

    # Rank each box by its best class score (objectness * class probability);
    # the class *index* from argmax is not a score.
    scores, _ = torch.max(conf * cls, dim=-1)

    # NOTE(review): assumes ``nms`` follows the torchvision.ops.nms contract,
    # which expects (x1, y1, x2, y2) corners -- confirm which ``nms`` is
    # imported. The returned ``bbox`` keeps the (cx, cy, w, h) layout.
    half_wh = bbox[..., 2:4] / 2
    centers = bbox[..., 0:2]
    corners = torch.cat((centers - half_wh, centers + half_wh), dim=-1)

    # NOTE(review): if the per-scale boxes are expressed in each scale's own
    # grid units, IoU between boxes of different scales is not meaningful --
    # confirm that callers bring them to a common coordinate frame.
    idxs = [nms(corners[i], scores[i], iou_thresh) for i in range(scores.size(0))]
    return bbox, conf, cls, idxs
这里会继续更新。。。。。