Pytorch 搭建yolov3 搭建yolov3的网络结构

yolov3的网络结构如下图

Pytorch 搭建yolov3 搭建yolov3的网络结构_第1张图片
这张图片转自此篇博客.
观察我们可以发现Darknet53本身由大量的残差连接构成,所以首先我们实现基础卷积模块

创建基础卷积模块DBL

########################
# Basic convolution module: DBL (Conv2d -> BatchNorm2d -> LeakyReLU)
########################
class DBl(Module):
    """Convolution followed by batch normalisation and a LeakyReLU activation.

    Args:
        inplane: Number of input channels of the convolution.
        plane: Number of output channels (also the BatchNorm feature count).
        kernel_size: Convolution kernel size.
        padding: Padding passed to the convolution.
        stride: Convolution stride.
    """

    def __init__(self, inplane, plane, kernel_size=1, padding=0, stride=1):
        super().__init__()
        self.conv = Conv2d(inplane, plane, kernel_size, stride=stride, padding=padding)
        self.bn = BatchNorm2d(plane)
        # Constructed without arguments, so PyTorch's default negative slope applies.
        self.act = LeakyReLU()

    def forward(self, x):
        """Apply convolution, normalisation and activation, in that order."""
        return self.act(self.bn(self.conv(x)))

创建残差连接模块

########################
# Residual connection block: ResUnit
########################
class ResidualBlock(Module):
    """Two stacked DBl layers wrapped by an identity skip connection.

    Args:
        inplanes: Channel count of the block input.
        planes: Indexable pair ``(mid, out)``. The first DBl (default 1x1
            kernel) maps ``inplanes -> mid``; the second (3x3 kernel,
            padding 1, stride 1) maps ``mid -> out``. The addition in
            ``forward`` requires the result to be shape-compatible with
            the input.
    """

    def __init__(self, inplanes, planes):
        super().__init__()
        mid_planes, out_planes = planes[0], planes[1]
        self.dbl1 = DBl(inplanes, mid_planes)
        self.dbl2 = DBl(mid_planes, out_planes, kernel_size=3, padding=1, stride=1)

    def forward(self, x):
        """Return ``dbl2(dbl1(x)) + x``."""
        return self.dbl2(self.dbl1(x)) + x

实现Darknet53

Darknet53 的网络除了前面两层, 其余的结构都可以认为是按照 [1, 2, 8, 8, 4] 个残差连接块所构成。创建残差连接块之后, 我们可以传入一个 blocks 列表来创建 Darknet53。需要注意的是, 由于 YOLOv3 是多尺度预测, 所以我们需要 3 个输出, 它们相对于输入图片的下采样倍数(总步长)分别为 [32, 16, 8], 用于之后的 Concatenate 连接以及 YoloHead 的预测。

class Darknet(Module):
    """Darknet backbone returning feature maps at three scales.

    The network is a stem convolution followed by five stages. Each stage
    starts with a stride-2 DBl that halves the resolution and is followed by
    a configurable number of residual blocks.

    Args:
        blocks: Number of residual blocks in each of the five stages
            (indices 0..4 are read). Defaults to the Darknet-53 layout
            ``(1, 2, 8, 8, 4)`` so that ``Darknet()`` builds Darknet-53.
    """

    def __init__(self, blocks=(1, 2, 8, 8, 4)):
        super().__init__()
        self.inplanes = 3
        # Stem: Darknet-53 opens with a 3x3, stride-1 convolution producing
        # 32 channels; padding=1 keeps the spatial size unchanged.
        self.conv1 = DBl(inplane=self.inplanes, plane=32, kernel_size=3, padding=1, stride=1)
        self.layers_1 = self._make_layer(block=blocks[0], inplanes=64, planes=[32, 64])
        self.layers_2 = self._make_layer(block=blocks[1], inplanes=128, planes=[64, 128])
        self.layers_3 = self._make_layer(block=blocks[2], inplanes=256, planes=[128, 256])
        self.layers_4 = self._make_layer(block=blocks[3], inplanes=512, planes=[256, 512])
        self.layers_5 = self._make_layer(block=blocks[4], inplanes=1024, planes=[512, 1024])

    def forward(self, x):
        """Run the backbone.

        Returns:
            Tuple ``(output1, output2, output3)`` taken after stages 5, 4 and
            3 respectively, i.e. with 1024, 512 and 256 channels and overall
            downsampling factors of 32, 16 and 8.
        """
        x = self.conv1(x)
        x = self.layers_1(x)
        x = self.layers_2(x)
        output3 = self.layers_3(x)
        output2 = self.layers_4(output3)
        output1 = self.layers_5(output2)
        return output1, output2, output3

    @staticmethod
    def _make_layer(block, inplanes, planes):
        """Build one stage: a downsampling DBl plus ``block`` residual blocks.

        Args:
            block: Number of residual blocks in the stage.
            inplanes: Input channel count of every residual block.
            planes: Pair ``(mid, out)``; the downsampling DBl maps
                ``mid -> out`` and each residual block uses the same pair.
        """
        # 3x3 convolution with padding 1 and stride 2: halves the resolution.
        layers = [DBl(planes[0], planes[1], kernel_size=3, padding=1, stride=2)]
        layers.extend(ResidualBlock(inplanes=inplanes, planes=planes) for _ in range(block))
        return Sequential(*layers)

def get_Darknet53(blocks=(1, 2, 8, 8, 4), pretrained=False, device="cuda:0", checkpoint_path=""):
    """Build a Darknet-53 backbone, optionally loading pretrained weights.

    Args:
        blocks: Residual blocks per stage, forwarded to ``Darknet``.
        pretrained: When true, load weights from ``checkpoint_path``.
        device: Passed to ``torch.load`` as ``map_location``.
        checkpoint_path: Path of a checkpoint file containing a mapping whose
            ``"weight"`` entry is the model state dict. Required when
            ``pretrained`` is true.

    Returns:
        The constructed ``Darknet`` model.

    Raises:
        ValueError: If ``pretrained`` is true but no ``checkpoint_path`` is given.
    """
    # Fail fast, before building the model, when there is nothing to load.
    if pretrained and not checkpoint_path:
        raise ValueError("pretrained=True requires a non-empty checkpoint_path")

    print("获取Darknet53")
    model = Darknet(blocks)
    if pretrained:
        # Function-scope import keeps this helper self-contained.
        import torch

        print("需要预训练, 正在加载权重.............")
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint["weight"])
        print("加载权重完成........................")
    return model

定义 YoloNeck

class YoloNeck(Module):
    """Neck that fuses three backbone feature maps from coarse to fine.

    ``ConvSets`` is defined outside this block; it is used here as a module
    that returns a pair, whose first element becomes the output for that
    scale and whose second element feeds the upsampling branch.
    """

    def __init__(self):
        super().__init__()
        # Input widths 768 and 384 are the concatenated channel counts
        # (256 + 512 and 128 + 256) produced in ``forward``.
        self.convSets_1 = ConvSets(inplane=1024, plane=512)
        self.convSets_2 = ConvSets(inplane=768, plane=256)
        self.convSets_3 = ConvSets(inplane=384, plane=128)
        self.conv_1 = DBl(512, 256)
        self.upsample_1 = Upsample(scale_factor=2, mode="nearest")
        self.conv_2 = DBl(256, 128)
        self.upsample_2 = Upsample(scale_factor=2, mode="nearest")

    def forward(self, x):
        """Fuse the features.

        Args:
            x: Triple ``(output1, output2, output3)`` of feature maps.

        Returns:
            Triple with one fused feature map per scale, in the same order.
        """
        coarse, middle, fine = x

        out_coarse, branch = self.convSets_1(coarse)
        # Reduce channels, double the resolution, then join the next scale.
        branch = self.upsample_1(self.conv_1(branch))
        merged = torch.cat((branch, middle), dim=1)

        out_middle, branch = self.convSets_2(merged)
        branch = self.upsample_2(self.conv_2(branch))
        merged = torch.cat((branch, fine), dim=1)

        out_fine, _ = self.convSets_3(merged)
        return out_coarse, out_middle, out_fine

定义 YoloHead

class YoloHead(Module):
    """Prediction heads, one per scale.

    Each head is a 3x3 DBl followed by a 1x1 convolution with
    ``anchor_nums * (5 + class_nums)`` output channels.

    Args:
        class_nums: Number of classes.
        anchor_nums: Number of anchors predicted per scale.
    """

    def __init__(self, class_nums, anchor_nums):
        super().__init__()
        out_channels = anchor_nums * (5 + class_nums)
        self.head_1 = self._build_head(512, 1024, out_channels)
        self.head_2 = self._build_head(256, 512, out_channels)
        self.head_3 = self._build_head(128, 256, out_channels)

    @staticmethod
    def _build_head(inplane, plane, out_channels):
        """Return one head: DBl(inplane -> plane, 3x3) then Conv2d(plane -> out_channels, 1x1)."""
        return Sequential(
            DBl(inplane=inplane, plane=plane, kernel_size=3, padding=1),
            Conv2d(in_channels=plane, out_channels=out_channels, kernel_size=1, bias=True),
        )

    def forward(self, x):
        """Apply each head to its feature map.

        Args:
            x: Triple of feature maps with 512, 256 and 128 channels.

        Returns:
            Triple of raw prediction maps, in the same order.
        """
        feat_1, feat_2, feat_3 = x
        return self.head_1(feat_1), self.head_2(feat_2), self.head_3(feat_3)

定义 YoloV3

class YoloV3(Module):
    """Complete YOLOv3 model: Darknet backbone, neck, heads and decoding.

    Args:
        class_nums: Number of classes.
        anchor_nums: Number of anchors per scale.
        train: Selects the output of ``forward``. When true, the decoded
            per-scale predictions are returned directly; otherwise they are
            passed through ``yolo_nms``. This flag is separate from the
            ``nn.Module`` training mode toggled by ``.train()`` / ``.eval()``.
        anchor: Anchor collection indexed with the entries of ``anchor_mask``.
        anchor_mask: One sequence of anchor indices per scale; entries 0, 1
            and 2 are used for strides 32, 16 and 8.
    """

    # Stride of each output scale, ordered coarse -> fine.
    _STRIDES = (32.0, 16.0, 8.0)

    def __init__(self, class_nums=class_nums, anchor_nums=anchor_nums, train=False, anchor=anchor, anchor_mask=masks):
        super().__init__()
        # Darknet requires the per-stage block counts; this is the Darknet-53 layout.
        self.back_bone = Darknet([1, 2, 8, 8, 4])
        self.neck = YoloNeck()
        self.head = YoloHead(class_nums=class_nums, anchor_nums=anchor_nums)
        # Stored as ``train_mode``: assigning a bool to ``self.train`` would
        # shadow nn.Module.train() and make model.train()/model.eval() fail.
        self.train_mode = train
        self.class_nums = class_nums
        self.anchors_nums = anchor_nums
        self.anchor = anchor
        self.anchor_mask = anchor_mask

    def forward(self, x):
        """Run the full model on ``x``.

        Returns:
            When ``train`` was true: a triple with the ``decode_pred`` result
            of each scale. Otherwise: the result of ``yolo_nms`` applied to
            that triple.
        """
        output1, output2, output3 = self.back_bone(x)
        output1, output2, output3 = self.neck((output1, output2, output3))
        raw_outputs = self.head((output1, output2, output3))

        # output1 has the largest receptive field (stride 32), output3 the
        # smallest (stride 8); each scale decodes with its own anchor subset.
        decoded = tuple(
            decode_pred(raw, self.anchor[list(mask)], self.class_nums, stride, self.train_mode)
            for raw, mask, stride in zip(raw_outputs, self.anchor_mask, self._STRIDES)
        )
        if self.train_mode:
            return decoded
        return yolo_nms(decoded)

上面的代码已经实现了图片在Yolov3中的正向传播, 并且分为 训练时的输出和非训练时的输出, 接下来的代码为帮助构建网络的代码

对输出进行解析

# Split a raw head output into box, confidence and class predictions.
def decode_pred(pred, anchor, class_nums, stride, train=False):
    """Decode one raw YOLO head output.

    Args:
        pred: Tensor of shape ``(batch, A * (class_nums + 5), H, W)`` where
            ``A`` is the number of anchors of this scale.
        anchor: ``A x 2`` anchor sizes (width, height) in input-image pixels;
            a tensor or anything ``torch.as_tensor`` accepts.
        class_nums: Number of classes.
        stride: Downsampling factor of this scale; anchors are divided by it.
        train: When true, skip box decoding and return the activations.

    Returns:
        If ``train``: ``(pred_xy, pred_wh, pred_conf, pred_cls)``, each of
        shape ``(batch, A, H, W, k)``; xy, conf and cls have had a sigmoid
        applied, wh is left raw.
        Otherwise: ``(bbox, pred_conf, pred_cls)`` where
        ``bbox[..., :] = (x, y, w, h)`` is expressed in grid-cell units of
        this scale (it is not multiplied back by ``stride``).
    """
    batch_size, channels, grid_h, grid_w = pred.shape
    attrs = class_nums + 5
    # Derive the anchor count from the channel dimension instead of assuming 3.
    num_anchors = channels // attrs

    # (B, A*attrs, H, W) -> (B, A, H, W, attrs)
    pred = (pred.view(batch_size, num_anchors, attrs, grid_h, grid_w)
            .permute(0, 1, 3, 4, 2)
            .contiguous())
    pred_xy, pred_wh, pred_conf, pred_cls = torch.split(pred, (2, 2, 1, class_nums), dim=-1)

    # Following the paper, squash offsets, objectness and class scores.
    pred_xy = torch.sigmoid(pred_xy)
    pred_conf = torch.sigmoid(pred_conf)
    pred_cls = torch.sigmoid(pred_cls)
    if train:
        return pred_xy, pred_wh, pred_conf, pred_cls

    # Map anchors onto the feature map and shape them for broadcasting.
    anchor = torch.as_tensor(anchor, dtype=pred.dtype, device=pred.device) / stride
    anchor = anchor.reshape(1, num_anchors, 1, 1, 2)

    # Cell offsets, created on pred's device. After the permute above, dim 2
    # is the row (y) and dim 3 the column (x), so x must take the column index.
    cols = torch.arange(grid_w, dtype=pred.dtype, device=pred.device)
    rows = torch.arange(grid_h, dtype=pred.dtype, device=pred.device)
    grid_x = cols.view(1, grid_w).expand(grid_h, grid_w)
    grid_y = rows.view(grid_h, 1).expand(grid_h, grid_w)
    grid = torch.stack((grid_x, grid_y), dim=-1).view(1, 1, grid_h, grid_w, 2)

    bbox_xy = pred_xy + grid
    bbox_wh = anchor * torch.exp(pred_wh)
    # Concatenate along the last dimension: bbox[..., (x, y, w, h)].
    bbox = torch.cat((bbox_xy, bbox_wh), dim=-1)
    return bbox, pred_conf, pred_cls

非最大值抑制

# Non-maximum suppression
def yolo_nms(output, iou_thresh=0.5):
    """Merge the per-scale predictions and run NMS on every image.

    Args:
        output: Iterable of ``(bbox, conf, cls)`` triples, one per scale,
            each with the batch as first dimension and 4 / 1 / C values in
            the last dimension.
        iou_thresh: IoU threshold forwarded to ``nms``.

    Returns:
        ``(bbox, conf, cls, idxs)`` where the first three are the inputs
        concatenated over all scales with shapes ``(batch, N, 4)``,
        ``(batch, N, 1)`` and ``(batch, N, C)``, and ``idxs`` is a list with
        the ``nms`` result of each image.
    """
    boxes, confs, class_probs = [], [], []
    for scale_bbox, scale_conf, scale_cls in output:
        batch = scale_bbox.size(0)
        boxes.append(scale_bbox.reshape(batch, -1, 4))
        confs.append(scale_conf.reshape(batch, -1, 1))
        class_probs.append(scale_cls.reshape(batch, -1, scale_cls.size(-1)))
    bbox = torch.cat(boxes, dim=1)
    conf = torch.cat(confs, dim=1)
    cls = torch.cat(class_probs, dim=1)

    # Rank every box by its best class-specific confidence. The maximum value
    # is needed here; the argmax would give the class index, not a score.
    scores, _ = torch.max(conf * cls, dim=-1)

    # NOTE(review): `nms` is defined outside this block. If it is
    # torchvision.ops.nms it expects (x1, y1, x2, y2) boxes, while the boxes
    # built by decode_pred are (x, y, w, h) — confirm and convert if needed.
    # NOTE(review): boxes from decode_pred are in per-scale grid units, so
    # boxes of different scales are not directly comparable — confirm.
    idxs = [nms(image_boxes, image_scores, iou_thresh)
            for image_boxes, image_scores in zip(bbox, scores)]
    return bbox, conf, cls, idxs

这里会继续更新。。。。。

你可能感兴趣的:(pytorch,cv,yolo,深度学习,神经网络,卷积神经网络,pytorch)