【目标检测】|yolov6 结构代码分析

yolov6s整体:
model/yolo.py

Model:

class Model(nn.Module):
    def __init__():
        self.backbone, self.neck, self.detect = build_network(config, channels, num_classes, anchors, num_layers)

    def forward(self, x):

        x = self.backbone(x)
        x = self.neck(x)
        x = self.detect(x)
        return x 

在build_network中生成backbone,neck detect (其中,backbone为EfficientRep)

EfficientRep位于yolov6/model/efficentrep.py


def build_network(config, channels, num_classes, anchors, num_layers):

        backbone = EfficientRep(
            in_channels=channels,
            channels_list=channels_list,
            num_repeats=num_repeat,
            block=block
        )

        neck = NECK(
            channels_list=channels_list,
            num_repeats=num_repeat,
            block=block
        )

    head_layers = build_effidehead_layer(channels_list, num_anchors, num_classes, reg_max)

    head = Detect(num_classes, anchors, num_layers, head_layers=head_layers, use_dfl=use_dfl)

【目标检测】|yolov6 结构代码分析_第1张图片

backbone

    block=RepVGGBlock
class EfficientRep(nn.Module):

    def __init__(
        self,
        in_channels=3,
        block=RepVGGBlock
    ):
        super().__init__()


        self.stem = RepVGGBlock( )
        self.ERBlock_2 = nn.Sequential(
            RepVGGBlock(),
            RepBlock(
                in_channels=channels_list[1],
                out_channels=channels_list[1],
                n=num_repeats[1],
                block=RepVGGBlock,
            )
        )

        self.ERBlock_3 = nn.Sequential(
            RepVGGBlock(  ),
            RepBlock(
                in_channels=channels_list[2],
                out_channels=channels_list[2],
                n=num_repeats[2],
                block=RepVGGBlock,
            )
        )

        self.ERBlock_4 = nn.Sequential(
            RepVGGBlock(  ),
            RepBlock(
                in_channels=channels_list[3],
                out_channels=channels_list[3],
                n=num_repeats[3],
                block=RepVGGBlock,
            )
        )

        self.ERBlock_5 = nn.Sequential(
            RepVGGBlock(  ),
            RepBlock(
                in_channels=channels_list[4],
                out_channels=channels_list[4],
                n=num_repeats[4],
                block=RepVGGBlock,
            ),
            SimSPPF(
                in_channels=channels_list[4],
                out_channels=channels_list[4],
                kernel_size=5
            )
        )

    def forward(self, x):

        outputs = []
        x = self.stem(x)
        x = self.ERBlock_2(x)
        x = self.ERBlock_3(x)
        outputs.append(x)
        x = self.ERBlock_4(x)
        outputs.append(x)
        x = self.ERBlock_5(x)
        outputs.append(x)

        return tuple(outputs)

stem=RepVGGBlock

RepVGGBlock RVB

RepVGGBlock(
      (nonlinearity): ReLU(inplace=True)
      (se): Identity()
      (rbr_dense): Sequential(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      )
      (rbr_1x1): Sequential(
        (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      )

RepVGGBlock
在yolov6/layer/common.py中,deploy表示推理过程。

class RepVGGBlock(nn.Module):
    '''RepVGGBlock is a basic rep-style block, including training and deploy status
    This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
    '''
    def __init__
        if deploy:
            self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride,
                                         padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode)
        else:
            self.rbr_identity = nn.BatchNorm2d(num_features=in_channels) if out_channels == in_channels and stride == 1 else None
            self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)
            self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups)
            
    def forward(self, inputs):
        return self.nonlinearity(self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out))

训练过程包含33 11 及bn层,三个分支相加输出,在部署时,为了方便部署,直接取3*3的主分支卷积输出。
【目标检测】|yolov6 结构代码分析_第2张图片
【目标检测】|yolov6 结构代码分析_第3张图片

RepBlock为N个RepVGG Block
RB为几个RVB的串联,其中第一个RVB用于特征层的size变化(stride=2,下采样),后面N个RVB 用于特征层的融合,size保持不变。

ERBlock2
一个RVB +RB(1RVB+1 RVB)
ERBlock3
一个RVB +RB(1RVB+3 RVB)
ERBlock4
一个RVB +RB(1RVB+5 RVB)
ERBlock_5
一个RVB +RB (1RVB+1 RVB)+SimSPPF

class SimSPPF(nn.Module):
    '''Simplified SPPF with ReLU activation'''
    def __init__(self, in_channels, out_channels, kernel_size=5):
        super().__init__()
        c_ = in_channels // 2  # hidden channels
        self.cv1 = SimConv(in_channels, c_, 1, 1)
        self.cv2 = SimConv(c_ * 4, out_channels, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=kernel_size // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))


其中SConv是有conv+BN+ReLu组成, 先通过一个SConv层,特征图h,w的size不变,outchannel变成inchannel的一半

class SimConv(nn.Module):
    '''Normal Conv with ReLU activation'''
    def __init__():
        padding = kernel_size // 2
        self.conv = nn.Conv2d( )
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.ReLU()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

在这里插入图片描述

在这里插入图片描述

simconv输出做为一个分支,而后经过3个maxpooling层,每个maxpooling的kernel=5,s=1,padding=kernel//2, 每经过一个maxpooling后,fm size均不变,并做为分支。而后通过cat将几个分支在channel维度上相加,得到的size较于SPPF的输入,h,w不变,channel为输入的2倍,最后再通过一个SConv层,通道减半,使得输入和输出的fm size不变。

Neck

【目标检测】|yolov6 结构代码分析_第4张图片
Sconv就是1*1
【目标检测】|yolov6 结构代码分析_第5张图片

class RepPANNeck(nn.Module):
    """RepPANNeck Module
    EfficientRep is the default backbone of this model.
    RepPANNeck has the balance of feature fusion ability and hardware efficiency.
    """

    def __init__(
        block=RepVGGBlock
    ):
        self.Rep_p4 = RepBlock(
            in_channels=channels_list[3] + channels_list[5],
            out_channels=channels_list[5],
            n=num_repeats[5],
            block=block
        )

        self.Rep_p3 = RepBlock(
            in_channels=channels_list[2] + channels_list[6],
            out_channels=channels_list[6],
            n=num_repeats[6],
            block=block
        )

        self.Rep_n3 = RepBlock(
            in_channels=channels_list[6] + channels_list[7],
            out_channels=channels_list[8],
            n=num_repeats[7],
            block=block
        )

        self.Rep_n4 = RepBlock(
            in_channels=channels_list[5] + channels_list[9],
            out_channels=channels_list[10],
            n=num_repeats[8],
            block=block
        )

        self.reduce_layer0 = SimConv(
            in_channels=channels_list[4],
            out_channels=channels_list[5],
            kernel_size=1,
            stride=1
        )

        self.upsample0 = Transpose(
            in_channels=channels_list[5],
            out_channels=channels_list[5],
        )

        self.reduce_layer1 = SimConv(
            in_channels=channels_list[5],
            out_channels=channels_list[6],
            kernel_size=1,
            stride=1
        )

        self.upsample1 = Transpose(
            in_channels=channels_list[6],
            out_channels=channels_list[6]
        )

        self.downsample2 = SimConv(
            in_channels=channels_list[6],
            out_channels=channels_list[7],
            kernel_size=3,
            stride=2
        )

        self.downsample1 = SimConv(
            in_channels=channels_list[8],
            out_channels=channels_list[9],
            kernel_size=3,
            stride=2
        )


    def forward(self, input):

        (x2, x1, x0) = input

        fpn_out0 = self.reduce_layer0(x0)
        upsample_feat0 = self.upsample0(fpn_out0)
        f_concat_layer0 = torch.cat([upsample_feat0, x1], 1)
        f_out0 = self.Rep_p4(f_concat_layer0)

        fpn_out1 = self.reduce_layer1(f_out0)
        upsample_feat1 = self.upsample1(fpn_out1)
        f_concat_layer1 = torch.cat([upsample_feat1, x2], 1)
        pan_out2 = self.Rep_p3(f_concat_layer1)

        down_feat1 = self.downsample2(pan_out2)
        p_concat_layer1 = torch.cat([down_feat1, fpn_out1], 1)
        pan_out1 = self.Rep_n3(p_concat_layer1)

        down_feat0 = self.downsample1(pan_out1)
        p_concat_layer2 = torch.cat([down_feat0, fpn_out0], 1)
        pan_out0 = self.Rep_n4(p_concat_layer2)

        outputs = [pan_out2, pan_out1, pan_out0]

        return outputs

Neck层美团官方称其为Rep-PAN,是基于PAN的拓扑方法,如上图所示,类似一种“U”型结构,其中U型左侧从上到下fm的h,w增大,右侧从下到上fm的h,w减小,其中Upsample上采样基于torch官方自带的转置卷积实现

整个neck层的流程为,U型左侧,从ERB5输出20x20x512的fm,通过SConv 变成20x20x128大小,上采样后h,w较之前增大一倍后与ERB4的输出在channel层上concate后fm变成40x40x384,通过一个RB(s=1, o≠i)后,输出 40x40x128,重复上述步骤后,输出8x080x64的fm。U型右侧,将80x80x64的fm先SConv下采样,得到40x40x64的fm,与U型左侧h,w一致的fm在channel层上concate后,通过一个RB(s=1, o≠i),输出第二个fm,重复U型右侧以上步骤,输出第三个fm。至此,neck层输出三个fm分别为(20x20x256, 40x40x128, 80x80x64).

从结构上来看,其实还是用了YOLO V5这PANET结构,只不过将其中的CSPDarknet换做了RepBlock。

head【目标检测】|yolov6 结构代码分析_第6张图片

【目标检测】|yolov6 结构代码分析_第7张图片

【目标检测】|yolov6 结构代码分析_第8张图片
在这里插入图片描述

整个head借鉴了yolox中的解耦头设计,并对其做了改进,head流程如下:从neck层输出三个分支,对于每个分支,先对输出fm通过BConv层(11),做fm的特征融合后,分成两个分支,一个分支通过Conv(33)+BConv完成分类任务的预测,另外一个分支先通过Conv融合特征后再分成两个分支,一个分支通过BConv完成边框的回归,一个分支通过BConv完成前后背景的分类,至此三个分支再通过concate在channel层上融合,输出未经后处理的预测结果。

class Detect(nn.Module):
    def forward(self, x):
        if self.training:
            cls_score_list = []
            reg_distri_list = []

            for i in range(self.nl):
                x[i] = self.stems[i](x[i])
                cls_x = x[i]
                reg_x = x[i]
                cls_feat = self.cls_convs[i](cls_x)
                cls_output = self.cls_preds[i](cls_feat)
                reg_feat = self.reg_convs[i](reg_x)
                reg_output = self.reg_preds[i](reg_feat)

                cls_output = torch.sigmoid(cls_output)
                cls_score_list.append(cls_output.flatten(2).permute((0, 2, 1)))
                reg_distri_list.append(reg_output.flatten(2).permute((0, 2, 1)))
            
            cls_score_list = torch.cat(cls_score_list, axis=1)
            reg_distri_list = torch.cat(reg_distri_list, axis=1)

            return x, cls_score_list, reg_distri_list
        else:
            cls_score_list = []
            reg_dist_list = []
            anchor_points, stride_tensor = generate_anchors(
                x, self.stride, self.grid_cell_size, self.grid_cell_offset, device=x[0].device, is_eval=True)

            for i in range(self.nl):
                b, _, h, w = x[i].shape
                l = h * w
                x[i] = self.stems[i](x[i])
                cls_x = x[i]
                reg_x = x[i]
                cls_feat = self.cls_convs[i](cls_x)
                cls_output = self.cls_preds[i](cls_feat)
                reg_feat = self.reg_convs[i](reg_x)
                reg_output = self.reg_preds[i](reg_feat)
                
                if self.use_dfl:
                    reg_output = reg_output.reshape([-1, 4, self.reg_max + 1, l]).permute(0, 2, 1, 3)
                    reg_output = self.proj_conv(F.softmax(reg_output, dim=1))
                
                cls_output = torch.sigmoid(cls_output)
                cls_score_list.append(cls_output.reshape([b, self.nc, l]))
                reg_dist_list.append(reg_output.reshape([b, 4, l]))
            
            cls_score_list = torch.cat(cls_score_list, axis=-1).permute(0, 2, 1)
            reg_dist_list = torch.cat(reg_dist_list, axis=-1).permute(0, 2, 1)


            pred_bboxes = dist2bbox(reg_dist_list, anchor_points, box_format='xywh')
            pred_bboxes *= stride_tensor
            return torch.cat(
                [
                    pred_bboxes,
                    torch.ones((b, pred_bboxes.shape[1], 1), device=pred_bboxes.device, dtype=pred_bboxes.dtype),
                    cls_score_list
                ],
                axis=-1)

REPVGG融合方法

【目标检测】|yolov6 结构代码分析_第9张图片

ref
https://blog.csdn.net/zqwwwm/article/details/125635594
https://mp.weixin.qq.com/s/RrQCP4pTSwpTmSgvly9evg
https://zhuanlan.zhihu.com/p/353697121

你可能感兴趣的:(目标检测,目标检测,深度学习,人工智能)