Implementing a Backbone in MMDetection: MobileNetV2

Taking MobileNetV2 as an example, this post walks through how a backbone is implemented in mmdet. It assumes some familiarity with the MobileNetV2 architecture; see the MobileNet tutorial for background.

Contents

1. Overall Framework

2. Initialization Parameters

3. The ConvModule Class

4. self.make_layer()


1. Overall Framework

The source file is mmdet/models/backbones/mobilenet_v2.py. The full code, with rough annotations, is as follows:

# Copyright (c) OpenMMLab. All rights reserved.
import warnings
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from torch.nn.modules.batchnorm import _BatchNorm
from ..builder import BACKBONES
from ..utils import InvertedResidual, make_divisible


@BACKBONES.register_module()
class MobileNetV2(BaseModule):
    """MobileNetV2 backbone.

    Args:
        widen_factor (float): Width multiplier, multiply number of
            channels in each layer by this amount. Default: 1.0.
        out_indices (Sequence[int], optional): Output from which stages.
            Default: (1, 2, 4, 7).
        frozen_stages (int): Stages to be frozen (all param fixed).
            Default: -1, which means not freezing any parameters.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU6').
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
        pretrained (str, optional): model pretrained path. Default: None
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None
    """

    # Parameters to build layers. 4 parameters are needed to construct a
    # layer, from left to right: expand_ratio, channel, num_blocks, stride.
    # architecture settings for the seven stages
    arch_settings = [[1, 16, 1, 1],
                     [6, 24, 2, 2],
                     [6, 32, 3, 2],
                     [6, 64, 4, 2],
                     [6, 96, 3, 1],
                     [6, 160, 3, 2],
                     [6, 320, 1, 1]]
    # constructor arguments
    def __init__(self,
                 # width multiplier that scales the channel count of every layer
                 widen_factor=1.,
                 # indices of the stages whose feature maps are returned
                 out_indices=(1, 2, 4, 7),
                 # stages up to this index are frozen (-1 freezes nothing)
                 frozen_stages=-1,
                 conv_cfg=None,
                 # normalization layer config
                 norm_cfg=dict(type='BN'),
                 # activation layer config
                 act_cfg=dict(type='ReLU6'),
                 norm_eval=False,
                 with_cp=False,
                 pretrained=None,
                 # weight-initialization config passed to the parent class
                 init_cfg=None):
        # call the parent (BaseModule) constructor
        super(MobileNetV2, self).__init__(init_cfg)
        self.pretrained = pretrained
        
        assert not (init_cfg and pretrained), \
            'init_cfg and pretrained cannot be specified at the same time'
       
        if isinstance(pretrained, str):
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)

        elif pretrained is None:
            if init_cfg is None:
                self.init_cfg = [
                    dict(type='Kaiming', layer='Conv2d'),
                    dict(
                        type='Constant',
                        val=1,
                        layer=['_BatchNorm', 'GroupNorm'])
                ]
        else:
            raise TypeError('pretrained must be a str or None')

        self.widen_factor = widen_factor
        self.out_indices = out_indices

        # output indices must fall within 0..7
        if not set(out_indices).issubset(set(range(0, 8))):
            raise ValueError('out_indices must be a subset of range'
                             f'(0, 8). But received {out_indices}')
        # frozen_stages must fall within -1..7
        if frozen_stages not in range(-1, 8):
            raise ValueError('frozen_stages must be in range(-1, 8). '
                             f'But received {frozen_stages}')

        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp

        self.in_channels = make_divisible(32 * widen_factor, 8)
        # build the stem: one ConvModule bundling conv + BN + activation
        self.conv1 = ConvModule(
            in_channels=3,
            out_channels=self.in_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)
        
        self.layers = []

        # build the seven stages from arch_settings
        for i, layer_cfg in enumerate(self.arch_settings):
            expand_ratio, channel, num_blocks, stride = layer_cfg
            out_channels = make_divisible(channel * widen_factor, 8)
            # build one inverted-residual stage (may stack several blocks)
            inverted_res_layer = self.make_layer(
                out_channels=out_channels,
                num_blocks=num_blocks,
                stride=stride,
                expand_ratio=expand_ratio)
            # name each stage layer1..layer7
            layer_name = f'layer{i + 1}'
            # add_module registers the stage as a submodule (name -> module in an ordered dict)
            self.add_module(layer_name, inverted_res_layer)
            # keep the stage name for use in forward()
            self.layers.append(layer_name)

        if widen_factor > 1.0:
            self.out_channel = int(1280 * widen_factor)
        else:
            self.out_channel = 1280
        # build the final 1x1 conv layer
        layer = ConvModule(
            in_channels=self.in_channels,
            out_channels=self.out_channel,
            kernel_size=1,
            stride=1,
            padding=0,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)
        self.add_module('conv2', layer)
        self.layers.append('conv2')

    def make_layer(self, out_channels, num_blocks, stride, expand_ratio):
        """Stack InvertedResidual blocks to build a layer for MobileNetV2.

        Args:
            out_channels (int): out_channels of block.
            num_blocks (int): number of blocks.
            stride (int): stride of the first block. Default: 1
            expand_ratio (int): Expand the number of channels of the
                hidden layer in InvertedResidual by this ratio. Default: 6.
        """
        layers = []
        # stack num_blocks InvertedResidual blocks; see the MobileNet tutorial for details
        for i in range(num_blocks):
            if i >= 1:
                stride = 1
            layers.append(
                # reuse the predefined InvertedResidual block
                InvertedResidual(
                    self.in_channels,
                    out_channels,
                    mid_channels=int(round(self.in_channels * expand_ratio)),
                    stride=stride,
                    with_expand_conv=expand_ratio != 1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=self.act_cfg,
                    with_cp=self.with_cp))
            self.in_channels = out_channels
        return nn.Sequential(*layers)
    # freeze the stem and the first frozen_stages stages
    def _freeze_stages(self):
        if self.frozen_stages >= 0:
            for param in self.conv1.parameters():
                param.requires_grad = False
        for i in range(1, self.frozen_stages + 1):
            layer = getattr(self, f'layer{i}')
            layer.eval()
            for param in layer.parameters():
                param.requires_grad = False
    # forward pass
    def forward(self, x):
        """Forward function."""
        x = self.conv1(x)
        outs = []
        for i, layer_name in enumerate(self.layers):
            layer = getattr(self, layer_name)
            x = layer(x)
            # collect the feature maps of the requested stages; these are
            # later fed to the RPN or other heads for box prediction
            if i in self.out_indices:
                outs.append(x)
        return tuple(outs)
    # switch to training mode while keeping frozen stages and (optionally) BN layers in eval mode
    def train(self, mode=True):
        """Convert the model into training mode while keep normalization layer
        frozen."""
        super(MobileNetV2, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # trick: eval have effect on BatchNorm only
                if isinstance(m, _BatchNorm):
                    m.eval()
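
As a quick check of what the backbone returns, here is a minimal usage sketch (assuming mmdet and its dependencies are installed; the 224x224 input is arbitrary, and the printed shapes hold for the default widen_factor=1.0). With out_indices=(1, 2, 4, 7), the tuple holds the feature maps of stages 2, 3, 5 and the final conv2:

import torch
from mmdet.models.backbones import MobileNetV2

model = MobileNetV2(widen_factor=1.0, out_indices=(1, 2, 4, 7))
model.eval()

x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    outs = model(x)

for out in outs:
    print(out.shape)
# torch.Size([1, 24, 56, 56])    stride 4
# torch.Size([1, 32, 28, 28])    stride 8
# torch.Size([1, 96, 14, 14])    stride 16
# torch.Size([1, 1280, 7, 7])    stride 32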

2. Initialization Parameters

The parameters in the backbone section of a config file map directly onto these constructor arguments. Taking the SSDLite config as an example (configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py), its backbone section reads:

backbone=dict(
    # backbone class name as registered in BACKBONES
    type='MobileNetV2',
    # indices of the stages whose feature maps are returned
    out_indices=(4, 7),
    # normalization layer config
    norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
    # weight-initialization config
    init_cfg=dict(type='TruncNormal', layer='Conv2d', std=0.03)),

The type field names the backbone to use; all backbones supported by mmdet are listed in mmdet/models/backbones/__init__.py:

# Copyright (c) OpenMMLab. All rights reserved.
from .csp_darknet import CSPDarknet
from .darknet import Darknet
from .detectors_resnet import DetectoRS_ResNet
from .detectors_resnext import DetectoRS_ResNeXt
from .hourglass import HourglassNet
from .hrnet import HRNet
from .mobilenet_v2 import MobileNetV2
from .regnet import RegNet
from .res2net import Res2Net
from .resnest import ResNeSt
from .resnet import ResNet, ResNetV1d
from .resnext import ResNeXt
from .ssd_vgg import SSDVGG
from .swin import SwinTransformer
from .trident_resnet import TridentResNet

__all__ = [
    'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet',
    'MobileNetV2', 'Res2Net', 'HourglassNet', 'DetectoRS_ResNet',
    'DetectoRS_ResNeXt', 'Darknet', 'ResNeSt', 'TridentResNet', 'CSPDarknet',
    'SwinTransformer'
]

The remaining parameters are passed to the MobileNetV2 class to build the model.
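
At build time the registry looks up the class registered under type and calls it with the remaining keys as keyword arguments. Here is a minimal sketch of doing the same thing by hand (assuming an mmdet installation; build_backbone is exposed by mmdet.models):

from mmdet.models import build_backbone

backbone_cfg = dict(
    type='MobileNetV2',
    out_indices=(4, 7),
    norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
    init_cfg=dict(type='TruncNormal', layer='Conv2d', std=0.03))

# equivalent to MobileNetV2(out_indices=(4, 7), norm_cfg=..., init_cfg=...)
backbone = build_backbone(backbone_cfg)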

3. The ConvModule Class

This class builds a conv block that bundles convolution + normalization + activation. It relies on several helper functions that are not dissected here; once you understand this class, you can assemble conv blocks of your own without digging further into the internals. The official English docstring is fairly detailed, and line comments are added below:

import warnings
import torch.nn as nn
from ..utils import constant_init, kaiming_init
from .activation import build_activation_layer
from .conv import build_conv_layer
from .norm import build_norm_layer
from .padding import build_padding_layer
from .registry import PLUGIN_LAYERS


@PLUGIN_LAYERS.register_module()
class ConvModule(nn.Module):
    """A conv block that bundles conv/norm/activation layers.

    This block simplifies the usage of convolution layers, which are commonly
    used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
    It is based upon three build methods: `build_conv_layer()`,
    `build_norm_layer()` and `build_activation_layer()`.

    Besides, we add some additional features in this module.
    1. Automatically set `bias` of the conv layer.
    2. Spectral norm is supported.
    3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only
    supports zero and circular padding, and we add "reflect" padding mode.

    Args:
        in_channels (int): Number of channels in the input feature map.
            Same as that in ``nn._ConvNd``.
        out_channels (int): Number of channels produced by the convolution.
            Same as that in ``nn._ConvNd``.
        kernel_size (int | tuple[int]): Size of the convolving kernel.
            Same as that in ``nn._ConvNd``.
        stride (int | tuple[int]): Stride of the convolution.
            Same as that in ``nn._ConvNd``.
        padding (int | tuple[int]): Zero-padding added to both sides of
            the input. Same as that in ``nn._ConvNd``.
        dilation (int | tuple[int]): Spacing between kernel elements.
            Same as that in ``nn._ConvNd``.
        groups (int): Number of blocked connections from input channels to
            output channels. Same as that in ``nn._ConvNd``.
        bias (bool | str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise
            False. Default: "auto".
        conv_cfg (dict): Config dict for convolution layer. Default: None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer. Default: None.
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        inplace (bool): Whether to use inplace mode for activation.
            Default: True.
        with_spectral_norm (bool): Whether use spectral norm in conv module.
            Default: False.
        padding_mode (str): If the `padding_mode` has not been supported by
            current `Conv2d` in PyTorch, we will use our own padding layer
            instead. Currently, we support ['zeros', 'circular'] with official
            implementation and ['reflect'] with our own implementation.
            Default: 'zeros'.
        order (tuple[str]): The order of conv/norm/activation layers. It is a
            sequence of "conv", "norm" and "act". Common examples are
            ("conv", "norm", "act") and ("act", "conv", "norm").
            Default: ('conv', 'norm', 'act').
    """

    _abbr_ = 'conv_block'

    def __init__(self,
                 # number of input channels
                 in_channels,
                 # number of output channels
                 out_channels,
                 # kernel size
                 kernel_size,
                 # stride
                 stride=1,
                 # padding
                 padding=0,
                 # spacing between kernel elements; values > 1 give dilated (atrous) convolution
                 dilation=1,
                 # number of groups for grouped convolution
                 groups=1,
                 # bias; 'auto' decides based on norm_cfg
                 bias='auto',
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=dict(type='ReLU'),
                 inplace=True,
                 # whether to apply spectral normalization to the conv
                 with_spectral_norm=False,
                 padding_mode='zeros',
                 order=('conv', 'norm', 'act')):
        super(ConvModule, self).__init__()

        assert conv_cfg is None or isinstance(conv_cfg, dict)
        assert norm_cfg is None or isinstance(norm_cfg, dict)
        assert act_cfg is None or isinstance(act_cfg, dict)
        # padding modes handled natively by PyTorch's Conv2d
        official_padding_mode = ['zeros', 'circular']
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.inplace = inplace
        self.with_spectral_norm = with_spectral_norm
        self.with_explicit_padding = padding_mode not in official_padding_mode
        self.order = order
        assert isinstance(self.order, tuple) and len(self.order) == 3
        assert set(order) == set(['conv', 'norm', 'act'])

        self.with_norm = norm_cfg is not None
        self.with_activation = act_cfg is not None
        # if the conv layer is before a norm layer, bias is unnecessary.
        # if a norm layer follows the conv, a bias is unnecessary
        if bias == 'auto':
            bias = not self.with_norm
        self.with_bias = bias
        # warn when norm and bias are enabled together
        if self.with_norm and self.with_bias:
            warnings.warn('ConvModule has norm and bias at the same time')
        # for other padding modes, build an explicit padding layer
        if self.with_explicit_padding:
            pad_cfg = dict(type=padding_mode)
            self.padding_layer = build_padding_layer(pad_cfg, padding)

        # reset padding to 0 for conv module
        conv_padding = 0 if self.with_explicit_padding else padding
        # build convolution layer
        # build the convolution layer
        self.conv = build_conv_layer(
            conv_cfg,
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=conv_padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        # export the attributes of self.conv to a higher level for convenience
        # re-expose self.conv's attributes on the ConvModule for convenience
        self.in_channels = self.conv.in_channels
        self.out_channels = self.conv.out_channels
        self.kernel_size = self.conv.kernel_size
        self.stride = self.conv.stride
        self.padding = padding
        self.dilation = self.conv.dilation
        self.transposed = self.conv.transposed
        self.output_padding = self.conv.output_padding
        self.groups = self.conv.groups
        # optionally wrap the conv with spectral normalization
        if self.with_spectral_norm:
            self.conv = nn.utils.spectral_norm(self.conv)

        # build normalization layers
        # build the normalization layer
        if self.with_norm:
            # norm layer is after conv layer
            # decide whether the norm layer sits before or after the conv
            if order.index('norm') > order.index('conv'):
                norm_channels = out_channels
            else:
                norm_channels = in_channels
            self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels)
            self.add_module(self.norm_name, norm)
        else:
            self.norm_name = None

        # build activation layer
        # build the activation layer
        if self.with_activation:
            act_cfg_ = act_cfg.copy()
            # nn.Tanh has no 'inplace' argument
            if act_cfg_['type'] not in [
                    'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish'
            ]:
                act_cfg_.setdefault('inplace', inplace)
            self.activate = build_activation_layer(act_cfg_)

        # Use msra init by default
        # default (MSRA/Kaiming) weight initialization
        self.init_weights()

    @property
    def norm(self):
        if self.norm_name:
            return getattr(self, self.norm_name)
        else:
            return None
    # weight initialization
    def init_weights(self):
        # 1. It is mainly for customized conv layers with their own
        #    initialization manners by calling their own ``init_weights()``,
        #    and we do not want ConvModule to override the initialization.
        # 2. For customized conv layers without their own initialization
        #    manners (that is, they don't have their own ``init_weights()``)
        #    and PyTorch's conv layers, they will be initialized by
        #    this method with default ``kaiming_init``.
        # Note: For PyTorch's conv layers, they will be overwritten by our
        #    initialization implementation using default ``kaiming_init``.
        if not hasattr(self.conv, 'init_weights'):
            if self.with_activation and self.act_cfg['type'] == 'LeakyReLU':
                nonlinearity = 'leaky_relu'
                a = self.act_cfg.get('negative_slope', 0.01)
            else:
                nonlinearity = 'relu'
                a = 0
            kaiming_init(self.conv, a=a, nonlinearity=nonlinearity)
        if self.with_norm:
            constant_init(self.norm, 1, bias=0)
    # forward pass
    def forward(self, x, activate=True, norm=True):
        for layer in self.order:
            if layer == 'conv':
                if self.with_explicit_padding:
                    x = self.padding_layer(x)
                x = self.conv(x)
            elif layer == 'norm' and norm and self.with_norm:
                x = self.norm(x)
            elif layer == 'act' and activate and self.with_activation:
                x = self.activate(x)
        return x
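
As a quick illustration, here is a minimal sketch (assuming mmcv is installed) that builds the same kind of 3x3 conv + BN + ReLU6 block MobileNetV2 uses as its stem, and shows the bias='auto' rule in action:

import torch
from mmcv.cnn import ConvModule

# with norm_cfg given, bias='auto' resolves to bias=False
block = ConvModule(
    in_channels=3,
    out_channels=32,
    kernel_size=3,
    stride=2,
    padding=1,
    norm_cfg=dict(type='BN'),
    act_cfg=dict(type='ReLU6'))

x = torch.randn(1, 3, 224, 224)
print(block(x).shape)           # torch.Size([1, 32, 112, 112])
print(block.conv.bias is None)  # True: the bias was dropped automatically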

4. self.make_layer()

This method stacks InvertedResidual blocks according to each stage's repeat count (num_blocks) in the architecture settings, calling the predefined InvertedResidual module in a loop. MMDetection predefines a number of such basic modules that can be used directly; see mmdet/models/utils/__init__.py:

# Copyright (c) OpenMMLab. All rights reserved.
from .builder import build_linear_layer, build_transformer
from .conv_upsample import ConvUpsample
from .csp_layer import CSPLayer
from .gaussian_target import gaussian_radius, gen_gaussian_target
from .inverted_residual import InvertedResidual
from .make_divisible import make_divisible
from .misc import interpolate_as
from .normed_predictor import NormedConv2d, NormedLinear
from .positional_encoding import (LearnedPositionalEncoding,
                                  SinePositionalEncoding)
from .res_layer import ResLayer, SimplifiedBasicBlock
from .se_layer import SELayer
from .transformer import (DetrTransformerDecoder, DetrTransformerDecoderLayer,
                          DynamicConv, Transformer)

__all__ = [
    'ResLayer', 'gaussian_radius', 'gen_gaussian_target',
    'DetrTransformerDecoderLayer', 'DetrTransformerDecoder', 'Transformer',
    'build_transformer', 'build_linear_layer', 'SinePositionalEncoding',
    'LearnedPositionalEncoding', 'DynamicConv', 'SimplifiedBasicBlock',
    'NormedLinear', 'NormedConv2d', 'make_divisible', 'InvertedResidual',
    'SELayer', 'interpolate_as', 'ConvUpsample', 'CSPLayer'
]
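
Two of these utilities do the heavy lifting above: make_divisible rounds a channel count to the nearest multiple of the divisor (in the mmdet implementation, bumping up by one divisor if the rounded value would fall below 90% of the original), and InvertedResidual is the expand -> depthwise -> project block that make_layer stacks. A small sketch of both (assuming an mmdet installation):

import torch
from mmdet.models.utils import InvertedResidual, make_divisible

# channel counts are kept divisible by 8 for hardware efficiency
print(make_divisible(32 * 0.5, 8))    # 16
print(make_divisible(320 * 0.75, 8))  # 240

# one block as make_layer builds it, with expand_ratio = 6
block = InvertedResidual(
    in_channels=24,
    out_channels=24,
    mid_channels=24 * 6,
    stride=1,
    with_expand_conv=True,
    norm_cfg=dict(type='BN'),
    act_cfg=dict(type='ReLU6'))

x = torch.randn(1, 24, 56, 56)
# stride 1 and matching channels -> the residual shortcut is used
print(block(x).shape)  # torch.Size([1, 24, 56, 56])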

That covers the basic flow of implementing a backbone in MMDetection; you can follow the same pattern to build a custom backbone. Customization also involves the registry mechanism, which I haven't fully worked through yet, but a rough sketch of the usual pattern follows below. I'm still a beginner, so please point out any mistakes!
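
A minimal sketch of what that registration typically looks like (MyBackbone and its internals are hypothetical; the pattern mirrors MobileNetV2 above):

import torch.nn as nn
from mmcv.runner import BaseModule
from mmdet.models.builder import BACKBONES


@BACKBONES.register_module()
class MyBackbone(BaseModule):
    # a hypothetical toy backbone, just to show the registration pattern

    def __init__(self, init_cfg=None):
        super(MyBackbone, self).__init__(init_cfg)
        self.stem = nn.Conv2d(3, 32, 3, stride=2, padding=1)

    def forward(self, x):
        # detectors expect a tuple of feature maps
        return (self.stem(x),)

# once registered, a config can refer to it as backbone=dict(type='MyBackbone')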
