Caffe2 - (29) Detectron modeling - Backbone Networks


Backbones covered: VGG16, VGG_CNN_M_1024, ResNet, and ResNeXt.

1. VGG16.py

"""
VGG16 from https://arxiv.org/abs/1409.1556.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from core.config import cfg


def add_VGG16_conv5_body(model):
    model.Conv('data', 'conv1_1', 3, 64, 3, pad=1, stride=1)
    model.Relu('conv1_1', 'conv1_1')
    model.Conv('conv1_1', 'conv1_2', 64, 64, 3, pad=1, stride=1)
    model.Relu('conv1_2', 'conv1_2')
    model.MaxPool('conv1_2', 'pool1', kernel=2, pad=0, stride=2)
    model.Conv('pool1', 'conv2_1', 64, 128, 3, pad=1, stride=1)
    model.Relu('conv2_1', 'conv2_1')
    model.Conv('conv2_1', 'conv2_2', 128, 128, 3, pad=1, stride=1)
    model.Relu('conv2_2', 'conv2_2')
    model.MaxPool('conv2_2', 'pool2', kernel=2, pad=0, stride=2)
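    # No gradient updates below pool2: conv1_* and conv2_* stay frozen, following
    # the Fast R-CNN practice of fine-tuning VGG16 from conv3_1 and up.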
    model.StopGradient('pool2', 'pool2')
    model.Conv('pool2', 'conv3_1', 128, 256, 3, pad=1, stride=1)
    model.Relu('conv3_1', 'conv3_1')
    model.Conv('conv3_1', 'conv3_2', 256, 256, 3, pad=1, stride=1)
    model.Relu('conv3_2', 'conv3_2')
    model.Conv('conv3_2', 'conv3_3', 256, 256, 3, pad=1, stride=1)
    model.Relu('conv3_3', 'conv3_3')
    model.MaxPool('conv3_3', 'pool3', kernel=2, pad=0, stride=2)
    model.Conv('pool3', 'conv4_1', 256, 512, 3, pad=1, stride=1)
    model.Relu('conv4_1', 'conv4_1')
    model.Conv('conv4_1', 'conv4_2', 512, 512, 3, pad=1, stride=1)
    model.Relu('conv4_2', 'conv4_2')
    model.Conv('conv4_2', 'conv4_3', 512, 512, 3, pad=1, stride=1)
    model.Relu('conv4_3', 'conv4_3')
    model.MaxPool('conv4_3', 'pool4', kernel=2, pad=0, stride=2)
    model.Conv('pool4', 'conv5_1', 512, 512, 3, pad=1, stride=1)
    model.Relu('conv5_1', 'conv5_1')
    model.Conv('conv5_1', 'conv5_2', 512, 512, 3, pad=1, stride=1)
    model.Relu('conv5_2', 'conv5_2')
    model.Conv('conv5_2', 'conv5_3', 512, 512, 3, pad=1, stride=1)
    blob_out = model.Relu('conv5_3', 'conv5_3')
    return blob_out, 512, 1. / 16.


def add_VGG16_roi_fc_head(model, blob_in, dim_in, spatial_scale):
    model.RoIFeatureTransform(blob_in, 
                              'pool5', 
                              blob_rois='rois',
                              method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
                              resolution=7,
                              sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
                              spatial_scale=spatial_scale )
    model.FC('pool5', 'fc6', dim_in * 7 * 7, 4096)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', 4096, 4096)
    blob_out = model.Relu('fc7', 'fc7')
    return blob_out, 4096
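
Not part of the Detectron source: a minimal pure-Python sketch of the bookkeeping behind the return values above. Only the four 2x max pools change the spatial resolution (every 3x3 conv uses pad=1, stride=1), which is where the 1. / 16. spatial scale comes from, and 7x7 RoI pooling of the 512-channel conv5 features fixes fc6's input size.

def vgg16_conv5_scale_and_fc6_dim(num_pools=4, conv5_channels=512, roi_resolution=7):
    """Trace the downsampling of add_VGG16_conv5_body and the fc6 input size."""
    spatial_scale = 1.0
    for _ in range(num_pools):      # pool1 .. pool4, each kernel=2, stride=2
        spatial_scale /= 2.0
    fc6_dim_in = conv5_channels * roi_resolution * roi_resolution
    return spatial_scale, fc6_dim_in

assert vgg16_conv5_scale_and_fc6_dim() == (1. / 16., 512 * 7 * 7)  # (0.0625, 25088)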

2. VGG_CNN_M_1024.py

"""
VGG_CNN_M_1024 from https://arxiv.org/abs/1405.3531.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from core.config import cfg


def add_VGG_CNN_M_1024_conv5_body(model):
    model.Conv('data', 'conv1', 3, 96, 7, pad=0, stride=2)
    model.Relu('conv1', 'conv1')
    model.LRN('conv1', 'norm1', size=5, alpha=0.0005, beta=0.75, bias=2.)
    model.MaxPool('norm1', 'pool1', kernel=3, pad=0, stride=2)
    model.StopGradient('pool1', 'pool1')
    # No updates at conv1 and below (norm1 and pool1 have no params,
    # so we can stop gradients before them, too)
    model.Conv('pool1', 'conv2', 96, 256, 5, pad=0, stride=2)
    model.Relu('conv2', 'conv2')
    model.LRN('conv2', 'norm2', size=5, alpha=0.0005, beta=0.75, bias=2.)
    model.MaxPool('norm2', 'pool2', kernel=3, pad=0, stride=2)
    model.Conv('pool2', 'conv3', 256, 512, 3, pad=1, stride=1)
    model.Relu('conv3', 'conv3')
    model.Conv('conv3', 'conv4', 512, 512, 3, pad=1, stride=1)
    model.Relu('conv4', 'conv4')
    model.Conv('conv4', 'conv5', 512, 512, 3, pad=1, stride=1)
    blob_out = model.Relu('conv5', 'conv5')
    return blob_out, 512, 1. / 16.


def add_VGG_CNN_M_1024_roi_fc_head(model, blob_in, dim_in, spatial_scale):
    model.RoIFeatureTransform(blob_in,
                              'pool5',
                              blob_rois='rois',
                              method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
                              resolution=6,
                              sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
                              spatial_scale=spatial_scale )
    model.FC('pool5', 'fc6', dim_in * 6 * 6, 4096)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', 4096, 1024)
    blob_out = model.Relu('fc7', 'fc7')
    return blob_out, 1024
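
Two notes before moving on (editorial, not from the source): the "1024" in the module name is the reduced fc7 width visible above (1024 instead of VGG16's 4096), and the 1/16 spatial scale comes from the four stride-2 layers conv1, pool1, conv2, and pool2. In Detectron these body/head functions are normally selected by dotted-name strings in the YAML config (merged via core.config.merge_cfg_from_file) and resolved by the model builder; a rough sketch of that wiring, with the key names assumed from the standard config:

from core.config import cfg

# Normally set in a YAML config file rather than assigned directly as done here.
cfg.MODEL.TYPE = 'generalized_rcnn'
cfg.MODEL.CONV_BODY = 'VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body'
cfg.FAST_RCNN.ROI_BOX_HEAD = 'VGG_CNN_M_1024.add_VGG_CNN_M_1024_roi_fc_head'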

3. ResNet.py

"""
Implementation of ResNet and ResNeXt.
See: https://arxiv.org/abs/1512.03385 and https://arxiv.org/abs/1611.05431.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from core.config import cfg

# ---------------------------------------------------------------------------- #
# Bits for specific architectures (ResNet50, ResNet101, ...)
# ---------------------------------------------------------------------------- #


def add_ResNet50_conv4_body(model):
    return add_ResNet_convX_body(model, (3, 4, 6))


def add_ResNet50_conv5_body(model):
    return add_ResNet_convX_body(model, (3, 4, 6, 3))


def add_ResNet101_conv4_body(model):
    return add_ResNet_convX_body(model, (3, 4, 23))


def add_ResNet101_conv5_body(model):
    return add_ResNet_convX_body(model, (3, 4, 23, 3))


def add_ResNet152_conv5_body(model):
    return add_ResNet_convX_body(model, (3, 8, 36, 3))
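
# Depth bookkeeping for the block counts above (each bottleneck block has 3 convs;
# add the stem conv1 and the ImageNet-only fc layer):
#   ResNet-50:  1 + 3 * (3 + 4 + 6 + 3)  + 1 = 50
#   ResNet-101: 1 + 3 * (3 + 4 + 23 + 3) + 1 = 101
#   ResNet-152: 1 + 3 * (3 + 8 + 36 + 3) + 1 = 152
# The conv4 variants drop the last entry and stop after res4.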


# ---------------------------------------------------------------------------- #
# Generic ResNet components
# ---------------------------------------------------------------------------- #


def add_stage(model, prefix, blob_in, n, dim_in, dim_out, dim_inner, dilation, stride_init=2):
    """Add a ResNet stage to the model by stacking n residual blocks."""
    # e.g., prefix = res2
    for i in range(n):
        blob_in = add_residual_block(
            model, '{}_{}'.format(prefix, i), blob_in, dim_in, dim_out,
            dim_inner, dilation, stride_init,
            # Not using inplace for the last block; its output may be
            # fetched externally or used by FPN
            inplace_sum=i < n - 1
        )
        dim_in = dim_out
    return blob_in, dim_in


def add_ResNet_convX_body(model, block_counts, freeze_at=2):
    """
    添加 ResNet body, 从 input data 到 res5(也叫作conv5) 阶段.
    最终的  res5/conv5 阶段可以可选的排除掉(因此, convX, X = 4 or 5).
    """
    assert freeze_at in [0, 2, 3, 4, 5]
    p = model.Conv('data', 'conv1', 3, 64, 7, pad=3, stride=2, no_bias=1)
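    # AffineChannel applies a per-channel scale and bias in place of BatchNorm,
    # which is unsuitable with the small per-GPU minibatches used for detection.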
    p = model.AffineChannel(p, 'res_conv1_bn', inplace=True)
    p = model.Relu(p, p)
    p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)
    dim_in = 64
    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP
    (n1, n2, n3) = block_counts[:3]
    s, dim_in = add_stage(model, 'res2', p, n1, dim_in, 256, dim_bottleneck, 1 )
    if freeze_at == 2:
        model.StopGradient(s, s)
    s, dim_in = add_stage(model, 'res3', s, n2, dim_in, 512, dim_bottleneck * 2, 1 )
    if freeze_at == 3:
        model.StopGradient(s, s)
    s, dim_in = add_stage(model, 'res4', s, n3, dim_in, 1024, dim_bottleneck * 4, 1 )
    if freeze_at == 4:
        model.StopGradient(s, s)
    if len(block_counts) == 4:
        n4 = block_counts[3]
        s, dim_in = add_stage(model, 'res5', s, n4, dim_in, 2048, dim_bottleneck * 8, 
                              cfg.RESNETS.RES5_DILATION )
        if freeze_at == 5:
            model.StopGradient(s, s)
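        # With RES5_DILATION > 1 the res5 blocks keep stride 1 (see
        # add_residual_block), so the total stride stays at 16 and the
        # expression below still gives the correct spatial scale.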
        return s, dim_in, 1. / 32. * cfg.RESNETS.RES5_DILATION
    else:
        return s, dim_in, 1. / 16.


def add_ResNet_roi_conv5_head(model, blob_in, dim_in, spatial_scale):
    """
    Add an RoI feature transformation (e.g., RoI pooling) followed by a
    res5/conv5 head applied to each RoI.
    """
    # TODO(rbg): This contains Fast R-CNN specific config options making it non-
    # reusable; make this more generic with model-specific wrappers
    model.RoIFeatureTransform(blob_in, 
                              'pool5', 
                              blob_rois='rois',
                              method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
                              resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION,
                              sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
                              spatial_scale=spatial_scale )
    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP
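    # Scale the res5 stride to the RoI feature resolution: e.g., a 14x14 RoI map
    # gives stride_init=2, so res5 outputs 7x7, matching the kernel=7 pool below.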
    stride_init = int(cfg.FAST_RCNN.ROI_XFORM_RESOLUTION / 7)
    s, dim_in = add_stage(model, 'res5', 'pool5', 3, dim_in, 2048, dim_bottleneck * 8, 1, stride_init )
    s = model.AveragePool(s, 'res5_pool', kernel=7)
    return s, 2048


def add_residual_block(model, prefix, blob_in, dim_in, dim_out, dim_inner, dilation, 
                       stride_init=2, inplace_sum=False ):
    """
    添加一个残差模块 a residual block.
    """
    # prefix = res<stage>_<block>, e.g., res2_3

    # Max pooling is performed prior to the first stage (which is uniquely
    # distinguished by dim_in = 64), thus we keep stride = 1 for the first stage
    stride = stride_init if (dim_in != dim_out and dim_in != 64 and dilation == 1 ) else 1

    # transformation blob
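    # cfg.RESNETS.TRANS_FUNC names a function in this module (by default the
    # bottleneck_transformation defined below) and is looked up via globals().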
    tr = globals()[cfg.RESNETS.TRANS_FUNC](model, blob_in, dim_in, dim_out, stride, prefix,
                                           dim_inner, group=cfg.RESNETS.NUM_GROUPS, dilation=dilation )

    # sum -> ReLU
    sc = add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride)
    if inplace_sum:
        s = model.net.Sum([tr, sc], tr)
    else:
        s = model.net.Sum([tr, sc], prefix + '_sum')

    return model.Relu(s, s)


def add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
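    # Identity shortcut when the dimensions already match; otherwise a strided
    # 1x1 projection (branch1) followed by AffineChannel in place of BN.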
    if dim_in == dim_out:
        return blob_in

    c = model.Conv(blob_in, prefix + '_branch1', dim_in, dim_out, kernel=1, stride=stride, no_bias=1 )
    return model.AffineChannel(c, prefix + '_branch1_bn')


# ------------------------------------------------------------------------------
# various transformations (may expand and may consider a new helper)
# ------------------------------------------------------------------------------

def bottleneck_transformation(model, blob_in, dim_in, dim_out, stride, 
                              prefix, dim_inner, dilation=1, group=1 ):
    """
    添加 bottleneck transformation.
    """
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)

    # conv 1x1 -> BN -> ReLU
    cur = model.ConvAffine(blob_in, prefix + '_branch2a', dim_in, dim_inner,
                           kernel=1, stride=str1x1, pad=0, inplace=True )
    cur = model.Relu(cur, cur)

    # conv 3x3 -> BN -> ReLU
    cur = model.ConvAffine(cur, prefix + '_branch2b', dim_inner, dim_inner,
                           kernel=3, stride=str3x3, pad=1 * dilation, dilation=dilation,
                           group=group, inplace=True )
    cur = model.Relu(cur, cur)

    # conv 1x1 -> BN (no ReLU)
    # NB: the AffineChannel op cannot be in-place here due to a bug in Caffe2's
    # gradient computation for graphs like this; hence inplace=False below.
    cur = model.ConvAffine(cur, prefix + '_branch2c', dim_inner, dim_out,
                           kernel=1, stride=1, pad=0, inplace=False )
    return cur
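
The same transformation covers both ResNet and ResNeXt: with cfg.RESNETS.NUM_GROUPS = 1 and WIDTH_PER_GROUP = 64, a res2 block is the familiar 1x1 (to 64) -> 3x3 (64) -> 1x1 (to 256) bottleneck, while NUM_GROUPS = 32 and WIDTH_PER_GROUP = 4 gives the ResNeXt (32x4d) block with a grouped 3x3 conv. A standalone check of the dim_inner arithmetic used by add_ResNet_convX_body (editorial sketch, not Detectron code):

def resnet_dim_inner(num_groups, width_per_group, stage):
    """dim_inner passed to add_stage for res<stage>, mirroring the x1/x2/x4/x8 scaling above."""
    dim_bottleneck = num_groups * width_per_group
    return dim_bottleneck * (2 ** (stage - 2))    # res2 -> x1, res3 -> x2, res4 -> x4, res5 -> x8

assert resnet_dim_inner(1, 64, 2) == 64      # ResNet, res2 bottleneck width
assert resnet_dim_inner(32, 4, 2) == 128     # ResNeXt (32x4d), res2
assert resnet_dim_inner(32, 4, 5) == 1024    # ResNeXt (32x4d), res5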
