Caffe2 - (二十七) Detectron 之 modeling - detector

Caffe2 - (二十七) Detectron 之 modeling - detector

Detectron 定义了一个 DetectionModelHelper 类,用来表示 Detectron 模型.

"""
Defines DetectionModelHelper, 
the class that represents a Detectron model.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import logging

from caffe2.python import cnn
from caffe2.python import core
from caffe2.python import workspace

from core.config import cfg
# 定制的 Python Ops
from ops.collect_and_distribute_fpn_rpn_proposals import CollectAndDistributeFpnRpnProposalsOp
from ops.generate_proposal_labels import GenerateProposalLabelsOp
from ops.generate_proposals import GenerateProposalsOp
# lr 策略
from utils import lr_policy
import roi_data.fast_rcnn
import utils.c2 as c2_utils

logger = logging.getLogger(__name__)


class DetectionModelHelper(cnn.CNNModelHelper):
    """Model helper that represents a Detectron model.

    Extends ``cnn.CNNModelHelper`` with detection-specific operators
    (proposal generation, RoI feature transforms, affine-channel layers,
    shared convolutions, ...) and with helpers that keep the learning rate
    and momentum blobs in the workspace up to date.
    """

    def __init__(self, **kwargs):
        """Build the helper.

        Keyword args consumed here:
            train: whether the model is built for training (default False).
            num_classes: number of classes; must be > 0.

        Every other keyword argument is forwarded to ``cnn.CNNModelHelper``,
        with ``order`` forced to 'NCHW' and ``cudnn_exhaustive_search`` forced
        to False.
        """
        # Handle the args specific to DetectionModelHelper; the rest go to
        # CNNModelHelper.
        self.train = kwargs.pop('train', False)
        self.num_classes = kwargs.pop('num_classes', -1)
        assert self.num_classes > 0, 'num_classes must be > 0'
        kwargs['order'] = 'NCHW'
        # Defensively set cudnn_exhaustive_search to False in case the default
        # in CNNModelHelper changes. The detection code uses variable size
        # inputs, which may not play nicely with cudnn_exhaustive_search.
        kwargs['cudnn_exhaustive_search'] = False
        super(DetectionModelHelper, self).__init__(**kwargs)
        self.roi_data_loader = None
        self.losses = []
        self.metrics = []
        # Params on this list are excluded from TrainableParams(), i.e. they
        # are never updated.
        self.do_not_update_params = []
        self.net.Proto().type = cfg.MODEL.EXECUTION_TYPE  # e.g. 'dag'
        self.net.Proto().num_workers = cfg.NUM_GPUS * 4  # 4 workers per GPU
        self.prev_use_cudnn = self.use_cudnn

    def TrainableParams(self, gpu_id=-1):
        """Return the blob names of all trainable parameters.

        A parameter is trainable when it has a gradient and is not listed in
        ``self.do_not_update_params``.

        Args:
            gpu_id: when not -1, only parameters whose name lives in the
                ``gpu_<gpu_id>/`` scope are returned.
        """
        # Match the complete scope component (including the trailing '/') so
        # that e.g. gpu_id=1 does not also select 'gpu_10/...', 'gpu_11/...'.
        scope_prefix = None if gpu_id == -1 else 'gpu_{}/'.format(gpu_id)
        return [
            p for p in self.params
            if p in self.param_to_grad  # p has a gradient
            and p not in self.do_not_update_params  # not on the blacklist
            and (scope_prefix is None or str(p).startswith(scope_prefix))
        ]

    def AffineChannel(self, blob_in, blob_out, share_with=None, inplace=False):
        """Add an affine transformation that stands in for BN.

        Used in networks where BN cannot be used (e.g., because the minibatch
        size is too small).

        The AffineChannel parameters may be shared with another
        AffineChannelOp by passing that op's blob name (excluding the
        '_{s,b}' suffix) as ``share_with``; sharing is used to reduce memory.

        Args:
            blob_in: input blob.
            blob_out: output blob name; a fresh net name is used when falsy.
            share_with: parameter-name prefix of the op to share with, or
                None to create new scale / bias parameters.
            inplace: when True the result is written back to ``blob_in``.

        Returns:
            The output of ``self.net.AffineChannel``.
        """
        blob_out = blob_out or self.net.NextName()
        is_not_sharing = share_with is None
        param_prefix = blob_out if is_not_sharing else share_with
        scale = core.ScopedBlobReference(param_prefix + '_s', self.param_init_net)
        bias = core.ScopedBlobReference(param_prefix + '_b', self.param_init_net)
        if is_not_sharing:
            # Only newly created parameters are registered with the model.
            self.net.Proto().external_input.extend([str(scale), str(bias)])
            self.params.extend([scale, bias])
            self.weights.append(scale)
            self.biases.append(bias)
        target = blob_in if inplace else blob_out
        return self.net.AffineChannel([blob_in, scale, bias], target)

    def GenerateProposals(self, blobs_in, blobs_out, anchors, spatial_scale):
        """Python op that generates RPN proposals.

        blobs_in:
          - RPN objectness probabilities (e.g. 'rpn_cls_probs'): 4D tensor of
            shape (N, A, H, W), where N is the number of minibatch images, A
            is the number of anchors per location, and (H, W) is the spatial
            size of the prediction grid. Each value is the probability of
            object, in [0, 1].
          - RPN box deltas (e.g. 'rpn_bbox_pred'): 4D tensor of shape
            (N, 4 * A, H, W) of predicted deltas for transforming anchor boxes
            into RPN proposals.
          - 'im_info': 2D tensor of shape (N, 3) whose three columns encode
            the input image's [height, width, scale]. Height and width are
            for the input to the network; scale is the factor used to scale
            the raw image to the network input size.

        blobs_out:
          - 'rpn_rois': 2D tensor of shape (R, 5); for each of the R RPN
            proposals the five columns encode [batch ind, x1, y1, x2, y2].
            The boxes are w.r.t. the network input, i.e. a scaled version of
            the raw image, and must be scaled by 1 / scale (scale from
            im_info) to map them back to raw image coordinates.
          - 'rpn_roi_probs': 1D tensor of objectness probability scores
            (extracted from the objectness probabilities input).

        Returns:
            ``blobs_out`` as passed in.
        """
        name = 'GenerateProposalsOp:' + ','.join([str(b) for b in blobs_in])
        # Net.Python() is Caffe2's high-level interface for creating Python
        # ops.
        self.net.Python(
            GenerateProposalsOp(anchors, spatial_scale, self.train).forward
        )(blobs_in, blobs_out, name=name)
        return blobs_out

    def GenerateProposalLabels(self, blobs_in):
        """Python op that generates training labels for RPN proposals.

        Used when training RPN jointly with Fast/Mask R-CNN (as in end-to-end
        Faster R-CNN training).

        blobs_in:
          - 'rpn_rois': 2D tensor of RPN proposals output by
            GenerateProposals.
          - 'roidb': roidb entries that will be labeled.
          - 'im_info': see the GenerateProposals documentation.

        Returns:
            The list of scoped output blob references: the variable set of
            blobs required for training the model, obtained by querying the
            data loader.
        """
        name = 'GenerateProposalLabelsOp:' + ','.join([str(b) for b in blobs_in])

        # The list of output blobs is not known before run-time because it
        # depends on the specific model being trained. Query the data loader
        # to get the list of output blob names.
        blobs_out = roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=self.train)
        blobs_out = [core.ScopedBlobReference(b) for b in blobs_out]

        self.net.Python(GenerateProposalLabelsOp().forward)(blobs_in, blobs_out, name=name)
        return blobs_out

    def CollectAndDistributeFpnRpnProposals(self):
        """Python op that merges and redistributes multi-level RPN proposals.

        Merges the RPN proposals generated at multiple FPN levels and then
        distributes them to their appropriate FPN levels. An anchor at one
        FPN level may predict an RoI that maps to another level, hence the
        need to redistribute the proposals.

        Input blobs:
          [rpn_rois_fpn<min>, ..., rpn_rois_fpn<max>,
           rpn_roi_probs_fpn<min>, ..., rpn_roi_probs_fpn<max>]
          - rpn_rois_fpn<i>: RPN proposals for FPN level i.
          - rpn_roi_probs_fpn<i>: RPN objectness probabilities for FPN
            level i.
          When training, the input blobs additionally include
          [roidb, im_info].

        Output blobs:
          [rois_fpn<min>, ..., rois_fpn<max>, rois, rois_idx_restore]
          - rois_fpn<i>: RPN proposals for FPN level i.
          - rois_idx_restore: a permutation on the concatenation of all
            rois_fpn<i>, i=min...max, that restores the RPN RoIs to their
            original order in the input blobs.
          When training, the output blobs additionally include
          [labels, bbox_targets, bbox_inside_weights, bbox_outside_weights].

        Returns:
            The result of invoking the Python op on the net.
        """
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL

        # Prepare input blobs
        levels = range(k_min, k_max + 1)
        rois_names = ['rpn_rois_fpn' + str(l) for l in levels]
        score_names = ['rpn_roi_probs_fpn' + str(l) for l in levels]
        blobs_in = rois_names + score_names
        if self.train:
            blobs_in += ['roidb', 'im_info']
        blobs_in = [core.ScopedBlobReference(b) for b in blobs_in]
        name = 'CollectAndDistributeFpnRpnProposalsOp:' + ','.join([str(b) for b in blobs_in])

        # Prepare output blobs
        blobs_out = roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=self.train)
        blobs_out = [core.ScopedBlobReference(b) for b in blobs_out]

        outputs = self.net.Python(
            CollectAndDistributeFpnRpnProposalsOp(self.train).forward
        )(blobs_in, blobs_out, name=name)

        return outputs

    def DropoutIfTraining(self, blob_in, dropout_rate):
        """Add in-place dropout when the model is in training mode.

        Dropout is only added when ``self.train`` is set and
        ``dropout_rate`` > 0; otherwise ``blob_in`` is returned untouched.
        """
        blob_out = blob_in
        if self.train and dropout_rate > 0:
            blob_out = self.Dropout(blob_in, blob_in, ratio=dropout_rate, is_test=False)
        return blob_out

    def RoIFeatureTransform(self, blobs_in, blob_out, blob_rois='rois', method='RoIPoolF',
                            resolution=7, spatial_scale=1. / 16., sampling_ratio=0):
        """Add the specified RoI pooling method.

        The sampling_ratio argument is supported by some, but not all, RoI
        transform methods.

        RoIFeatureTransform abstracts away:
          - Use of FPN or not
          - Specifics of the transform method

        Args:
            blobs_in: a single feature blob, or a list of per-level feature
                blobs (FPN case, ordered from coarsest to finest level).
            blob_out: name of the output blob.
            blob_rois: name (prefix, in the FPN case) of the RoI blob.
            method: 'RoIPoolF' or 'RoIAlign'.
            resolution: pooled output width and height.
            spatial_scale: a single scale, or in the FPN case a sequence of
                scales indexed in the same order as ``blobs_in``.
            sampling_ratio: forwarded to the pooling op.

        Returns:
            The blob holding the transformed features only (auxiliary outputs
            such as the argmax blob are not returned).
        """
        assert method in {'RoIPoolF', 'RoIAlign'}, 'Unknown pooling method: {}'.format(method)
        has_argmax = (method == 'RoIPoolF')
        if isinstance(blobs_in, list):
            # FPN case: add RoIFeatureTransform to each FPN level
            k_max = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid
            k_min = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid
            assert len(blobs_in) == k_max - k_min + 1
            bl_out_list = []
            for lvl in range(k_min, k_max + 1):
                bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
                sc = spatial_scale[k_max - lvl]  # in reversed order
                bl_rois = blob_rois + '_fpn' + str(lvl)
                bl_out = blob_out + '_fpn' + str(lvl)
                bl_out_list.append(bl_out)
                bl_argmax = ['_argmax_' + bl_out] if has_argmax else []
                self.net.__getattr__(method)([bl_in, bl_rois],
                                             [bl_out] + bl_argmax,
                                             pooled_w=resolution,
                                             pooled_h=resolution,
                                             spatial_scale=sc,
                                             sampling_ratio=sampling_ratio)
            # The pooled features from all levels are concatenated along the
            # batch dimension into a single 4D tensor.
            xform_shuffled, _ = self.net.Concat(bl_out_list,
                                                [blob_out + '_shuffled', '_concat_' + blob_out],
                                                axis=0)
            # Unshuffle to match rois from dataloader
            restore_bl = blob_rois + '_idx_restore_int32'
            xform_out = self.net.BatchPermutation([xform_shuffled, restore_bl], blob_out)
        else:
            # Single feature level
            bl_argmax = ['_argmax_' + blob_out] if has_argmax else []
            # sampling_ratio is ignored for RoIPoolF
            xform_out = self.net.__getattr__(method)([blobs_in, blob_rois],
                                                     [blob_out] + bl_argmax,
                                                     pooled_w=resolution,
                                                     pooled_h=resolution,
                                                     spatial_scale=spatial_scale,
                                                     sampling_ratio=sampling_ratio)
        # Only return the first blob (the transformed features). When the op
        # was created with an extra argmax output the result holds several
        # blobs, so unwrap it.
        return xform_out[0] if isinstance(xform_out, tuple) else xform_out

    def ConvShared(self, blob_in, blob_out, dim_in, dim_out, kernel, weight=None, bias=None, **kwargs):
        """Add a conv op that shares weights and/or biases with another conv op.

        Args:
            blob_in: input blob.
            blob_out: output blob.
            dim_in, dim_out: accepted for signature parity with Conv(); not
                used by this method.
            kernel: kernel size passed to the Conv op.
            weight: weight blob to reuse.
            bias: bias blob to reuse (ignored when ``no_bias`` is truthy).
            **kwargs: forwarded to ``self.net.Conv``; ``no_bias`` is consumed
                here.

        Returns:
            The output of ``self.net.Conv``.
        """
        # 'no_bias' is an option of this wrapper, not of the Conv op itself,
        # so it is removed from kwargs before they are forwarded.
        use_bias = not kwargs.pop('no_bias', False)

        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit

        if use_bias:
            blobs_in = [blob_in, weight, bias]
        else:
            blobs_in = [blob_in, weight]

        return self.net.Conv(blobs_in, blob_out, kernel=kernel, order=self.order, **kwargs)

    def BilinearInterpolation(self, blob_in, blob_out, dim_in, dim_out, up_scale):
        """Bilinear interpolation in space of scale.

        Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s := up_scale.

        Implemented as a ConvTranspose whose weights are a fixed bilinear
        kernel; the weight and bias are added to ``do_not_update_params`` so
        they are never trained.

        Adapted from the CVPR'15 FCN code.
        See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py

        Args:
            blob_in: input blob.
            blob_out: output blob.
            dim_in, dim_out: channel counts; must be equal.
            up_scale: upsampling factor; must be even.
        """
        assert dim_in == dim_out
        assert up_scale % 2 == 0, 'Scale should be even'

        def upsample_filt(size):
            """Make a 2D bilinear kernel of shape (size, size) for upsampling."""
            factor = (size + 1) // 2
            if size % 2 == 1:
                center = factor - 1
            else:
                center = factor - 0.5
            og = np.ogrid[:size, :size]
            return ((1 - abs(og[0] - center) / factor) *
                    (1 - abs(og[1] - center) / factor))

        kernel_size = up_scale * 2
        bil_filt = upsample_filt(kernel_size)

        # Only the "diagonal" (channel i -> channel i) filters are non-zero,
        # so channels are upsampled independently of each other.
        kernel = np.zeros((dim_in, dim_out, kernel_size, kernel_size), dtype=np.float32)
        kernel[range(dim_out), range(dim_in), :, :] = bil_filt

        blob = self.ConvTranspose(blob_in, blob_out, dim_in, dim_out, kernel_size,
                                  stride=int(up_scale), pad=int(up_scale / 2),
                                  weight_init=('GivenTensorFill', {'values': kernel}),
                                  bias_init=('ConstantFill', {'value': 0.}))
        # Freeze the parameters that ConvTranspose just registered.
        self.do_not_update_params.append(self.weights[-1])
        self.do_not_update_params.append(self.biases[-1])
        return blob

    def ConvAffine(self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad,
                   group=1, dilation=1, weight_init=None, bias_init=None,
                   suffix='_bn', inplace=False):  # args in the same order as Conv()
        """Add a bias-free Conv op followed by an AffineChannel op.

        The AffineChannel op replaces BN during fine tuning. The conv output
        blob is named ``prefix`` and the affine output ``prefix + suffix``
        (unless ``inplace`` is set, see AffineChannel).
        """
        conv_blob = self.Conv(blob_in, prefix, dim_in, dim_out, kernel, stride=stride,
                              pad=pad, group=group, dilation=dilation,
                              weight_init=weight_init, bias_init=bias_init, no_bias=1)
        blob_out = self.AffineChannel(conv_blob, prefix + suffix, inplace=inplace)
        return blob_out

    def DisableCudnn(self):
        """Turn cuDNN off, remembering the current setting."""
        self.prev_use_cudnn = self.use_cudnn
        self.use_cudnn = False

    def RestorePreviousUseCudnn(self):
        """Swap ``use_cudnn`` with the previously remembered setting."""
        self.use_cudnn, self.prev_use_cudnn = self.prev_use_cudnn, self.use_cudnn

    def UpdateWorkspaceLr(self, cur_iter):
        """Update the model's current learning rate and the workspace.

        The workspace update covers the learning rate blob and, via
        _SetNewLr, the update history / momentum blobs.

        Args:
            cur_iter: iteration for which the learning rate is looked up.

        Returns:
            The learning rate given by the lr policy for ``cur_iter``.
        """
        # The workspace is the one source of truth for the lr.
        # The lr is always the same on all GPUs.
        cur_lr = workspace.FetchBlob('gpu_0/lr')[0]
        new_lr = lr_policy.get_lr_at_iter(cur_iter)
        # There are no type conversions between the lr in Python and the lr
        # on the GPU (both are float32), so exact comparison is ok.
        if cur_lr != new_lr:
            ratio = _get_lr_change_ratio(cur_lr, new_lr)
            # Only log changes that are large enough to be interesting.
            if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD:
                logger.info('Changing learning rate {:.6f} -> {:.6f} at iter {:d}'.
                            format(cur_lr, new_lr, cur_iter))
            self._SetNewLr(cur_lr, new_lr)
        return new_lr

    def _SetNewLr(self, cur_lr, new_lr):
        """Do the actual work of updating the model and workspace blobs.

        Feeds ``new_lr`` into the lr blob of every GPU and, when momentum
        scaling is enabled and the change is large enough, rescales the
        momentum history by ``new_lr / cur_lr``.
        """
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                workspace.FeedBlob('gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
        ratio = _get_lr_change_ratio(cur_lr, new_lr)
        # The cur_lr > 1e-7 guard avoids dividing by a (near-)zero lr below.
        if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
                ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
            self._CorrectMomentum(new_lr / cur_lr)

    def _CorrectMomentum(self, correction):
        """Scale the momentum history of every trainable param by ``correction``.

        The MomentumSGDUpdate op implements the update V as

            V := mu * V + lr * grad,

        where mu is the momentum factor, lr is the learning rate, and grad is
        the stochastic gradient. Since V is not defined independently of the
        learning rate (as it ideally should be), when the learning rate is
        changed the update history V should be scaled as well, to maintain
        consistency in scale with lr * grad.
        """
        logger.info('Scaling update history by {:.6f} (new lr / old lr)'.format(correction))
        for i in range(cfg.NUM_GPUS):
            with c2_utils.CudaScope(i):
                for param in self.TrainableParams(gpu_id=i):
                    # In-place Scale op on the param's '<param>_momentum' blob.
                    op = core.CreateOperator('Scale', [param + '_momentum'], [param + '_momentum'],
                                             scale=correction)
                    workspace.RunOperatorOnce(op)

    def AddLosses(self, losses):
        """Register one loss or a list of losses (duplicates are dropped).

        Loss names are stored unscoped, as strings.
        """
        if not isinstance(losses, list):
            losses = [losses]
        # Conversion to str allows losses to include BlobReferences
        losses = [c2_utils.UnscopeName(str(l)) for l in losses]
        self.losses = list(set(self.losses + losses))

    def AddMetrics(self, metrics):
        """Register one metric or a list of metrics (duplicates are dropped)."""
        if not isinstance(metrics, list):
            metrics = [metrics]
        self.metrics = list(set(self.metrics + metrics))


def _get_lr_change_ratio(cur_lr, new_lr):
    eps = 1e-10
    ratio = np.max((new_lr / np.max((cur_lr, eps)),
                    cur_lr / np.max((new_lr, eps))) )
    return ratio

你可能感兴趣的:(Caffe2,Caffe2)