Detectron defines a DetectionModelHelper class to represent a Detectron model.
"""
Defines DetectionModelHelper,
the class that represents a Detectron model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
import logging
from caffe2.python import cnn
from caffe2.python import core
from caffe2.python import workspace
from core.config import cfg
# Custom Python ops
from ops.collect_and_distribute_fpn_rpn_proposals import CollectAndDistributeFpnRpnProposalsOp
from ops.generate_proposal_labels import GenerateProposalLabelsOp
from ops.generate_proposals import GenerateProposalsOp
# Learning rate policy
from utils import lr_policy
import roi_data.fast_rcnn
import utils.c2 as c2_utils
logger = logging.getLogger(__name__)
class DetectionModelHelper(cnn.CNNModelHelper): # parent class: cnn.CNNModelHelper
def __init__(self, **kwargs):
# Handle args specific to DetectionModelHelper; the rest are passed on to cnn.CNNModelHelper
self.train = kwargs.get('train', False)
self.num_classes = kwargs.get('num_classes', -1)
assert self.num_classes > 0, 'num_classes must be > 0'
for k in ('train', 'num_classes'):
if k in kwargs:
del kwargs[k]
kwargs['order'] = 'NCHW'
# Defensively set cudnn_exhaustive_search to False in case the default changes in CNNModelHelper.
# The detection code uses variable-size inputs, which does not play nicely with
# cudnn_exhaustive_search.
kwargs['cudnn_exhaustive_search'] = False
super(DetectionModelHelper, self).__init__(**kwargs)
self.roi_data_loader = None
self.losses = []
self.metrics = []
self.do_not_update_params = [] # list of params that should not be updated during training
self.net.Proto().type = cfg.MODEL.EXECUTION_TYPE # dag
self.net.Proto().num_workers = cfg.NUM_GPUS * 4 # 4 workers per GPU
self.prev_use_cudnn = self.use_cudnn
def TrainableParams(self, gpu_id=-1):
"""
Get the blob names for all trainable parameters, possibly filtered by GPU id.
"""
return [p for p in self.params if (p in self.param_to_grad and # p has a gradient
p not in self.do_not_update_params and # not on the blacklist
(gpu_id == -1 or # filter for gpu assignment, if gpu_id set
str(p).find('gpu_{}'.format(gpu_id)) == 0) )]
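# Illustrative sketch (not part of Detectron): with multi-GPU data-parallel training,
# each parameter blob name is scoped with a 'gpu_<id>/' prefix, which is what the
# str(p).find('gpu_{}'.format(gpu_id)) == 0 check above relies on. A minimal standalone
# example of that filtering, using plain strings instead of BlobReferences:
param_names = ['gpu_0/conv1_w', 'gpu_0/conv1_b', 'gpu_1/conv1_w', 'gpu_1/conv1_b']
gpu_id = 1
print([p for p in param_names if p.find('gpu_{}'.format(gpu_id)) == 0])
# -> ['gpu_1/conv1_w', 'gpu_1/conv1_b']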
def AffineChannel(self, blob_in, blob_out, share_with=None, inplace=False):
"""
Add a per-channel affine transformation that replaces BN when BN cannot be used
(e.g., because the minibatch size is too small).
The op can share its scale/bias parameters with another AffineChannel op by
specifying, via share_with, the blob name (excluding the '_{s,b}' suffix) to share
with. This is used to reduce memory.
"""
blob_out = blob_out or self.net.NextName()
is_not_sharing = share_with is None
param_prefix = blob_out if is_not_sharing else share_with
scale = core.ScopedBlobReference(param_prefix + '_s', self.param_init_net)
bias = core.ScopedBlobReference(param_prefix + '_b', self.param_init_net)
if is_not_sharing:
self.net.Proto().external_input.extend([str(scale), str(bias)])
self.params.extend([scale, bias])
self.weights.append(scale)
self.biases.append(bias)
if inplace:
return self.net.AffineChannel([blob_in, scale, bias], blob_in)
else:
return self.net.AffineChannel([blob_in, scale, bias], blob_out)
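# Illustrative sketch (not part of Detectron): numerically, AffineChannel applies a
# per-channel affine transform to an NCHW tensor, y[n, c] = scale[c] * x[n, c] + bias[c].
# A NumPy equivalent under that assumption:
import numpy as np  # mirrors the module-level import
x = np.random.randn(2, 3, 4, 4).astype(np.float32)    # NCHW input
scale = np.random.randn(3).astype(np.float32)          # one scale per channel
bias = np.random.randn(3).astype(np.float32)           # one bias per channel
y = x * scale.reshape(1, 3, 1, 1) + bias.reshape(1, 3, 1, 1)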
def GenerateProposals(self, blobs_in, blobs_out, anchors, spatial_scale):
"""
Add a Python op to generate RPN proposals.
- Input blobs (blobs_in):
  - rpn_cls_probs: 4D tensor of shape (N, A, H, W), where
      N is the number of minibatch images,
      A is the number of anchors per location, and
      (H, W) is the spatial size of the prediction grid.
    Each value represents a probability of object in [0, 1].
  - rpn_bbox_pred: 4D tensor of shape (N, 4 * A, H, W) of predicted deltas used
    to transform anchor boxes into RPN proposals.
  - im_info: 2D tensor of shape (N, 3), where the three columns encode the input
    image's [height, width, scale]. Height and width are for the network input,
    not the original image; scale is the scale factor used to scale the original
    image to the network input size.
- Output blobs (blobs_out):
  - rpn_rois: 2D tensor of shape (R, 5). For the R RPN proposals, the five columns
    encode [batch ind, x1, y1, x2, y2]. The boxes are with respect to the network
    input, which is a scaled version of the original image; these proposals must be
    scaled by 1 / scale (where scale comes from im_info) to transform them back to
    the original input image coordinate system.
  - rpn_roi_probs: 1D tensor of objectness probability scores (extracted from
    rpn_cls_probs).
"""
name = 'GenerateProposalsOp:' + ','.join([str(b) for b in blobs_in])
self.net.Python(GenerateProposalsOp(anchors, spatial_scale, self.train).forward)(blobs_in, blobs_out, name=name)
# Caffe2 provides a high-level interface for creating Python ops: the Net.Python() interface.
return blobs_out
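# Illustrative sketch (not part of Detectron): as described in the docstring above,
# rpn_rois are in network-input coordinates and must be divided by the im_info scale
# to map back to the original image. A NumPy sketch of that rescaling, leaving the
# batch-index column untouched:
import numpy as np
rpn_rois = np.array([[0., 10., 20., 110., 220.]], dtype=np.float32)  # [batch_ind, x1, y1, x2, y2]
im_scale = 2.0  # third column of im_info for this image
rois_orig = rpn_rois.copy()
rois_orig[:, 1:5] /= im_scale  # -> [[0., 5., 10., 55., 110.]]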
def GenerateProposalLabels(self, blobs_in):
"""
Add a Python op to generate training labels for RPN proposals.
- Used when training RPN jointly with Fast/Mask R-CNN (e.g., end-to-end Faster R-CNN
  training).
- Input blobs (blobs_in):
  - rpn_rois: 2D tensor of RPN proposals output by GenerateProposals.
  - roidb: roidb entries that will be labeled.
  - im_info: see the GenerateProposals doc.
- Output blobs (blobs_out):
  - (variable set of blobs): returns whatever blobs are required for training the
    model. It does this by querying the data loader for the list of blobs that are
    needed.
"""
name = 'GenerateProposalLabelsOp:' + ','.join([str(b) for b in blobs_in])
# The list of output blobs is not known before run time because it depends on the
# specific model being trained. Query the data loader to get the list of output blob names.
blobs_out = roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=self.train)
blobs_out = [core.ScopedBlobReference(b) for b in blobs_out]
self.net.Python(GenerateProposalLabelsOp().forward)(blobs_in, blobs_out, name=name)
return blobs_out
def CollectAndDistributeFpnRpnProposals(self):
"""
Python op that merges RPN proposals generated at multiple FPN levels and then
distributes those proposals to their appropriate FPN levels.
- An anchor at one FPN level may predict an RoI that will map to another level,
  hence the need to redistribute the proposals.
- Input blobs:
  - [rpn_rois_fpn<min>, ..., rpn_rois_fpn<max>,
     rpn_roi_probs_fpn<min>, ..., rpn_roi_probs_fpn<max>]
    where
    rpn_rois_fpn<i> are the RPN proposals for FPN level i, and
    rpn_roi_probs_fpn<i> are the RPN objectness probabilities for FPN level i.
    When used during training, the input blobs also include: [roidb, im_info].
- Output blobs:
  - [rois_fpn<min>, ..., rois_fpn<max>, rois, rois_idx_restore]
    where
    rois_fpn<i> are the RPN proposals for FPN level i, and
    rois_idx_restore is a permutation on the concatenation of all rois_fpn<i>,
    i=min...max, such that, when applied, the RPN RoIs are restored to their
    original order in the input blobs.
    When used during training, the output blobs also include:
    [labels, bbox_targets, bbox_inside_weights, bbox_outside_weights].
"""
k_max = cfg.FPN.RPN_MAX_LEVEL
k_min = cfg.FPN.RPN_MIN_LEVEL
# Prepare input blobs
rois_names = ['rpn_rois_fpn' + str(l) for l in range(k_min, k_max + 1)]
score_names = ['rpn_roi_probs_fpn' + str(l) for l in range(k_min, k_max + 1)]
blobs_in = rois_names + score_names
if self.train:
blobs_in += ['roidb', 'im_info']
blobs_in = [core.ScopedBlobReference(b) for b in blobs_in]
name = 'CollectAndDistributeFpnRpnProposalsOp:' + ','.join([str(b) for b in blobs_in])
# Prepare output blobs
blobs_out = roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=self.train)
blobs_out = [core.ScopedBlobReference(b) for b in blobs_out]
outputs = self.net.Python(CollectAndDistributeFpnRpnProposalsOp(self.train).forward)(blobs_in, blobs_out, name=name)
return outputs
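# Illustrative sketch (not part of Detectron; FPN levels 2..6 are an assumed config):
# with RPN_MIN_LEVEL = 2 and RPN_MAX_LEVEL = 6, the unscoped input blob names built
# above look like this:
k_min, k_max = 2, 6  # assumed cfg.FPN.RPN_MIN_LEVEL / cfg.FPN.RPN_MAX_LEVEL
rois_names = ['rpn_rois_fpn' + str(l) for l in range(k_min, k_max + 1)]
score_names = ['rpn_roi_probs_fpn' + str(l) for l in range(k_min, k_max + 1)]
print(rois_names + score_names)
# -> ['rpn_rois_fpn2', ..., 'rpn_rois_fpn6', 'rpn_roi_probs_fpn2', ..., 'rpn_roi_probs_fpn6']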
def DropoutIfTraining(self, blob_in, dropout_rate):
"""
Add dropout with the given dropout_rate to blob_in if the model is in training mode and dropout_rate is > 0.
"""
blob_out = blob_in
if self.train and dropout_rate > 0:
blob_out = self.Dropout(blob_in, blob_in, ratio=dropout_rate, is_test=False)
return blob_out
def RoIFeatureTransform(self, blobs_in, blob_out, blob_rois='rois', method='RoIPoolF',
resolution=7, spatial_scale=1. / 16., sampling_ratio=0):
"""
Add the specified RoI pooling method. The sampling_ratio argument is supported for
some, but not all, RoI transform methods.
RoIFeatureTransform abstracts away:
- Use of FPN or not
- Specifics of the transform method
"""
assert method in {'RoIPoolF', 'RoIAlign'}, 'Unknown pooling method: {}'.format(method)
has_argmax = (method == 'RoIPoolF')
if isinstance(blobs_in, list):
# FPN case: add RoIFeatureTransform to each FPN level
k_max = cfg.FPN.ROI_MAX_LEVEL # coarsest level of pyramid
k_min = cfg.FPN.ROI_MIN_LEVEL # finest level of pyramid
assert len(blobs_in) == k_max - k_min + 1
bl_out_list = []
for lvl in range(k_min, k_max + 1):
bl_in = blobs_in[k_max - lvl] # blobs_in is in reversed order
sc = spatial_scale[k_max - lvl] # in reversed order
bl_rois = blob_rois + '_fpn' + str(lvl)
bl_out = blob_out + '_fpn' + str(lvl)
bl_out_list.append(bl_out)
bl_argmax = ['_argmax_' + bl_out] if has_argmax else []
self.net.__getattr__(method)([bl_in, bl_rois],
[bl_out] + bl_argmax,
pooled_w=resolution,
pooled_h=resolution,
spatial_scale=sc,
sampling_ratio=sampling_ratio)
# The pooled features from all levels are concatenated along the batch dimension into a single 4D tensor.
xform_shuffled, _ = self.net.Concat(bl_out_list,
[blob_out + '_shuffled', '_concat_' + blob_out],
axis=0 )
# Unshuffle to match rois from dataloader
restore_bl = blob_rois + '_idx_restore_int32'
xform_out = self.net.BatchPermutation([xform_shuffled, restore_bl], blob_out )
else:
# Single feature level
bl_argmax = ['_argmax_' + blob_out] if has_argmax else []
# sampling_ratio is ignored for RoIPoolF
xform_out = self.net.__getattr__(method)([blobs_in, blob_rois],
[blob_out] + bl_argmax,
pooled_w=resolution,
pooled_h=resolution,
spatial_scale=spatial_scale,
sampling_ratio=sampling_ratio )
# Only return the first blob (the transformed/pooled features)
return xform_out
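# Illustrative sketch (not part of Detectron): the Concat + BatchPermutation pair above
# first stacks the per-level pooled features and then reorders the rows so that row i
# again corresponds to RoI i from the data loader. Assuming BatchPermutation gathers
# rows by index (out[i] = in[idx[i]]), a NumPy equivalent is:
import numpy as np
pooled_shuffled = np.arange(8, dtype=np.float32).reshape(4, 2)  # rows in FPN-level order
idx_restore = np.array([2, 0, 3, 1], dtype=np.int32)            # saved by the distribute op
pooled = pooled_shuffled[idx_restore]  # row i now matches RoI i in the original order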
def ConvShared(self, blob_in, blob_out, dim_in, dim_out, kernel, weight=None, bias=None, **kwargs):
"""
Add a conv op that shares weights and/or biases with another conv op.
"""
use_bias = ( False if ('no_bias' in kwargs and kwargs['no_bias']) else True )
if self.use_cudnn:
kwargs['engine'] = 'CUDNN'
kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
if self.ws_nbytes_limit:
kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit
if use_bias:
blobs_in = [blob_in, weight, bias]
else:
blobs_in = [blob_in, weight]
if 'no_bias' in kwargs:
del kwargs['no_bias']
return self.net.Conv(blobs_in, blob_out, kernel=kernel, order=self.order, **kwargs )
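# Illustrative sketch (hypothetical usage, not from Detectron): ConvShared does not
# create new parameter blobs; it re-uses the weight/bias blobs created by an earlier
# Conv. The model/blob names and dims below are made up for illustration:
blob_a = model.Conv('data_a', 'head_conv1', 64, 256, 3, pad=1, stride=1)
blob_b = model.ConvShared(
    'data_b', 'head_conv1_shared', 64, 256, 3, pad=1, stride=1,
    weight='head_conv1_w', bias='head_conv1_b')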
def BilinearInterpolation(self, blob_in, blob_out, dim_in, dim_out, up_scale ):
"""
Bilinear interpolation in the space of scale.
Takes an input of shape NxKxHxW and outputs NxKx(sH)x(sW), where s := up_scale.
Adapted from the CVPR'15 FCN code.
See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py
"""
assert dim_in == dim_out
assert up_scale % 2 == 0, 'Scale should be even'
def upsample_filt(size):
"""
Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size.
"""
factor = (size + 1) // 2
if size % 2 == 1:
center = factor - 1
else:
center = factor - 0.5
og = np.ogrid[:size, :size]
return ((1 - abs(og[0] - center) / factor) *
(1 - abs(og[1] - center) / factor))
kernel_size = up_scale * 2
bil_filt = upsample_filt(kernel_size)
kernel = np.zeros((dim_in, dim_out, kernel_size, kernel_size), dtype=np.float32)
kernel[range(dim_out), range(dim_in), :, :] = bil_filt
blob = self.ConvTranspose(blob_in, blob_out, dim_in, dim_out, kernel_size,
stride=int(up_scale), pad=int(up_scale / 2),
weight_init=('GivenTensorFill', {'values': kernel}),
bias_init=('ConstantFill', {'value': 0.}) )
self.do_not_update_params.append(self.weights[-1])
self.do_not_update_params.append(self.biases[-1])
return blob
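# Illustrative sketch (not part of Detectron): for up_scale = 2 the code above builds a
# 4x4 bilinear kernel. Reproducing upsample_filt standalone shows the separable weights
# [0.25, 0.75, 0.75, 0.25]:
import numpy as np
size = 4                      # kernel_size = up_scale * 2 with up_scale = 2
factor = (size + 1) // 2      # -> 2
center = factor - 0.5         # size is even -> 1.5
og = np.ogrid[:size, :size]
filt = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
# filt[0, 0] == 0.0625, filt[1, 1] == 0.5625 (outer product of [0.25, 0.75, 0.75, 0.25])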
def ConvAffine(self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad,
group=1, dilation=1, weight_init=None, bias_init=None,
suffix='_bn', inplace=False ): # args in the same order as Conv()
"""
ConvAffine adds a Conv op followed by an AffineChannel op (which replaces BN during fine tuning).
"""
conv_blob = self.Conv(blob_in, prefix, dim_in, dim_out, kernel, stride=stride,
pad=pad, group=group, dilation=dilation,
weight_init=weight_init, bias_init=bias_init, no_bias=1)
blob_out = self.AffineChannel(conv_blob, prefix + suffix, inplace=inplace)
return blob_out
def DisableCudnn(self):
self.prev_use_cudnn = self.use_cudnn
self.use_cudnn = False
def RestorePreviousUseCudnn(self):
prev_use_cudnn = self.use_cudnn
self.use_cudnn = self.prev_use_cudnn
self.prev_use_cudnn = prev_use_cudnn
def UpdateWorkspaceLr(self, cur_iter):
"""
Update the model's current learning rate and the workspace (learning rate and update history/momentum blobs).
"""
# The learning rate lives in the workspace.
# It is assumed to be the same on all GPUs, so only gpu_0/lr is read.
cur_lr = workspace.FetchBlob('gpu_0/lr')[0]
new_lr = lr_policy.get_lr_at_iter(cur_iter)
# Both the Python-side lr and the GPU lr are float32 with no type casting in between,
# so they can be compared for exact equality.
if cur_lr != new_lr:
ratio = _get_lr_change_ratio(cur_lr, new_lr)
if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD:
logger.info('Changing learning rate {:.6f} -> {:.6f} at iter {:d}'.
format(cur_lr, new_lr, cur_iter))
self._SetNewLr(cur_lr, new_lr)
return new_lr
def _SetNewLr(self, cur_lr, new_lr):
"""
Do the actual work of updating the model and workspace blobs.
"""
for i in range(cfg.NUM_GPUS):
with c2_utils.CudaScope(i):
workspace.FeedBlob('gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32))
ratio = _get_lr_change_ratio(cur_lr, new_lr)
if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \
ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD:
self._CorrectMomentum(new_lr / cur_lr)
def _CorrectMomentum(self, correction):
"""
The MomentumSGDUpdate op implements the update V as
    V := mu * V + lr * grad,
where mu is the momentum factor, lr is the learning rate, and grad is the stochastic
gradient. Since V is not defined independently of the learning rate (as it ideally
would be), when the learning rate is changed we should scale the update history V
in order to make it compatible in scale with lr * grad.
"""
logger.info('Scaling update history by {:.6f} (new lr / old lr)'.format(correction))
for i in range(cfg.NUM_GPUS):
with c2_utils.CudaScope(i):
for param in self.TrainableParams(gpu_id=i):
op = core.CreateOperator('Scale', [param + '_momentum'], [param + '_momentum'],
scale=correction)
workspace.RunOperatorOnce(op)
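# Illustrative sketch (not part of Detectron): a NumPy view of why the momentum history
# must be rescaled. V is accumulated with lr baked in, so multiplying V by
# new_lr / old_lr keeps it on the same scale as the new lr * grad terms:
import numpy as np
mu, old_lr, new_lr = 0.9, 0.01, 0.001
grad = np.array([1.0, -2.0], dtype=np.float32)
V = old_lr * grad / (1.0 - mu)  # steady-state history of V = mu * V + lr * grad under the old lr
V *= new_lr / old_lr            # the correction applied by the Scale op above
# V is now compatible in scale with new_lr * grad, as required by the docstring.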
def AddLosses(self, losses):
if not isinstance(losses, list):
losses = [losses]
# Conversion to str allows losses to include BlobReferences
losses = [c2_utils.UnscopeName(str(l)) for l in losses]
self.losses = list(set(self.losses + losses))
def AddMetrics(self, metrics):
if not isinstance(metrics, list):
metrics = [metrics]
self.metrics = list(set(self.metrics + metrics))
def _get_lr_change_ratio(cur_lr, new_lr):
eps = 1e-10
ratio = np.max((new_lr / np.max((cur_lr, eps)),
cur_lr / np.max((new_lr, eps))) )
return ratio
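# Illustrative sketch (not part of Detectron): the ratio is direction-agnostic, i.e. it
# returns the same value whether the lr increases or decreases by a given factor, which
# is what the LOG_LR_CHANGE_THRESHOLD / SCALE_MOMENTUM_THRESHOLD checks compare against.
print(_get_lr_change_ratio(0.01, 0.001))  # ~10.0
print(_get_lr_change_ratio(0.001, 0.01))  # same value as above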