In Detectron, the FPN is built first; that process is covered in the post detectron代码理解(二):FPN模型构建 (Detectron code notes II: building the FPN model). Once the FPN is in place, the RetinaNet head is constructed, which corresponds to the dashed part of the figure above.
After FPN construction, the pyramid levels carry the following blob names; each level outputs 256 feature maps.
BlobReference("gpu_0/fpn_7"), |
BlobReference("gpu_0/fpn_6"), |
BlobReference("gpu_0/fpn_res5_2_sum"), |
BlobReference("gpu_0/fpn_res4_5_sum"), |
BlobReference("gpu_0/fpn_res3_3_sum") |
The head itself is built by the add_fpn_retinanet_outputs() function in retinanet_heads.py, in two passes:
1. For each FPN level, add the classification tower, i.e. the class subnet in the dashed box of the figure above.
2. For each FPN level, add the box-regression tower, i.e. the box subnet in the dashed box.
The classification tower is conv->relu->conv->relu->conv->relu->conv->relu->logits layer:
four 3×3, 256-channel convolutions, each followed by a ReLU, with a logits layer added at the end.
for lvl in range(k_min, k_max + 1):  # add a classification tower to every FPN level
    bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order, so this starts at gpu_0/fpn_res3_3_sum
    # classification tower stack convolution starts
    for nconv in range(cfg.RETINANET.NUM_CONVS):  # four 3x3 conv layers
        suffix = 'n{}_fpn{}'.format(nconv, lvl)  # e.g. n0_fpn3
        dim_in, dim_out = dim_in, dim_in
        if lvl == k_min:
            bl_out = model.Conv(
                bl_in,
                'retnet_cls_conv_' + suffix,  # e.g. retnet_cls_conv_n0_fpn3
                dim_in,   # 256, the FPN feature-map count
                dim_out,  # 256, the output feature-map count
                3,
                stride=1,
                pad=1,
                weight_init=('GaussianFill', {
                    'std': 0.01
                }),
                bias_init=('ConstantFill', {
                    'value': 0.
                })
            )
        else:
            bl_out = model.ConvShared(
                bl_in,
                'retnet_cls_conv_' + suffix,
                dim_in,
                dim_out,
                3,
                stride=1,
                pad=1,
                weight='retnet_cls_conv_n{}_fpn{}_w'.format(nconv, k_min),
                bias='retnet_cls_conv_n{}_fpn{}_b'.format(nconv, k_min)
            )
        bl_in = model.Relu(bl_out, bl_out)
        bl_feat = bl_in
    # cls tower stack convolution ends. Add the logits layer now
    if lvl == k_min:
        retnet_cls_pred = model.Conv(
            bl_feat,
            'retnet_cls_pred_fpn{}'.format(lvl),
            dim_in,
            cls_pred_dim * A,  # each of the A anchors predicts cls_pred_dim class scores
            3,
            pad=1,
            stride=1,
            weight_init=('GaussianFill', {
                'std': 0.01
            }),
            bias_init=bias_init  # prior-probability bias init, defined earlier in the function
        )
    else:
        retnet_cls_pred = model.ConvShared(
            bl_feat,
            'retnet_cls_pred_fpn{}'.format(lvl),
            dim_in,
            cls_pred_dim * A,
            3,
            pad=1,
            stride=1,
            weight='retnet_cls_pred_fpn{}_w'.format(k_min),
            bias='retnet_cls_pred_fpn{}_b'.format(k_min)
        )
    if not model.train:
        if cfg.RETINANET.SOFTMAX:
            model.net.GroupSpatialSoftmax(
                retnet_cls_pred,
                'retnet_cls_prob_fpn{}'.format(lvl),
                num_classes=cls_pred_dim
            )
        else:
            model.net.Sigmoid(
                retnet_cls_pred, 'retnet_cls_prob_fpn{}'.format(lvl)
            )
    if cfg.RETINANET.SHARE_CLS_BBOX_TOWER:
        bbox_feat_list.append(bl_feat)
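The logits layer's output width, cls_pred_dim * A, follows directly from the config. A minimal sketch of the arithmetic, assuming Detectron's default RetinaNet settings on COCO (3 aspect ratios, 3 scales per octave, 81 classes, sigmoid loss):

# Channel count of retnet_cls_pred_fpnX under the assumed default settings.
num_classes = 81                 # 80 COCO classes + 1 background
aspect_ratios = (0.5, 1.0, 2.0)  # cfg.RETINANET.ASPECT_RATIOS
scales_per_octave = 3            # cfg.RETINANET.SCALES_PER_OCTAVE
use_softmax = False              # cfg.RETINANET.SOFTMAX

A = len(aspect_ratios) * scales_per_octave  # 9 anchors per spatial location
cls_pred_dim = num_classes if use_softmax else num_classes - 1  # 80
print(cls_pred_dim * A)  # 720 output channels per FPN level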
Each FPN level's classification tower consists of four conv layers plus a final logits layer, named as follows:

FPN level | four conv layers | logits layer
P3 | gpu_0/retnet_cls_conv_n0_fpn3  gpu_0/retnet_cls_conv_n1_fpn3  gpu_0/retnet_cls_conv_n2_fpn3  gpu_0/retnet_cls_conv_n3_fpn3 | gpu_0/retnet_cls_pred_fpn3
P4 | gpu_0/retnet_cls_conv_n0_fpn4  gpu_0/retnet_cls_conv_n1_fpn4  gpu_0/retnet_cls_conv_n2_fpn4  gpu_0/retnet_cls_conv_n3_fpn4 | gpu_0/retnet_cls_pred_fpn4
P5 | gpu_0/retnet_cls_conv_n0_fpn5  gpu_0/retnet_cls_conv_n1_fpn5  gpu_0/retnet_cls_conv_n2_fpn5  gpu_0/retnet_cls_conv_n3_fpn5 | gpu_0/retnet_cls_pred_fpn5
P6 | gpu_0/retnet_cls_conv_n0_fpn6  gpu_0/retnet_cls_conv_n1_fpn6  gpu_0/retnet_cls_conv_n2_fpn6  gpu_0/retnet_cls_conv_n3_fpn6 | gpu_0/retnet_cls_pred_fpn6
P7 | gpu_0/retnet_cls_conv_n0_fpn7  gpu_0/retnet_cls_conv_n1_fpn7  gpu_0/retnet_cls_conv_n2_fpn7  gpu_0/retnet_cls_conv_n3_fpn7 | gpu_0/retnet_cls_pred_fpn7

Each conv layer has a weight and a bias blob (e.g. retnet_cls_conv_n0_fpn3_w and retnet_cls_conv_n0_fpn3_b), as does the logits layer (retnet_cls_pred_fpn3_w and retnet_cls_pred_fpn3_b).
Note that the classification convolutions attached to fpn4–fpn7 have no parameters of their own: through ConvShared they all reuse fpn3's (the k_min level's) weights and biases.
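The sharing is purely name-based: for every level above k_min, ConvShared formats the weight and bias blob names with k_min rather than the current level, so all levels resolve to the same parameter blobs. A quick sketch of the names each level's first conv actually reads:

# Which parameter blob each level's first cls conv reads (k_min = 3 here).
k_min, k_max = 3, 7
for lvl in range(k_min, k_max + 1):
    layer = 'retnet_cls_conv_n0_fpn{}'.format(lvl)
    weight = 'retnet_cls_conv_n0_fpn{}_w'.format(k_min)  # always fpn3's blob
    print('{} -> {}'.format(layer, weight))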
Once the classification towers are in place, the box-regression towers are built.
The box-regression tower has the same shape, conv->relu->conv->relu->conv->relu->conv->relu->bbox_pred:
four 3×3, 256-channel convolutions, each followed by a ReLU, capped by a bbox prediction layer instead of a logits layer.
if not cfg.RETINANET.SHARE_CLS_BBOX_TOWER:  # defaults to False, so this branch runs
    for lvl in range(k_min, k_max + 1):  # add a box-regression tower to every FPN level
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        for nconv in range(cfg.RETINANET.NUM_CONVS):
            suffix = 'n{}_fpn{}'.format(nconv, lvl)
            dim_in, dim_out = dim_in, dim_in
            if lvl == k_min:
                bl_out = model.Conv(
                    bl_in,
                    'retnet_bbox_conv_' + suffix,
                    dim_in,   # 256
                    dim_out,  # 256
                    3,
                    stride=1,
                    pad=1,
                    weight_init=('GaussianFill', {
                        'std': 0.01
                    }),
                    bias_init=('ConstantFill', {
                        'value': 0.
                    })
                )
            else:
                bl_out = model.ConvShared(
                    bl_in,
                    'retnet_bbox_conv_' + suffix,
                    dim_in,
                    dim_out,
                    3,
                    stride=1,
                    pad=1,
                    weight='retnet_bbox_conv_n{}_fpn{}_w'.format(
                        nconv, k_min
                    ),
                    bias='retnet_bbox_conv_n{}_fpn{}_b'.format(
                        nconv, k_min
                    )
                )
            print(bl_out)
            bl_in = model.Relu(bl_out, bl_out)
            # Add octave scales and aspect ratio
            # At least 1 convolution for dealing different aspect ratios
            bl_feat = bl_in
        bbox_feat_list.append(bl_feat)
# Depending on the features [shared/separate] for bbox, add prediction layer
for i, lvl in enumerate(range(k_min, k_max + 1)):
    bbox_pred = 'retnet_bbox_pred_fpn{}'.format(lvl)
    bl_feat = bbox_feat_list[i]
    if lvl == k_min:
        model.Conv(
            bl_feat,            # input blob
            bbox_pred,          # name of the output blob
            dim_in,             # input channel count
            bbox_regr_dim * A,  # output channel count
            3,
            pad=1,
            stride=1,
            weight_init=('GaussianFill', {
                'std': 0.01
            }),
            bias_init=('ConstantFill', {
                'value': 0.
            })
        )
    else:
        model.ConvShared(
            bl_feat,
            bbox_pred,
            dim_in,
            bbox_regr_dim * A,
            3,
            pad=1,
            stride=1,
            weight='retnet_bbox_pred_fpn{}_w'.format(k_min),
            bias='retnet_bbox_pred_fpn{}_b'.format(k_min)
        )
    print(bbox_pred)
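The bbox prediction width, bbox_regr_dim * A, follows the same pattern as the classification head. A minimal sketch, again assuming the defaults (class-agnostic boxes, 9 anchors per location):

# Channel count of retnet_bbox_pred_fpnX under the assumed default settings.
num_classes = 81
class_specific_bbox = False  # cfg.RETINANET.CLASS_SPECIFIC_BBOX
A = 9                        # anchors per location, as computed for the cls head

bbox_regr_dim = 4 * (num_classes - 1) if class_specific_bbox else 4
print(bbox_regr_dim * A)  # 36 output channels per FPN level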
Each FPN level's box-regression tower consists of four conv layers plus a bbox prediction layer, named as follows:

FPN level | four conv layers
P3 | gpu_0/retnet_bbox_conv_n0_fpn3  gpu_0/retnet_bbox_conv_n1_fpn3  gpu_0/retnet_bbox_conv_n2_fpn3  gpu_0/retnet_bbox_conv_n3_fpn3
P4 | gpu_0/retnet_bbox_conv_n0_fpn4  gpu_0/retnet_bbox_conv_n1_fpn4  gpu_0/retnet_bbox_conv_n2_fpn4  gpu_0/retnet_bbox_conv_n3_fpn4
P5 | gpu_0/retnet_bbox_conv_n0_fpn5  gpu_0/retnet_bbox_conv_n1_fpn5  gpu_0/retnet_bbox_conv_n2_fpn5  gpu_0/retnet_bbox_conv_n3_fpn5
P6 | gpu_0/retnet_bbox_conv_n0_fpn6  gpu_0/retnet_bbox_conv_n1_fpn6  gpu_0/retnet_bbox_conv_n2_fpn6  gpu_0/retnet_bbox_conv_n3_fpn6
P7 | gpu_0/retnet_bbox_conv_n0_fpn7  gpu_0/retnet_bbox_conv_n1_fpn7  gpu_0/retnet_bbox_conv_n2_fpn7  gpu_0/retnet_bbox_conv_n3_fpn7

Every level's conv layers read the same parameter blobs, retnet_bbox_conv_n0_fpn3_w / retnet_bbox_conv_n0_fpn3_b through retnet_bbox_conv_n3_fpn3_w / retnet_bbox_conv_n3_fpn3_b, so P4–P7 share their tower parameters with P3.
The prediction layers are retnet_bbox_pred_fpn3 through retnet_bbox_pred_fpn7, and they likewise all read P3's parameters, retnet_bbox_pred_fpn3_w and retnet_bbox_pred_fpn3_b.
As the code shows, everything is gated on cfg.RETINANET.SHARE_CLS_BBOX_TOWER, which defaults to False, i.e. the bbox branch does not share its four conv layers with the classification branch. If it is set to True, the tower-building loop above is skipped and execution falls through to the final for loop, which attaches each bbox prediction layer directly after the classification tower's four conv layers (see the sketch below).
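A small sketch of which feature blob ends up feeding the bbox prediction layer under each setting; the helper below is hypothetical, condensed from the code above:

# Hypothetical helper: which blob feeds retnet_bbox_pred at a given level.
def bbox_pred_input(share_cls_bbox_tower, lvl=3):
    if share_cls_bbox_tower:
        # bbox_feat_list was filled inside the classification loop, so the
        # feature is the cls tower's last (in-place ReLU'd) conv output
        return 'retnet_cls_conv_n3_fpn{}'.format(lvl)
    # default: bbox_feat_list was filled by the separate bbox tower
    return 'retnet_bbox_conv_n3_fpn{}'.format(lvl)

print(bbox_pred_input(False))  # retnet_bbox_conv_n3_fpn3
print(bbox_pred_input(True))   # retnet_cls_conv_n3_fpn3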
The losses are added by the add_fpn_retinanet_losses() function, also in retinanet_heads.py:
def add_fpn_retinanet_losses(model):
loss_gradients = {}
gradients, losses = [], []
k_max = cfg.FPN.RPN_MAX_LEVEL # coarsest level of pyramid
k_min = cfg.FPN.RPN_MIN_LEVEL # finest level of pyramid
model.AddMetrics(['retnet_fg_num', 'retnet_bg_num'])
# ==========================================================================
# bbox regression loss - SelectSmoothL1Loss for multiple anchors at a location
# ==========================================================================
for lvl in range(k_min, k_max + 1):
suffix = 'fpn{}'.format(lvl)
bbox_loss = model.net.SelectSmoothL1Loss(
[
'retnet_bbox_pred_' + suffix,
'retnet_roi_bbox_targets_' + suffix,
'retnet_roi_fg_bbox_locs_' + suffix, 'retnet_fg_num'
],
'retnet_loss_bbox_' + suffix,
beta=cfg.RETINANET.BBOX_REG_BETA,
scale=model.GetLossScale() * cfg.RETINANET.BBOX_REG_WEIGHT
)
gradients.append(bbox_loss)
losses.append('retnet_loss_bbox_' + suffix)
# ==========================================================================
# cls loss - depends on softmax/sigmoid outputs
# ==========================================================================
for lvl in range(k_min, k_max + 1):
suffix = 'fpn{}'.format(lvl)
cls_lvl_logits = 'retnet_cls_pred_' + suffix
if not cfg.RETINANET.SOFTMAX:
cls_focal_loss = model.net.SigmoidFocalLoss(
[
cls_lvl_logits, 'retnet_cls_labels_' + suffix,
'retnet_fg_num'
],
['fl_{}'.format(suffix)],
gamma=cfg.RETINANET.LOSS_GAMMA,
alpha=cfg.RETINANET.LOSS_ALPHA,
scale=model.GetLossScale(),
num_classes=model.num_classes - 1
)
gradients.append(cls_focal_loss)
losses.append('fl_{}'.format(suffix))
else:
cls_focal_loss, gated_prob = model.net.SoftmaxFocalLoss(
[
cls_lvl_logits, 'retnet_cls_labels_' + suffix,
'retnet_fg_num'
],
['fl_{}'.format(suffix), 'retnet_prob_{}'.format(suffix)],
gamma=cfg.RETINANET.LOSS_GAMMA,
alpha=cfg.RETINANET.LOSS_ALPHA,
scale=model.GetLossScale(),
num_classes=model.num_classes
)
gradients.append(cls_focal_loss)
losses.append('fl_{}'.format(suffix))
loss_gradients.update(blob_utils.get_loss_gradients(model, gradients))
model.AddLosses(losses)
return loss_gradients
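For reference, SigmoidFocalLoss implements the focal loss from the RetinaNet paper, FL(p_t) = -α_t (1 - p_t)^γ log(p_t), with γ = cfg.RETINANET.LOSS_GAMMA (default 2.0) and α = cfg.RETINANET.LOSS_ALPHA (default 0.25), normalized by the foreground-anchor count retnet_fg_num. A minimal NumPy sketch of the per-anchor, per-class term (a simplification; the operator also handles the label layout and normalization internally):

import numpy as np

def focal_loss_term(logit, is_fg, gamma=2.0, alpha=0.25):
    """One anchor/class focal-loss term; is_fg marks the positive class."""
    p = 1.0 / (1.0 + np.exp(-logit))  # sigmoid probability
    p_t = p if is_fg else 1.0 - p     # probability of the correct decision
    alpha_t = alpha if is_fg else 1.0 - alpha
    return -alpha_t * (1.0 - p_t) ** gamma * np.log(p_t)

# Confident mistakes dominate the loss; easy examples are heavily down-weighted:
print(focal_loss_term(3.0, is_fg=True))   # ~2.7e-5 (easy, correct positive)
print(focal_loss_term(3.0, is_fg=False))  # ~2.1 (confident false positive)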
Per FPN level, the two loss operators read and write the following blobs; the first input of each operator (retnet_bbox_pred_fpnX / retnet_cls_pred_fpnX) is the corresponding head's output.

FPN level | SelectSmoothL1Loss inputs | SelectSmoothL1Loss output | SigmoidFocalLoss inputs | SigmoidFocalLoss output
P3 | retnet_bbox_pred_fpn3, retnet_roi_bbox_targets_fpn3, retnet_roi_fg_bbox_locs_fpn3, retnet_fg_num | retnet_loss_bbox_fpn3 | retnet_cls_pred_fpn3, retnet_cls_labels_fpn3, retnet_fg_num | fl_fpn3
P4 | retnet_bbox_pred_fpn4, retnet_roi_bbox_targets_fpn4, retnet_roi_fg_bbox_locs_fpn4, retnet_fg_num | retnet_loss_bbox_fpn4 | retnet_cls_pred_fpn4, retnet_cls_labels_fpn4, retnet_fg_num | fl_fpn4
P5 | retnet_bbox_pred_fpn5, retnet_roi_bbox_targets_fpn5, retnet_roi_fg_bbox_locs_fpn5, retnet_fg_num | retnet_loss_bbox_fpn5 | retnet_cls_pred_fpn5, retnet_cls_labels_fpn5, retnet_fg_num | fl_fpn5
P6 | retnet_bbox_pred_fpn6, retnet_roi_bbox_targets_fpn6, retnet_roi_fg_bbox_locs_fpn6, retnet_fg_num | retnet_loss_bbox_fpn6 | retnet_cls_pred_fpn6, retnet_cls_labels_fpn6, retnet_fg_num | fl_fpn6
P7 | retnet_bbox_pred_fpn7, retnet_roi_bbox_targets_fpn7, retnet_roi_fg_bbox_locs_fpn7, retnet_fg_num | retnet_loss_bbox_fpn7 | retnet_cls_pred_fpn7, retnet_cls_labels_fpn7, retnet_fg_num | fl_fpn7
According to the post detectron代码理解(五):Retinanet网络输入理解 (Detectron code notes V: understanding RetinaNet's network inputs), the blobs fed in during training are:
[u'im_info', u'retnet_fg_num', u'retnet_bg_num',
u'retnet_cls_labels_fpn3', u'retnet_roi_bbox_targets_fpn3', u'retnet_roi_fg_bbox_locs_fpn3',
u'retnet_cls_labels_fpn4', u'retnet_roi_bbox_targets_fpn4', u'retnet_roi_fg_bbox_locs_fpn4',
u'retnet_cls_labels_fpn5', u'retnet_roi_bbox_targets_fpn5', u'retnet_roi_fg_bbox_locs_fpn5',
u'retnet_cls_labels_fpn6', u'retnet_roi_bbox_targets_fpn6', u'retnet_roi_fg_bbox_locs_fpn6',
u'retnet_cls_labels_fpn7', u'retnet_roi_bbox_targets_fpn7', u'retnet_roi_fg_bbox_locs_fpn7']
Comparing the two lists, every loss input except the head outputs themselves (retnet_cls_pred_fpnX and retnet_bbox_pred_fpnX) is a blob fed in during training.