Faster R-CNN代码解读,参考:
https://github.com/adityaarun1/pytorch_fast-er_rcnn
https://github.com/jwyang/faster-rcnn.pytorch
前面已经介绍了feature map的提取;
rpn_head预测每个anchor的bg/fg得分以及bbox偏移量(中心点+长宽);
proposals_layer在feature map上生成proposals,并且将rpn_head回归的中心点偏移及长宽缩放信息作用到anchor上。也就是说,训练时的rpn_proposals已经有了,接下来需要的就是rpn_target,也就是RPN分类与回归训练的目标。
class anchor_target_layer(nn.Module):
    """Assign anchors to ground-truth targets.

    Produces anchor classification labels (1 = foreground, 0 = background,
    -1 = ignored) and bounding-box regression targets for RPN training.
    """

    def __init__(self, feat_stride, scales, ratios):
        """Store the anchor configuration.

        Args:
            feat_stride: value forwarded to ``generate_anchors_pre`` as the
                feature stride.
            scales: anchor scales.
            ratios: anchor aspect ratios.
        """
        super(anchor_target_layer, self).__init__()
        self._feat_stride = feat_stride
        self._scales = scales
        self._anchor_scales = scales
        self._anchor_ratios = ratios
        # Anchors per feature-map cell: one per (scale, ratio) pair.
        self._num_anchors = len(scales) * len(ratios)
        # Allow boxes to sit over the image edge by this many pixels
        # (0 keeps only anchors that lie fully inside the image).
        self._allowed_border = 0

    def forward(self, rpn_cls_score, gt_boxes, im_info):
        """Build RPN training targets for a batch.

        Args:
            rpn_cls_score: tensor whose first three dimensions are read as
                (batch, feat_h, feat_w); only its shape is used.
            gt_boxes: [batch, K, 5] tensor of (x1, y1, x2, y2, label).
            im_info: tensor where ``im_info[0, 0]`` is used as the image
                height and ``im_info[0, 1]`` as the image width. Only row 0
                is read, so all images in the batch are treated as having
                the same size.

        Returns:
            Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
            rpn_bbox_outside_weights)`` with shapes
            [b, 1, num_anchors * feat_h, feat_w],
            [b, feat_h, feat_w, num_anchors * 4],
            [b, feat_h, feat_w, num_anchors * 4],
            [b, feat_h, feat_w, num_anchors * 4].
            All four are plain ``view`` reshapes of the per-anchor results;
            no permute is applied.
        """
        # Algorithm:
        #   generate every anchor of the feature map
        #   drop anchors that cross the image boundary
        #   label the remaining anchors by IoU with the ground truth
        #   subsample fg / bg labels to the configured RPN batch size
        #   compute regression targets and loss weights
        #   scatter the results back to the full anchor set
        batch_size, feat_height, feat_width = rpn_cls_score.shape[0:3]

        # All anchors of the feature map, shared by every image of the batch.
        all_anchors, total_anchors = generate_anchors_pre(
            feat_height, feat_width, self._feat_stride,
            self._anchor_scales, self._anchor_ratios)
        all_anchors = torch.from_numpy(all_anchors).to(gt_boxes.device)

        # Only keep anchors inside the image.
        keep = ((all_anchors[:, 0] >= -self._allowed_border) &
                (all_anchors[:, 1] >= -self._allowed_border) &
                (all_anchors[:, 2] < int(im_info[0, 1]) + self._allowed_border) &  # width
                (all_anchors[:, 3] < int(im_info[0, 0]) + self._allowed_border))   # height
        inds_inside = torch.nonzero(keep).view(-1)
        num_inside = inds_inside.size(0)

        # [num_inside, 4] -> [batch_size, num_inside, 4]
        anchors = all_anchors[inds_inside, :]
        anchors = torch.stack((anchors,) * batch_size, dim=0)

        # label: 1 is positive, 0 is negative, -1 is don't care.
        labels = gt_boxes.new_full((batch_size, num_inside), -1)
        bbox_inside_weights = gt_boxes.new_zeros((batch_size, num_inside))
        bbox_outside_weights = gt_boxes.new_zeros((batch_size, num_inside))

        # [batch_size, num_inside, K] IoU between each anchor and each gt box.
        overlaps = bbox_overlaps_batch(anchors, gt_boxes)
        # Best gt box for every anchor ...
        max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
        # ... and best IoU reached by every gt box over all anchors.
        gt_max_overlaps, _ = torch.max(overlaps, 1)

        if not cfg['rpn_clobber_positives']:
            # Assign bg labels first so that positive labels can clobber them.
            labels[max_overlaps < cfg['rpn_negative_overlap']] = 0

        # A gt box nothing overlaps has max 0; replace it by a small value so
        # the equality test below cannot match anchors with zero overlap.
        gt_max_overlaps[gt_max_overlaps == 0] = 1e-5
        # Number of gt boxes for which each anchor is (one of) the best match.
        keep = torch.sum(
            overlaps.eq(gt_max_overlaps.view(batch_size, 1, -1).expand_as(overlaps)), 2)
        if torch.sum(keep) > 0:
            # fg label: for each gt, the anchor(s) with the highest overlap.
            labels[keep > 0] = 1

        # fg label: above threshold IoU.
        labels[max_overlaps >= cfg['rpn_positive_overlap']] = 1

        if cfg['rpn_clobber_positives']:
            # Assign bg labels last so that negative labels can clobber positives.
            labels[max_overlaps < cfg['rpn_negative_overlap']] = 0

        # Maximum number of foreground anchors per image.
        num_fg = int(cfg['rpn_fg_fraction'] * cfg['rpn_batchsize'])
        sum_fg = torch.sum((labels == 1).int(), 1)
        sum_bg = torch.sum((labels == 0).int(), 1)

        for i in range(batch_size):
            # Subsample positive labels if we have too many.
            if sum_fg[i] > num_fg:
                fg_inds = torch.nonzero(labels[i] == 1).view(-1)
                # torch.randperm had a multi-GPU segfault
                # (https://github.com/pytorch/pytorch/issues/1868),
                # so the permutation is drawn with numpy instead.
                rand_num = torch.from_numpy(
                    np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).long()
                disable_inds = fg_inds[rand_num[:fg_inds.size(0) - num_fg]]
                labels[i][disable_inds] = -1

            # Background budget = batch size minus the foregrounds kept above.
            num_bg = cfg['rpn_batchsize'] - int(torch.sum(labels[i] == 1))

            # Subsample negative labels if we have too many.
            if sum_bg[i] > num_bg:
                bg_inds = torch.nonzero(labels[i] == 0).view(-1)
                rand_num = torch.from_numpy(
                    np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).long()
                disable_inds = bg_inds[rand_num[:bg_inds.size(0) - num_bg]]
                labels[i][disable_inds] = -1

        # Turn per-image gt indices into indices of the flattened
        # [batch_size * K, 5] gt tensor so one gather serves the whole batch.
        offset = torch.arange(0, batch_size) * gt_boxes.size(1)
        argmax_overlaps = argmax_overlaps + offset.view(batch_size, 1).type_as(argmax_overlaps)
        matched_gt = gt_boxes.view(-1, 5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5)
        # [batch_size, num_inside, 4] regression targets towards the matched gt.
        bbox_targets = _compute_targets_batch(anchors, matched_gt)

        # Use a single value instead of 4 values for easy indexing; it is
        # expanded to 4 columns below.
        bbox_inside_weights[labels == 1] = cfg['rpn_bbox_inside_weights'][0]

        # Outside weights are computed per image so that every image of the
        # batch is normalised by its own sample count. Counts are clamped to 1
        # to avoid dividing by zero for an image without sampled anchors.
        if cfg['rpn_positive_weight'] < 0:
            # Uniform weighting of all sampled examples.
            num_examples = torch.sum(labels >= 0, 1).clamp(min=1).type_as(labels)
            positive_weights = (1.0 / num_examples).view(batch_size, 1).expand_as(labels)
            negative_weights = positive_weights
        else:
            assert ((cfg['rpn_positive_weight'] > 0) &
                    (cfg['rpn_positive_weight'] < 1))
            num_pos = torch.sum(labels == 1, 1).clamp(min=1).type_as(labels)
            num_neg = torch.sum(labels == 0, 1).clamp(min=1).type_as(labels)
            positive_weights = (cfg['rpn_positive_weight'] / num_pos).view(
                batch_size, 1).expand_as(labels)
            negative_weights = ((1.0 - cfg['rpn_positive_weight']) / num_neg).view(
                batch_size, 1).expand_as(labels)

        fg_mask = labels == 1
        bg_mask = labels == 0
        bbox_outside_weights[fg_mask] = positive_weights[fg_mask]
        bbox_outside_weights[bg_mask] = negative_weights[bg_mask]

        # Map the inside-image results back to the full anchor set.
        # labels / weights: [batch_size, total_anchors]
        # bbox_targets:     [batch_size, total_anchors, 4]
        labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)
        bbox_inside_weights = _unmap(
            bbox_inside_weights, total_anchors, inds_inside, batch_size, fill=0)
        bbox_outside_weights = _unmap(
            bbox_outside_weights, total_anchors, inds_inside, batch_size, fill=0)

        # labels: [b, H, W, A] -> [b, 1, A * H, W] (plain view, no permute).
        labels = labels.view(batch_size, feat_height, feat_width, self._num_anchors)
        rpn_labels = labels.view(batch_size, 1, self._num_anchors * feat_height, feat_width)

        # bbox_targets: [b, H, W, A * 4]
        rpn_bbox_targets = bbox_targets.view(
            batch_size, feat_height, feat_width, self._num_anchors * 4)

        # Weights: replicate the single per-anchor value over the 4 box
        # coordinates, then reshape to [b, H, W, A * 4].
        anchors_count = bbox_inside_weights.size(1)
        bbox_inside_weights = bbox_inside_weights.view(
            batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
        rpn_bbox_inside_weights = bbox_inside_weights.contiguous().view(
            batch_size, feat_height, feat_width, 4 * self._num_anchors)

        bbox_outside_weights = bbox_outside_weights.view(
            batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
        rpn_bbox_outside_weights = bbox_outside_weights.contiguous().view(
            batch_size, feat_height, feat_width, 4 * self._num_anchors)

        return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
一、代码解析
这里有一个generate_anchors_pre,很容易理解,就是之前用来在整幅feature map上生成anchors的函数。
这里使用了_allowed_border来限制anchor超出图像边界的距离(默认为0,即只保留完全落在图像内部的anchor)。
torch.stack就是将一张图的anchors放到批量里面,这里就要求传进来的image大小是一样的。
变量声明:
bbox_outside_weights:smooth L1 loss计算完之后,对rpn bbox regression loss进行加权所用的权重。[batch_size,total_anchors]
overlaps也就是计算rpn_target中生成的anchor与真实的gt_boxes之间的iou。
def bbox_overlaps_batch(anchors, gt_boxes):
    """Compute the IoU between every anchor and every ground-truth box.

    Box widths and heights use the inclusive-pixel convention
    (``x2 - x1 + 1``).

    Args:
        anchors: float tensor of shape (N, 4), shared by all images, or of
            shape (b, N, C). For 3-D input with ``C != 4`` columns 1:5 are
            taken as the box coordinates.
        gt_boxes: float tensor of shape (b, K, >=4); only the first four
            columns (x1, y1, x2, y2) are used.

    Returns:
        Float tensor of shape (b, N, K) with the IoU of anchor ``n`` and gt
        box ``k`` of image ``b``. Columns of gt boxes whose width and height
        are both 1 are set to 0; rows of anchors whose width and height are
        both 1 are set to -1 (applied last, so it wins over the gt mask).

    Raises:
        ValueError: if ``anchors`` is neither 2-D nor 3-D.
    """
    batch_size = gt_boxes.size(0)
    num_gt = gt_boxes.size(1)

    # Normalise anchors to a contiguous (b, N, 4) tensor.
    if anchors.dim() == 2:
        num_anchors = anchors.size(0)
        anchors = anchors.view(1, num_anchors, 4).expand(
            batch_size, num_anchors, 4).contiguous()
    elif anchors.dim() == 3:
        num_anchors = anchors.size(1)
        if anchors.size(2) == 4:
            anchors = anchors[:, :, :4].contiguous()
        else:
            # Skip the leading column and keep the 4 coordinates after it.
            anchors = anchors[:, :, 1:5].contiguous()
    else:
        raise ValueError('anchors input dimension is not correct.')

    gt_boxes = gt_boxes[:, :, :4].contiguous()

    gt_w = gt_boxes[:, :, 2] - gt_boxes[:, :, 0] + 1
    gt_h = gt_boxes[:, :, 3] - gt_boxes[:, :, 1] + 1
    gt_area = (gt_w * gt_h).view(batch_size, 1, num_gt)

    anchor_w = anchors[:, :, 2] - anchors[:, :, 0] + 1
    anchor_h = anchors[:, :, 3] - anchors[:, :, 1] + 1
    anchor_area = (anchor_w * anchor_h).view(batch_size, num_anchors, 1)

    # Boxes whose width and height are both 1 (e.g. all-zero rows).
    gt_degenerate = (gt_w == 1) & (gt_h == 1)
    anchor_degenerate = (anchor_w == 1) & (anchor_h == 1)

    # Pair every anchor with every gt box: both become (b, N, K, 4).
    boxes = anchors.view(batch_size, num_anchors, 1, 4).expand(
        batch_size, num_anchors, num_gt, 4)
    query_boxes = gt_boxes.view(batch_size, 1, num_gt, 4).expand(
        batch_size, num_anchors, num_gt, 4)

    # Intersection width / height, clamped at 0 for disjoint boxes.
    iw = (torch.min(boxes[:, :, :, 2], query_boxes[:, :, :, 2]) -
          torch.max(boxes[:, :, :, 0], query_boxes[:, :, :, 0]) + 1).clamp(min=0)
    ih = (torch.min(boxes[:, :, :, 3], query_boxes[:, :, :, 3]) -
          torch.max(boxes[:, :, :, 1], query_boxes[:, :, :, 1]) + 1).clamp(min=0)

    # Intersection divided by union.
    intersection = iw * ih
    union = anchor_area + gt_area - intersection
    overlaps = intersection / union

    # Mask the overlaps of degenerate boxes.
    overlaps.masked_fill_(
        gt_degenerate.view(batch_size, 1, num_gt).expand(
            batch_size, num_anchors, num_gt), 0)
    overlaps.masked_fill_(
        anchor_degenerate.view(batch_size, num_anchors, 1).expand(
            batch_size, num_anchors, num_gt), -1)

    return overlaps
因为代码是期望通过批量操作的,因此直接说anchors.dim()=3的那部分。
N、K分别表示rpn_target生成的anchor个数以及、真实的gt_boxes个数,从侧面也可以看到,批量处理中还是希望gt_boxes也是每张图都一样多的。
当anchors最后一维的大小不为4时,从dim=2的[1:5]中取出bbox,也就是第0位不属于bbox坐标(与bg/fg有关);大小为4时则直接使用全部4列。
而gt_boxes的dim=2中[0:4]是bbox,第4位为label。
a、首先求出gt_boxes的面积area,并view成[batch_size, 1, K],以及anchors面积并view成[batch_size,N,1]
b、继续将anchor view成[batch_size,N,4]->[batch_size,N, 1,4],然后将anchor继续扩展到每个anchor对应到gt_boxes。[batch_size, N, K, 4]
c、将gt_boxes从[batch_size, K, 4] view到[batch_size, 1, K, 4],然后继续扩展到[batch_size, N, K, 4]
d、计算overlaps
e、利用mask对overlap进行掩码操作,也就是gt_area=0的设置为0,对anchors_area=0设置为-1
计算完overlap之后,就可以利用overlap为每个anchor分配前景/背景标签(这一步是标签分配,并不是nms)。
overlap为[batch_size, total_anchors, gt_num]也就是每个anchor与对应的gt_num都有overlap。
计算每个anchor交集最大gt_boxes的max_overlaps和索引argmax_overlaps
以及每个gt_boxes与所有anchor交集的最大gt_max_overlap。
标记每个anchor的max_overlap:
1. 如果gt_max_overlaps=0,则重置为1e-5。
2. 将gt_max_overlaps扩展为[batch_size, total_anchors, gt_num],计算overlap=gt_max_overlap的位置,并使用torch.sum对每个anchor对应的gt_boxes求和,得到keep。
3. 将keep>0的labels=1,也就是将overlap=gt_max_overlap的部分设置为前景。
4. 然后将max_overlap>=rpn_positive_overlap对应的labels=1,也设置为前景。
5. 为了避免操作上的重叠,当rpn_clobber_positives为真时,最后重新将max_overlaps<rpn_negative_overlap对应的labels设置为0(背景)。
6. 计算rpn回归中一个batch里fg的个数num_fg,然后计算labels中fg和bg的个数。
7. 针对每个批次(每张图)单独操作:如果fg多于num_fg,就从labels的fg中随机选出多余的fg,并标记为-1;如果bg多于num_bg,就从labels的bg中随机选出多余的bg,并将labels=-1。其实就是不进行后面的计算。
8. 计算偏移量offset,主要是为了将不同image中的gt_boxes展开后一起运算,即argmax_overlaps = argmax_overlaps + offset。
9. 计算bbox target,将box_target表示为中心点偏移+log(gt_w/ac_w)、log(gt_h/ac_h)。
10. 将labels=1的bbox_inside_weights设置为rpn_bbox_inside_weights。
11. 计算bbox_outside_weights:当rpn_positive_weight<0时,fg和bg都设置为1/num_examples(num_examples为labels>=0的样本数)。
12. 将labels、bbox_targets、bbox_inside_weights、bbox_outside_weights重新映射到期望大小。
二、总结
def _compute_targets_batch(ex_rois, gt_rois):
"""Compute bounding-box regression targets for an image."""
return bbox_transform_batch(ex_rois, gt_rois[:, :, :4])
def bbox_transform_batch(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Compute bounding-box regression targets from ``ex_rois`` to ``gt_rois``.

    Box widths and heights use the inclusive-pixel convention
    (``x2 - x1 + 1``).

    Args:
        ex_rois: (N, 4) tensor of boxes shared by every image, or (b, N, 4)
            tensor of per-image boxes, as (x1, y1, x2, y2).
        gt_rois: (b, N, 4) tensor with the target box of every ``ex_roi``.
        weights: multipliers ``(wx, wy, ww, wh)`` applied to the four target
            components; the default leaves the targets unscaled.

    Returns:
        (b, N, 4) tensor of ``(dx, dy, dw, dh)``: centre offsets divided by
        the ``ex_roi`` size, and log size ratios.

    Raises:
        ValueError: if ``ex_rois`` is neither 2-D nor 3-D.
    """
    wx, wy, ww, wh = weights

    if ex_rois.dim() == 2:
        # Add a batch axis so the shared boxes broadcast against gt_rois.
        ex_rois = ex_rois.unsqueeze(0)
    elif ex_rois.dim() != 3:
        raise ValueError('ex_roi input dimension is not correct.')

    ex_widths = ex_rois[:, :, 2] - ex_rois[:, :, 0] + 1.0
    ex_heights = ex_rois[:, :, 3] - ex_rois[:, :, 1] + 1.0
    ex_ctr_x = ex_rois[:, :, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, :, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, :, 2] - gt_rois[:, :, 0] + 1.0
    gt_heights = gt_rois[:, :, 3] - gt_rois[:, :, 1] + 1.0
    gt_ctr_x = gt_rois[:, :, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, :, 1] + 0.5 * gt_heights

    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = ww * torch.log(gt_widths / ex_widths)
    targets_dh = wh * torch.log(gt_heights / ex_heights)

    targets = torch.stack(
        (targets_dx, targets_dy, targets_dw, targets_dh), 2)
    return targets
def _unmap(data, count, inds, batch_size, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if data.dim() == 2:
ret = torch.Tensor(batch_size, count).fill_(fill).type_as(data)
ret[:, inds] = data
else:
ret = torch.Tensor(batch_size, count, data.size(2)).fill_(fill).type_as(data)
ret[:, inds,:] = data
return ret
'''
rpn_labels: [b, 1,num_anchors*feat_height, feat_width]#rpn的fg/bg标签(1为fg,0为bg),如果为-1则表示该anchor不参与loss计算
rpn_bbox_targets: [b, feat_height, feat_width,num_anchors*4]#rpn的bbox回归target(dx, dy, dw, dh)
rpn_bbox_inside_weights: [b, feat_height, feat_width,num_anchors*4]#rpn的bbox回归权重,只有fg(labels=1)的anchor非零
rpn_bbox_outside_weights: [b, feat_height, feat_width,num_anchors*4]#rpn的smooth L1 loss的归一化权重,fg和bg的anchor非零
'''