faster rcnn代码解读(六)anchor_target_layer

faster rcnn代码解读参考:

    https://github.com/adityaarun1/pytorch_fast-er_rcnn

    https://github.com/jwyang/faster-rcnn.pytorch

前面已经涉及到了feature map的提取

rpn_head回归的bg/fg以及偏移量(中心点+长宽)

proposals_layer生成了feature map的proposals,并且将rpn_head回归的偏移及长宽放缩信息作用进来,也就是实际上训练时候的rpn_proposals有了,接下来需要的就是rpn_target,也就是回归训练的目标。

class anchor_target_layer(nn.Module):
    """Assign anchors to ground-truth targets.

    Produces anchor classification labels and bounding-box regression
    targets for a batch of images.
    """

    def __init__(self, feat_stride, scales, ratios):
        super(anchor_target_layer, self).__init__()

        self._feat_stride = feat_stride
        self._scales = scales
        self._anchor_scales = scales
        self._anchor_ratios = ratios
        # One anchor per (scale, ratio) pair at every feature-map cell.
        self._num_anchors = len(scales) * len(ratios)

        # Allow boxes to sit over the image edge by this many pixels.
        # With 0, only anchors lying entirely inside the image are kept.
        self._allowed_border = 0

    def forward(self, rpn_cls_score, gt_boxes, im_info):
        """Build RPN training targets for every anchor of the feature map.

        Args:
            rpn_cls_score: only ``shape[0:3]`` is read, interpreted as
                (batch, feat_h, feat_w).
            gt_boxes: [batch, K, 5] tensor of (x1, y1, x2, y2, label).
            im_info: indexed as ``im_info[0, 0]`` (height) and
                ``im_info[0, 1]`` (width); only the first row is used, so
                every image of the batch is treated as having that size.

        Returns:
            rpn_labels:               [b, 1, num_anchors * feat_h, feat_w]
                (1 = foreground, 0 = background, -1 = ignored)
            rpn_bbox_targets:         [b, feat_h, feat_w, num_anchors * 4]
            rpn_bbox_inside_weights:  [b, feat_h, feat_w, num_anchors * 4]
            rpn_bbox_outside_weights: [b, feat_h, feat_w, num_anchors * 4]
        """
        batch_size, feat_height, feat_width = rpn_cls_score.shape[0:3]

        # Generate every anchor of the feature map.
        all_anchors, total_anchors = generate_anchors_pre(
            feat_height, feat_width, self._feat_stride,
            self._anchor_scales, self._anchor_ratios)
        all_anchors = torch.from_numpy(all_anchors).to(gt_boxes.device)

        # Only keep anchors inside the image.
        keep = ((all_anchors[:, 0] >= -self._allowed_border) &
                (all_anchors[:, 1] >= -self._allowed_border) &
                (all_anchors[:, 2] < int(im_info[0, 1]) + self._allowed_border) &  # width
                (all_anchors[:, 3] < int(im_info[0, 0]) + self._allowed_border))   # height
        inds_inside = torch.nonzero(keep).view(-1)
        anchors = all_anchors[inds_inside, :]
        # Replicate the inside anchors for every image of the batch.
        anchors = torch.stack((anchors,) * batch_size, dim=0)

        # Labels: 1 is positive, 0 is negative, -1 is "don't care".
        # [batch_size, num_inside_anchors]
        labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
        bbox_inside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()

        # [batch_size, num_inside_anchors, K] IoU of each anchor with each gt box.
        overlaps = bbox_overlaps_batch(anchors, gt_boxes)

        # Best gt box for every anchor, and best IoU reached for every gt box.
        max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
        gt_max_overlaps, _ = torch.max(overlaps, 1)

        if not cfg['rpn_clobber_positives']:
            # Assign bg labels first so that positive labels can clobber them.
            labels[max_overlaps < cfg['rpn_negative_overlap']] = 0

        # A gt box with no overlap at all must not match every anchor below,
        # so replace its zero maximum with a small positive value.
        gt_max_overlaps[gt_max_overlaps == 0] = 1e-5
        # Per anchor: number of gt boxes for which it reaches the best IoU.
        keep = torch.sum(
            overlaps.eq(gt_max_overlaps.view(batch_size, 1, -1).expand_as(overlaps)), 2)

        if torch.sum(keep) > 0:
            # fg label: the anchor(s) with the highest IoU for each gt box.
            labels[keep > 0] = 1

        # fg label: above threshold IoU.
        labels[max_overlaps >= cfg['rpn_positive_overlap']] = 1

        if cfg['rpn_clobber_positives']:
            # Assign bg labels last so that negative labels can clobber positives.
            labels[max_overlaps < cfg['rpn_negative_overlap']] = 0

        # Maximum number of foreground anchors per image in the RPN mini-batch.
        num_fg = int(cfg['rpn_fg_fraction'] * cfg['rpn_batchsize'])

        sum_fg = torch.sum((labels == 1).int(), 1)  # foreground count per image
        sum_bg = torch.sum((labels == 0).int(), 1)  # background count per image

        for i in range(batch_size):
            # Subsample positive labels if we have too many.
            if sum_fg[i] > num_fg:
                fg_inds = torch.nonzero(labels[i] == 1).view(-1)
                # torch.randperm seems to have a bug on multi-gpu settings
                # that causes a segfault, so numpy is used instead.
                # See https://github.com/pytorch/pytorch/issues/1868.
                rand_num = torch.from_numpy(
                    np.random.permutation(fg_inds.size(0))).long().to(labels.device)
                disable_inds = fg_inds[rand_num[:fg_inds.size(0) - num_fg]]
                labels[i][disable_inds] = -1

            # Background budget = mini-batch size minus the foreground
            # anchors that are left after the subsampling above.
            num_bg = cfg['rpn_batchsize'] - int((labels[i] == 1).sum())

            # Subsample negative labels if we have too many.
            if sum_bg[i] > num_bg:
                bg_inds = torch.nonzero(labels[i] == 0).view(-1)
                rand_num = torch.from_numpy(
                    np.random.permutation(bg_inds.size(0))).long().to(labels.device)
                disable_inds = bg_inds[rand_num[:bg_inds.size(0) - num_bg]]
                labels[i][disable_inds] = -1

        # gt_boxes is flattened to [batch * K, 5] below, so the per-image
        # argmax indices are shifted by i * K to address the right rows.
        offset = torch.arange(0, batch_size) * gt_boxes.size(1)
        argmax_overlaps = argmax_overlaps + offset.view(batch_size, 1).type_as(argmax_overlaps)
        bbox_targets = _compute_targets_batch(
            anchors,
            gt_boxes.view(-1, 5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5))

        # Use a single value instead of 4 values for easy indexing; only
        # foreground anchors take part in the box regression loss.
        bbox_inside_weights[labels == 1] = cfg['rpn_bbox_inside_weights'][0]

        # Outside weights normalise the regression loss. They are computed
        # per image: the previous code reused the leaked loop index ``i`` and
        # therefore applied the last image's count to the whole batch, and it
        # left the weights undefined when rpn_positive_weight was in (0, 1).
        fg_mask = (labels == 1).type_as(gt_boxes)
        bg_mask = (labels == 0).type_as(gt_boxes)
        # clamp(min=1) avoids a division by zero for images without samples;
        # their masks are all zero, so the resulting weights stay zero.
        fg_count = fg_mask.sum(1, keepdim=True).clamp(min=1)
        bg_count = bg_mask.sum(1, keepdim=True).clamp(min=1)

        positive_weight = cfg['rpn_positive_weight']
        if positive_weight < 0:
            # Uniform weighting of all sampled (fg + bg) examples.
            num_examples = (fg_mask + bg_mask).sum(1, keepdim=True).clamp(min=1)
            positive_weights = 1.0 / num_examples
            negative_weights = 1.0 / num_examples
        else:
            assert 0 < positive_weight < 1
            positive_weights = positive_weight / fg_count
            negative_weights = (1.0 - positive_weight) / bg_count

        # [batch_size, num_inside_anchors]
        bbox_outside_weights = fg_mask * positive_weights + bg_mask * negative_weights

        # Map the results for the inside anchors back to the full anchor set.
        labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)  # [b, total_anchors]
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)  # [b, total_anchors, 4]
        bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, batch_size, fill=0)  # [b, total_anchors]
        bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, batch_size, fill=0)  # [b, total_anchors]

        # labels -> [batch_size, 1, num_anchors * feat_height, feat_width]
        labels = labels.view(batch_size, feat_height, feat_width, self._num_anchors)
        rpn_labels = labels.view(batch_size, 1, self._num_anchors * feat_height, feat_width)

        # bbox_targets -> [batch_size, feat_height, feat_width, num_anchors * 4]
        rpn_bbox_targets = bbox_targets.view(
            batch_size, feat_height, feat_width, self._num_anchors * 4)

        # Repeat each per-anchor weight for the 4 box coordinates.
        anchors_count = bbox_inside_weights.size(1)
        bbox_inside_weights = bbox_inside_weights.view(
            batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
        # [batch_size, feat_height, feat_width, num_anchors * 4]
        rpn_bbox_inside_weights = bbox_inside_weights.contiguous().view(
            batch_size, feat_height, feat_width, 4 * self._num_anchors)

        bbox_outside_weights = bbox_outside_weights.view(
            batch_size, anchors_count, 1).expand(batch_size, anchors_count, 4)
        # [batch_size, feat_height, feat_width, num_anchors * 4]
        rpn_bbox_outside_weights = bbox_outside_weights.contiguous().view(
            batch_size, feat_height, feat_width, 4 * self._num_anchors)

        return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

一、代码解析

这里有一个generate_anchors_pre,很容易理解,就是之前用来生成整幅feature map上所有anchors的函数。

这里使用了_allowed_border来限制anchor可以超出图像边界(由im_info给出的宽、高)的距离,默认为0,即只保留完全落在图像内部的anchor。

torch.stack就是将一张图的anchors放到批量里面,这里就要求传进来的image大小是一样的。

变量声明:

  • labels:也就是rpn中fg/bg的标签,初始化为-1.其中0表示bg,1表示fg。大小的话就是[batch_size,total_anchors],也就是需要对每个anchors都进行标注。
  • bbox_inside_weights:网上说是fg回归的权重。可以发现,只有labels=1的时候才会设置,也就是只设置了fg的权重,bg不参与smooth l1的计算;至于smooth l1,后面会讲到。[batch_size,total_anchors]
  • bbox_outside_weights:这一部分是smooth l1计算完之后,对rpn bbox regression loss所加的权重。[batch_size,total_anchors]
overlaps也就是计算rpn_target中生成的anchor与真实的gt_boxes之间的iou。
def bbox_overlaps_batch(anchors, gt_boxes):
    """Compute the IoU between every anchor and every gt box, per image.

    anchors: (N, 4) tensor shared by all images, or a (b, N, 4) tensor, or a
        (b, N, C) tensor with C != 4 whose box coordinates are columns 1-4
    gt_boxes: (b, K, 5) tensor; columns 0-3 are (x1, y1, x2, y2)

    overlaps: (b, N, K) tensor of IoU values; entries for single-pixel gt
        boxes are forced to 0 and entries for single-pixel anchors to -1
    """
    num_images = gt_boxes.size(0)

    # Bring the anchors into a common (b, N, 4) layout first.
    if anchors.dim() == 2:
        num_anchors = anchors.size(0)
        anchor_coords = anchors.view(1, num_anchors, 4).expand(
            num_images, num_anchors, 4).contiguous()
    elif anchors.dim() == 3:
        num_anchors = anchors.size(1)
        first_col = 0 if anchors.size(2) == 4 else 1
        anchor_coords = anchors[:, :, first_col:first_col + 4].contiguous()
    else:
        raise ValueError('anchors input dimension is not correct.')

    num_gt = gt_boxes.size(1)
    gt_coords = gt_boxes[:, :, :4].contiguous()

    # Widths/heights use the inclusive-pixel convention (+1).
    gt_w = gt_coords[:, :, 2] - gt_coords[:, :, 0] + 1
    gt_h = gt_coords[:, :, 3] - gt_coords[:, :, 1] + 1
    gt_area = (gt_w * gt_h).view(num_images, 1, num_gt)

    anchor_w = anchor_coords[:, :, 2] - anchor_coords[:, :, 0] + 1
    anchor_h = anchor_coords[:, :, 3] - anchor_coords[:, :, 1] + 1
    anchor_area = (anchor_w * anchor_h).view(num_images, num_anchors, 1)

    # Boxes whose width and height are both 1 are treated as empty.
    empty_gt = (gt_w == 1) & (gt_h == 1)
    empty_anchor = (anchor_w == 1) & (anchor_h == 1)

    # Pair every anchor with every gt box: both become (b, N, K, 4).
    pair_shape = (num_images, num_anchors, num_gt, 4)
    boxes = anchor_coords.view(num_images, num_anchors, 1, 4).expand(*pair_shape)
    query_boxes = gt_coords.view(num_images, 1, num_gt, 4).expand(*pair_shape)

    inter_w = (torch.min(boxes[..., 2], query_boxes[..., 2]) -
               torch.max(boxes[..., 0], query_boxes[..., 0]) + 1).clamp(min=0)
    inter_h = (torch.min(boxes[..., 3], query_boxes[..., 3]) -
               torch.max(boxes[..., 1], query_boxes[..., 1]) + 1).clamp(min=0)

    # Intersection divided by union.
    intersection = inter_w * inter_h
    union = anchor_area + gt_area - intersection
    overlaps = intersection / union

    # Mask the degenerate boxes; the anchor mask is applied last so it wins
    # where both the anchor and the gt box are empty.
    overlaps.masked_fill_(
        empty_gt.view(num_images, 1, num_gt).expand(num_images, num_anchors, num_gt), 0)
    overlaps.masked_fill_(
        empty_anchor.view(num_images, num_anchors, 1).expand(num_images, num_anchors, num_gt), -1)

    return overlaps

因为代码是期望通过批量操作的,因此直接说anchors.dim()=3的那部分。

N、K分别表示rpn_target生成的anchor个数以及、真实的gt_boxes个数,从侧面也可以看到,批量处理中还是希望gt_boxes也是每张图都一样多的。

当anchor的dim=2不是4列时,从[1:5]中取出bbox,也就是第0位不是坐标,而是与bg/fg有关的;如果正好是4列,则直接全部作为bbox。

而gt_boxes的dim=2中[0:4]是bbox,第4位为label

a、首先求出gt_boxes的面积area,并view成[batch_size, 1, K],以及anchors面积并view成[batch_size,N,1]

b、继续将anchor view成[batch_size,N,4]->[batch_size,N, 1,4],然后将anchor继续扩展到每个anchor对应到gt_boxes。[batch_size, N, K, 4]

c、将gt_boxes从[batch_size, K, 4] view到[batch_size, 1, K, 4],然后继续扩展到[batch_size, N, K, 4]

d、计算overlaps

e、利用mask对overlap进行掩码操作,也就是gt_area=0的设置为0,对anchors_area=0设置为-1

计算完overlap之后,就可以利用overlap为每个anchor分配fg/bg标签(注意这里不是nms,而是标签分配)。

overlap为[batch_size, total_anchors, gt_num]也就是每个anchor与对应的gt_num都有overlap。

计算每个anchor交集最大gt_boxes的max_overlaps和索引argmax_overlaps

以及每个gt_boxes与所有anchor交集的最大gt_max_overlap。

标记每个anchor的max_overlap

如果gt_max_overlaps=0则重置为1e-5.

将gt_max_overlap扩展为[batch_size, total_anchors, gt_num],找出overlap=gt_max_overlap的位置,并使用torch.sum在gt_num维度上求和,得到每个anchor对应的gt_boxes个数之和

将keep>0的labels=1将overlap=gt_max_overlap的部分设置为前景。

然后将max_overlap>=rpn_positive_overlap对应的labels=1也设置为前景

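如果设置了rpn_clobber_positives,则在最后再将max_overlaps<rpn_negative_overlap的anchor重新设置为背景,也就是让背景标签覆盖前景标签。

计算rpn的batch中fg个数的上限(rpn_fg_fraction*rpn_batchsize)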

然后计算labels中fg和bg的个数

针对每个批次单独操作。如果fg的个数超过num_fg,就从labels的fg中随机选择多出来的那部分(fg个数-num_fg),并标记为-1;同理,如果bg的个数超过num_bg,就从labels的bg中随机选择多出来的那部分(bg个数-num_bg),并将labels=-1。标记为-1其实就是不参与后面的计算。

计算偏移量offset,主要是为了将不同image中的不同个数的gt_boxes一起运算。

将argmax_overlaps = argmax_overlaps + offset

计算bbox target

def _compute_targets_batch(ex_rois, gt_rois):
    """Compute bounding-box regression targets for a batch of images.

    Only the first four columns of gt_rois (the box coordinates) are passed
    on to bbox_transform_batch; any further columns are dropped.
    """
    gt_coords = gt_rois[:, :, :4]
    return bbox_transform_batch(ex_rois, gt_coords)
def bbox_transform_batch(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Encode gt boxes as regression targets relative to example boxes.

    The targets are the centre offsets normalised by the example box size
    and the log ratios of the sizes, each scaled by its weight:
        dx = wx * (gt_ctr_x - ex_ctr_x) / ex_w
        dy = wy * (gt_ctr_y - ex_ctr_y) / ex_h
        dw = ww * log(gt_w / ex_w)
        dh = wh * log(gt_h / ex_h)

    Args:
        ex_rois: (N, 4) boxes shared by every image, or (b, N, 4) boxes,
            as (x1, y1, x2, y2).
        gt_rois: (b, N, 4) matched ground-truth boxes as (x1, y1, x2, y2).
        weights: (wx, wy, ww, wh) scale factors for the four targets. They
            were previously unpacked but never applied; the default of all
            ones leaves the targets unscaled.

    Returns:
        (b, N, 4) tensor of (dx, dy, dw, dh).

    Raises:
        ValueError: if ex_rois is neither 2- nor 3-dimensional.
    """
    wx, wy, ww, wh = weights

    if ex_rois.dim() == 2:
        # Shared boxes: add a leading batch axis so they broadcast against
        # the per-image gt boxes below.
        ex_rois = ex_rois.unsqueeze(0)
    elif ex_rois.dim() != 3:
        raise ValueError('ex_roi input dimension is not correct.')

    # Widths/heights use the inclusive-pixel convention (+1).
    ex_widths = ex_rois[:, :, 2] - ex_rois[:, :, 0] + 1.0
    ex_heights = ex_rois[:, :, 3] - ex_rois[:, :, 1] + 1.0
    ex_ctr_x = ex_rois[:, :, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, :, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, :, 2] - gt_rois[:, :, 0] + 1.0
    gt_heights = gt_rois[:, :, 3] - gt_rois[:, :, 1] + 1.0
    gt_ctr_x = gt_rois[:, :, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, :, 1] + 0.5 * gt_heights

    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = ww * torch.log(gt_widths / ex_widths)
    targets_dh = wh * torch.log(gt_heights / ex_heights)

    return torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), 2)

将box_target表示为中心点偏移+log(gt_w/ac_w)

将labels=1的bbox_inside_weights设置为rpn_bbox_inside_weights

计算bbox_outside_weights:当rpn_positive_weight<0时,fg和bg都设置为1/num_examples,其中num_examples是labels>=0(即fg+bg)的anchor个数

def _unmap(data, count, inds, batch_size, fill=0):
    """ Unmap a subset of item (data) back to the original set of items (of
    size count) """

    if data.dim() == 2:
        ret = torch.Tensor(batch_size, count).fill_(fill).type_as(data)
        ret[:, inds] = data
    else:
        ret = torch.Tensor(batch_size, count, data.size(2)).fill_(fill).type_as(data)
        ret[:, inds,:] = data
    return ret

将labels、bbox_targets、bbox_inside_weights、bbox_outside_weights重新映射到期望大小。

二、总结

'''
rpn_labels:                             [b, 1,num_anchors*feat_height, feat_width]#rpn的fg/bg标签(1,0)如果为-1则表示不运算
rpn_bbox_targets:                       [b, feat_height, feat_width,num_anchors*4]#rpn的bbox的target
rpn_bbox_inside_weights:                [b, feat_height, feat_width,num_anchors*4]#rpn的fg权重
rpn_bbox_outside_weights:               [b, feat_height, feat_width,num_anchors*4]#rpn的smooth l1的权重
'''

你可能感兴趣的:(object,detection)