【tensorflow + Faster RCNN】anchor_target_layer、proposal_target_layer、proposal_layer

接在tensorflow+faster rcnn代码理解(一):构建vgg前端和RPN网络之后,对于每张输入图像(600×800)RPN会产生17100个anchor,构建RPN后会输出4个tensor,维度如下:

  • rpn_cls_prob:(1,38,50,18)
  • rpn_bbox_pred:(1,38,50,36)
  • rpn_cls_score:(1,38,50,18)
  • rpn_cls_score_reshape:(1,342,50,2)

先放出来总的结构图:

【tensorflow + Faster RCNN】anchor_target_layer、proposal_target_layer、proposal_layer_第1张图片

1.构建proposal(build_proposals函数)

    def build_proposals(self, is_training, rpn_cls_prob, rpn_bbox_pred, rpn_cls_score):
        """Turn the RPN outputs into the RoIs handed to the detection head.

        At inference time only a proposal layer runs; which one is chosen by
        ``cfg.FLAGS.test_mode`` ('nms' or 'top').  During training the
        proposal layer runs first, the anchor-target layer is built next (its
        result is used here only as a control dependency), and finally the
        proposal-target layer reduces the proposals to the returned RoIs.

        Returns:
            The RoI tensor.

        Raises:
            NotImplementedError: when not training and ``test_mode`` is
                neither 'nms' nor 'top'.
        """
        if not is_training:
            mode = cfg.FLAGS.test_mode
            if mode == 'nms':
                make_proposals = self._proposal_layer
            elif mode == 'top':
                make_proposals = self._proposal_top_layer
            else:
                raise NotImplementedError
            rois, _ = make_proposals(rpn_cls_prob, rpn_bbox_pred, "rois")
            return rois

        # Candidate RoIs and their scores, derived from the RPN probabilities
        # and box predictions.
        candidates, candidate_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        # Anchor labels used to train the RPN itself.
        anchor_labels = self._anchor_target_layer(rpn_cls_score, "anchor")

        # Try to have a deterministic order for the computing graph, for reproducibility
        with tf.control_dependencies([anchor_labels]):
            rois, _ = self._proposal_target_layer(candidates, candidate_scores, "rpn_rois")
        return rois

训练中要完成:

  • anchor_target_layer:从17100个anchor中选择256个训练RPN网络,即rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
  • proposal_layer:从17100个anchor中先按得分选出12000个,再经NMS保留至多2000个作为rois供给fast rcnn部分,即rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
  • proposal_target_layer:在完成(2)的基础上从2000个rois中选择出128个训练fast rcnn,即rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")

测试(test or inference)中只需要完成proposal_layer:从17100个anchor中选出300个proposal(测试时供给300个)送给fast rcnn部分,即图中蓝线部分。

1.1 anchor_target_layer,训练RPN

代码:

def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Assign RPN training labels and box-regression targets to the anchors.

    Same role as the anchor target layer in the original Fast/er R-CNN code:
    anchors lying inside the image are labelled foreground (1), background
    (0) or "don't care" (-1) from their overlap with the ground-truth boxes,
    the labelled set is randomly sub-sampled to at most
    ``cfg.FLAGS.rpn_batchsize`` anchors, and every result is scattered back
    onto the full anchor grid.

    Args:
        rpn_cls_score: RPN score map; only ``shape[1:3]`` (height, width) is
            read.
        gt_boxes: ground-truth boxes, one row per box; passed whole to
            ``bbox_overlaps`` and ``_compute_targets``.
        im_info: image info; row 0 is read as (height, width, ...).
        _feat_stride: not used in this function.
        all_anchors: (total_anchors, 4) array of (x1, y1, x2, y2) anchors.
        num_anchors: number of anchors per feature-map location (A).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``.  ``rpn_labels`` has shape
        (1, 1, A * height, width); the other three have shape
        (1, height, width, A * 4).
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # spatial size of the score map (axes 1 and 2 of rpn_cls_score)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes:
    # rows index the inside anchors, columns index the ground-truth boxes.
    # np.float64 replaces the removed np.float alias (same dtype).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # for every anchor: index of the gt box it overlaps most, and that overlap
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # for every gt box: index of (one of) the best anchors, and that overlap
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # widen to ALL anchors that tie for the best overlap with some gt box
    # NOTE(review): when a gt box overlaps no inside anchor its maximum is 0,
    # so every anchor with zero overlap to it matches here — confirm whether
    # that is intended.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # regression targets from each anchor to its best-overlapping gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights

主要的输入:rpn_cls_score = (1,38,50,18)(用于获取W,H)、gt_boxes(3,5)(前4列为坐标,最后一列为类别标签)、all_anchors(17100,4)

步骤:

1.1.1 筛选(就当做是预处理)

(1)对于生成的all_anchors,保留在image内部的,这些内部的anchor的序号为inds_inside。17100->9340

(2)初始label = (9340,),值为-1

1.1.2 重叠率的计算

(1)假设该图片的gt数量为3,则计算每一个anchor与gt的重叠率,得到overlaps,overlaps = (9340,3)

(2)计算每一个anchor与哪个gt有最大重叠,即argmax_overlaps。argmax_overlaps = (9340,)。argmax_overlaps的值为gt的序号:如0、1...

(3)得到上述的重叠率max_overlaps。max_overlaps = (9340,)

(4)返回与每一个gt重叠率最高的anchor的序号gt_argmax_overlaps,gt_argmax_overlaps = (3,)

(5)得到上述的重叠率gt_max_overlaps。gt_max_overlaps = (3,)

(6)因为(4)只选出了与每一个gt重叠率最高的其中一个anchor,还存在其他anchor也有相同的重叠率,返回这些anchor的序号,gt_argmax_overlaps。gt_argmax_overlaps = (171, )
1.1.3 labels的计算

(1)首先将与每一个gt重叠率最高的anchor设置为fg(这些anchor的序号为gt_argmax_overlaps ),labels = 1

(2)将重叠率(max_overlaps)大于等于阈值rpn_positive_overlap(Faster R-CNN论文中为0.7)的anchor设置为fg,labels = 1

(3)规定的fg数量为256*0.5=128。
        ① 如果1、2步骤得到的fg>128,则采样出128个,则剩余的labels=-1
        ② 如果1、2步骤得到的fg<=128,则无需采样

(4)将重叠率(max_overlaps)小于0.3的anchor设置为bg,labels = 0

(5)规定的bg数量为256 - fg_num.
        ① 如果(4)步骤得到的bg > 256 - fg_num,则进行采样,剩余的labels = -1
        ② 如果(4)步骤得到的bg <= 256 - fg_num,则无需采样
    ps:一般情况fg的数量都会<=128,因为fg要求重叠率达到阈值rpn_positive_overlap(论文中为0.7),一般很难找满128个。而重叠率<0.3、也就是标记为bg的anchor数量要多得多,因此经常发生的情况是fg不进行下采样,而bg进行下采样,也就是256个anchor中,fg的数量一般会小于bg的数量。

1.1.4 计算偏移参数
(1)计算每一个anchor与其重叠率最大的gt之间的偏移参数(dx,dy,dw,dh),记为bbox_targets。bbox_targets = (9340,4)

(2)对于每一个anchor生成bbox_inside_weights,bbox_inside_weights = (9340,4)。其中fg为1;bg为0;忽略的anchor为0

(3)对于每一个anchor生成bbox_outside_weights,bbox_outside_weights = (9340,4)。fg和bg的值都为1/num_examples(num_examples为fg与bg样本的总数,通常为256),忽略的anchor为0

1.1.5 map up to original set of anchors

因为上面的labels、bbox_targets、bbox_inside_weights、bbox_outside_weights行数都是9340,也就是都是以落在图片内的anchor为基础编号的,需要将其恢复到原始的17100个anchor下的编号,此步骤需要用到inds_inside。
(1)恢复labels,那些落在图像外的anchor的label为-1
(2)恢复bbox_targets,那些落在图像外的anchor的位置为0
(3)恢复bbox_inside_weights,那些落在图像外的anchor的位置为0
(4)恢复bbox_outside_weights,那些落在图像外的anchor的位置为0

1.1.6 变换成需要的形式
(1)label(17100,)reshape(1,38,50,9)transpose(1,9,38,50)reshape(1,1,342,50),将其赋予给rpn_labels
(2)bbox_targets(17100,4)reshape(1,38,50,36),将其赋予给rpn_bbox_targets
(3)bbox_inside_weights(17100,4)reshape(1,38,50,36),将其赋予给rpn_bbox_inside_weights
(4)bbox_outside_weights(17100,4)reshape(1,38,50,36),将其赋予给rpn_bbox_outside_weights

anchor_target_layer的各个输出及维度为:

  • rpn_labels:(1,1,342,50)
  • rpn_bbox_targets:(1,38,50,36)
  • rpn_bbox_inside_weights:(1,38,50,36)
  • rpn_bbox_outside_weights:(1,38,50,36)

顺便生成训练RPN网络的标签信息:

# Store the four anchor_target_layer outputs on the network object, keyed by
# name (presumably read back when the RPN losses are built — confirm).
self._anchor_targets['rpn_labels'] = rpn_labels
self._anchor_targets['rpn_bbox_targets'] = rpn_bbox_targets
self._anchor_targets['rpn_bbox_inside_weights'] = rpn_bbox_inside_weights
self._anchor_targets['rpn_bbox_outside_weights'] = rpn_bbox_outside_weights

1.2 proposal_layer,供给候选区

代码:

def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Convert RPN outputs into a score-ranked, NMS-filtered set of RoIs.

    A simplified version compared to fast/er RCNN; for details please see
    the technical report.  The anchors are shifted by the predicted deltas,
    clipped to the image, ranked by foreground score, cut to the
    ``pre_nms_topN`` best, passed through NMS and cut again to
    ``post_nms_topN``.

    Args:
        rpn_cls_prob: RPN class probabilities; the last axis holds
            ``num_anchors`` background entries followed by the foreground
            entries.
        rpn_bbox_pred: RPN box deltas, reshaped here to (-1, 4).
        im_info: image info; row 0 is used and its first two entries are
            passed to ``clip_boxes``.
        cfg_key: "TRAIN" selects the training thresholds, anything else the
            test thresholds.  May be ``str`` or ``bytes``.
        _feat_stride: not used in this function.
        anchors: anchor boxes matching the rows of the reshaped deltas.
        num_anchors: number of anchors per feature-map location.

    Returns:
        Tuple ``(blob, scores)``: ``blob`` is a float32 array with a leading
        all-zero batch-index column followed by the four box coordinates;
        ``scores`` holds the matching foreground scores as a column.
    """
    # isinstance (rather than an exact type comparison) also decodes bytes
    # subclasses such as numpy.bytes_, which would otherwise never equal
    # "TRAIN" and silently fall through to the test settings.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    # Values in the trailing comments are the ones quoted by the original
    # annotations; the effective values come from cfg.
    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n      # e.g. 12000
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n    # e.g. 2000
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh           # e.g. 0.7
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n       # e.g. 6000
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n     # e.g. 300
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh            # e.g. 0.7

    im_info = im_info[0]
    # Keep only the foreground probabilities (channels from num_anchors on).
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # One row of four deltas per anchor, one score per anchor.
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    # Anchors corrected by the predicted deltas are the proposals.
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals (highest scores first)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores

主要的输入:rpn_cls_prob(1,38,50,18)、rpn_bbox_pred(1,38,50,36)、anchors(17100,4)

步骤:

(1)rpn_cls_prob中第四维度,前9位是背景的概率,后9位是前景的概率,所以首先要取出前景的概率,即scores = (1,38,50,9) ,之后reshape成(1×38×50×9,1)即(17100,1)

(2)将rpn_bbox_pred = (1,38,50,36) reshape成为(1×38×50×9,4),即rpn_bbox_pred=(17100,4)
(3)根据产生anchors和rpn_bbox_pred,对anchor进行修正,得到proposals=(17100,4)
(4)对scores进行降序排列
          ① 首先选出12000个概率最高的,此时proposals = (12000,4),scores =(12000,1)
          ② 利用proposals 和scores进行非极大值抑制,结果为proposals = (1214,4),scores =(1214,1)
(5)最后返回rois,rois在proposals 的基础上多了一列,为第一列,默认全为0,rois = (1214,5)
         rois_scores = scores =(1214,1)

最终proposal_layer的输出及维度为:

  • rois: (1214,5)
  • rois_scores:(1214,1)

1.3 proposal_target_layer,提供rois训练fastrcnn

代码

def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
    """Sample RoIs and build the training targets for the detection head.

    Assigns object detection proposals to ground-truth targets and produces
    classification labels plus per-class bounding-box regression targets.

    Args:
        rpn_rois: proposal RoIs as rows of (0, x1, y1, x2, y2), coming from
            the RPN proposal layer or any other source.
        rpn_scores: scores matching ``rpn_rois`` row for row.
        gt_boxes: ground-truth boxes; the last column is dropped when the
            boxes are appended to the candidates.
        _num_classes: number of classes; fixes the width (``4 *
            _num_classes``) of the regression arrays.

    Returns:
        Tuple ``(rois, roi_scores, labels, bbox_targets,
        bbox_inside_weights, bbox_outside_weights)``.
    """
    candidate_rois, candidate_scores = rpn_rois, rpn_scores

    # Optionally let the ground-truth boxes compete as candidate RoIs.
    if cfg.FLAGS.proposal_use_gt:
        pad = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        gt_as_rois = np.hstack((pad, gt_boxes[:, :-1]))
        candidate_rois = np.vstack((candidate_rois, gt_as_rois))
        # not sure if it a wise appending, but anyway i am not using it
        candidate_scores = np.vstack((candidate_scores, pad))

    num_images = 1
    rois_per_image = cfg.FLAGS.batch_size / num_images
    # Foreground quota per image.
    fg_rois_per_image = np.round(cfg.FLAGS.proposal_fg_fraction * rois_per_image)

    # Sample rois with classification labels and bounding box regression
    # targets
    sampled = _sample_rois(
        candidate_rois, candidate_scores, gt_boxes,
        fg_rois_per_image, rois_per_image, _num_classes)
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights = sampled

    box_cols = _num_classes * 4
    rois = rois.reshape(-1, 5)
    roi_scores = roi_scores.reshape(-1)
    labels = labels.reshape(-1, 1)
    bbox_targets = bbox_targets.reshape(-1, box_cols)
    bbox_inside_weights = bbox_inside_weights.reshape(-1, box_cols)
    # Outside weights are 1 wherever an inside weight is positive, else 0.
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    return rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1..4 hold the box coordinates.
        all_scores: scores matching ``all_rois`` row for row.
        gt_boxes: ground-truth boxes; columns 0..3 are the box, column 4 the
            class label.
        fg_rois_per_image: upper bound on the number of foreground RoIs.
        rois_per_image: total number of RoIs to return.
        num_classes: number of classes, forwarded to
            ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs, foreground rows first.

    Raises:
        ValueError: if no RoI qualifies as foreground or background, even
            with the relaxed fallback threshold.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the removed np.float alias (same dtype).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    # for every RoI: index of the best-overlapping gt box and that overlap
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                       (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # sample with replacement only when there are too few candidates
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Neither set has candidates: retry the background selection with a
        # fixed lower bound of 0.01.
        bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                           (max_overlaps >= 0.01))[0]
        # Check BEFORE sampling, so the failure is explicit rather than an
        # opaque error from npr.choice or a debugger prompt.
        if bg_inds.size == 0:
            raise ValueError(
                "_sample_rois: no foreground or background RoI candidates "
                "available for sampling")
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights

输入:proposal_layer 输出的rois(1214,5)和rois_scores(1214,1),以及gt_boxes(3,5)最后一列为标签

步骤:

1.3.1  准备

           计算正负样本的数量。正样本 = batch_size×proposal_fg_fraction = 128×0.25 = 32,负样本 = 128-32 = 96
1.3.2  进行样本的采样过程_sample_rois
(1)计算重叠率和标签
      ① 计算rois与gt的重叠率overlaps,overlaps = (1214,3)
      ② 返回每一个anchor与第几个gt的重叠率最大,gt_assignment = (1214,)。gt_assignment的值为gt的序号:如0、1...
      ③ 返回上述的重叠率,即max_overlaps = (1214,)
      ④ 生成labels,labels = (1214,),其值是对应的gt的label,也就是为每一个roi打上了标签
(2)从anchor中选择出正负样本
      ① 正样本的索引为fg_inds(max_overlaps>=0.5),负样本的索引为bg_inds(0.1<=max_overlaps<0.5)
      ② 根据正负样本的数量进行抽样
         if 正样本数量>32,从中抽样出32个,剩余96个为负样本
         if 正样本数量<=32,则全部保留,此时负样本抽样出(128-正样本数量)个。
        最终得到的fg_inds  + bg_inds = 128.这里假设fg_inds=14,则bg_inds = 114
      ③ 根据fg_inds、bg_inds,得到最终样本。其中labels = (128,)(负样本的label置为0),rois = (128,5)(第一列为0),rois_scores = (128,)
(3)计算bbox_target_data和bbox_target
      ① 计算rois和gt的bbox_target_data。bbox_target_data = (128,5) 第一列为label
      ② 根据训练的类别数K和bbox_target_data,计算bbox_target=(128,4K)和bbox_inside_weights=(128,4K)
           这里假定K=3(包括背景),bbox_target = (128,3×4) = (128,12),相当于前4列为背景,中间4列为第1个类别,最后4列为第2个类别。而其中只有对应类的bbox_targets才为非0,同理只有对应类的bbox_inside_weights为1。
proposal_target_layer的输出及维度为:

  •     rois = (128,5)(第一列为0)
  •     rois_scores =(128,)
  •     labels = (128,1)
  •     bbox_target=(128,12)
  •     bbox_inside_weights = (128,12)
  •     bbox_outside_weights = (128,12)

生成训练分类和回归网络的RoI以及对应的标签信息:

# Store the proposal_target_layer outputs on the network object, keyed by
# name; labels are cast to int32 first.
self._proposal_targets['rois'] = rois
self._proposal_targets['labels'] = tf.to_int32(labels, name="to_int32")
self._proposal_targets['bbox_targets'] = bbox_targets
self._proposal_targets['bbox_inside_weights'] = bbox_inside_weights
self._proposal_targets['bbox_outside_weights'] = bbox_outside_weights

2.构建fastrcnn部分以及进行预测(build_predictions函数)

def build_predictions(self, net, rois, is_training, initializer, initializer_bbox):
        """Build the detection head on top of the pooled RoI features.

        RoI features are cropped from ``net``, flattened, and passed through
        two 4096-unit fully connected layers (each followed by dropout with
        keep_prob 0.5 when training).  Two linear layers then produce the
        class scores (``self._num_classes`` outputs) and the box predictions
        (``self._num_classes * 4`` outputs).

        Args:
            net: feature map handed to ``self._crop_pool_layer``.
            rois: regions of interest to crop.
            is_training: enables dropout and makes the two output layers
                trainable.
            initializer: weights initializer for the class-score layer.
            initializer_bbox: weights initializer for the box layer.

        Returns:
            Tuple ``(cls_score, cls_prob, bbox_prediction)``.
        """

        # Crop image ROIs
        pool5 = self._crop_pool_layer(net, rois, "pool5")
        pool5_flat = slim.flatten(pool5, scope='flatten')

        # Fully connected layers
        fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
        if is_training:
            fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True, scope='dropout6')

        fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
        if is_training:
            fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True, scope='dropout7')

        # Scores and predictions (no activation: raw linear outputs)
        cls_score = slim.fully_connected(fc7, self._num_classes, weights_initializer=initializer, trainable=is_training, activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_prediction = slim.fully_connected(fc7, self._num_classes * 4, weights_initializer=initializer_bbox, trainable=is_training, activation_fn=None, scope='bbox_pred')

        return cls_score, cls_prob, bbox_prediction

最后返回:

  • cls_score:(128,3)
  • cls_prob:(128,3)
  • bbox_prediction:(128,12)

3.小结

最后基于本篇博客和上篇博客给出基于vgg的faster rcnn构建的总体过程代码,

def build_network(self, sess, is_training=True):
        """Assemble the full VGG16-based Faster R-CNN graph.

        Builds, in order, the convolutional head, the RPN, the proposal
        stage and the detection head inside the 'vgg_16' variable scope,
        records every intermediate prediction in ``self._predictions`` and
        merges them into ``self._score_summaries``.

        Args:
            sess: not used in this method body.
            is_training: forwarded to each build step.

        Returns:
            Tuple ``(rois, cls_prob, bbox_pred)``.
        """
        with tf.variable_scope('vgg_16', 'vgg_16'):

            # select initializer: truncated normal when configured, otherwise
            # plain normal; the box layer uses a smaller stddev (0.001 vs 0.01)
            if cfg.FLAGS.initializer == "truncated":
                initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
                initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
            else:
                initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
                initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

            # Build head
            net = self.build_head(is_training)

            # Build rpn. Per the original note, the anchor settings live
            # inside build_rpn and can be changed there.
            # Yields the RPN foreground/background scores (cls_score), their
            # probabilities (cls_prob) and the box-regression deltas (bbox_pred).
            rpn_cls_prob, rpn_bbox_pred, rpn_cls_score, rpn_cls_score_reshape = self.build_rpn(net, is_training, initializer)

            # Build proposals
            # Training: anchors are sampled to train the RPN and the proposals
            # are reduced to the RoIs that train the detection head.
            # Testing: only the proposals used for detection are produced.
            # (The original note quotes 20000->12000->128 RoIs for training and
            # 20000->6000->300 for testing; actual counts are config-driven — confirm.)
            rois = self.build_proposals(is_training, rpn_cls_prob, rpn_bbox_pred, rpn_cls_score)

            # Build predictions
            cls_score, cls_prob, bbox_pred = self.build_predictions(net, rois, is_training, initializer, initializer_bbox)

            # Keep every intermediate output addressable by name
            self._predictions["rpn_cls_score"] = rpn_cls_score
            self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
            self._predictions["rpn_cls_prob"] = rpn_cls_prob
            self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
            self._predictions["cls_score"] = cls_score
            self._predictions["cls_prob"] = cls_prob
            self._predictions["bbox_pred"] = bbox_pred
            self._predictions["rois"] = rois

            self._score_summaries.update(self._predictions)

            return rois, cls_prob, bbox_pred

 

你可能感兴趣的:(tensorflow,faster,rcnn,tensorflow,faster,rcnn)