接在tensorflow+faster rcnn代码理解(一):构建vgg前端和RPN网络之后,对于每张输入图像(600×800)RPN会产生17100个anchor,构建RPN后会输出4个tensor,维度如下:
先放出来总的结构图:
def build_proposals(self, is_training, rpn_cls_prob, rpn_bbox_pred, rpn_cls_score):
    """Turn the RPN outputs into the RoIs consumed by the detection head.

    In training mode the proposal layer, the anchor target layer and the
    proposal target layer are all invoked; in inference mode only one of the
    proposal layers is used, chosen by ``cfg.FLAGS.test_mode``.

    Args:
        rpn_cls_prob: RPN class probabilities, forwarded to the proposal layer.
        rpn_bbox_pred: RPN box predictions, forwarded to the proposal layer.
        rpn_cls_score: RPN class scores, forwarded to the anchor target layer.
        is_training: Selects the training or the inference path.

    Returns:
        The selected RoIs.

    Raises:
        NotImplementedError: If ``cfg.FLAGS.test_mode`` is neither 'nms' nor
            'top' while running inference.
    """
    if not is_training:
        # Inference: only the proposal step is required.
        test_mode = cfg.FLAGS.test_mode
        if test_mode == 'nms':
            proposals, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        elif test_mode == 'top':
            proposals, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
            raise NotImplementedError
        return proposals

    # Select candidate RoIs from the anchors using the RPN probabilities
    # (rpn_cls_prob) and the predicted box offsets (rpn_bbox_pred).
    candidate_rois, candidate_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")

    # Build the anchor labels used to train the RPN (the accompanying notes
    # state that 256 anchors are sampled for this).
    anchor_labels = self._anchor_target_layer(rpn_cls_score, "anchor")

    # Try to have a deterministic order for the computing graph, for reproducibility
    with tf.control_dependencies([anchor_labels]):
        # Sample the RoIs used to train the Fast R-CNN head (128 according to
        # the accompanying notes).
        proposals, _ = self._proposal_target_layer(candidate_rois, candidate_scores, "rpn_rois")
    return proposals
在训练中要完成:
在测试(test or inference)中只需要完成proposal_layer:从17100个anchor中选择出300个proposal(测试时供给300个proposal)给fast rcnn部分,对应图中蓝线部分。
代码:
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Assign RPN training labels and box-regression targets to the anchors.

    Same as the anchor target layer in the original Fast/er RCNN. The original
    note states that 256 of the 17100 anchors are chosen to train the RPN; the
    actual mini-batch size is read from ``cfg.FLAGS.rpn_batchsize``.

    Args:
        rpn_cls_score: RPN class scores. Only ``shape[1:3]`` (feature-map
            height and width) is read, e.g. (1, 38, 50, 18).
        gt_boxes: Ground-truth boxes, handed to ``bbox_overlaps`` and
            ``_compute_targets``.
        im_info: Image info; ``im_info[0][0]`` is used as the image height and
            ``im_info[0][1]`` as the image width.
        _feat_stride: Not used by this function.
        all_anchors: Array with one anchor per row; columns 0-3 are presumably
            (x1, y1, x2, y2), judging by the bounds check below.
        num_anchors: Number of anchors per feature-map location (``A``).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. The labels have shape
        (1, 1, A * height, width); the other three have shape
        (1, height, width, A * 4).
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes: one row per anchor,
    # one column per ground-truth box.
    # np.float64 replaces the np.float alias, which newer NumPy no longer has.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # For every anchor: index of the gt box it overlaps most, and that overlap.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # For every gt box: index of the anchor overlapping it most, and that overlap.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Re-select so that every anchor tying for the best overlap is included.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Regression targets between each anchor and its best-matching gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert 0 < cfg.FLAGS.rpn_positive_weight < 1
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels: (total,) -> (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # The three per-coordinate arrays all take the layout (1, H, W, A * 4).
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
主要的输入:rpn_cls_score = (1,38,50,18)(用于获取W,H)gt_boxes(3,4),all_anchors(17100,4)。
步骤:
1.1.1 筛选(就当做是预处理)
(1)对于生成的all_anchors,保留在image内部的,这些内部的anchor的序号为inds_inside。17100->9340
(2)初始label = (9340,),值为-1
1.1.2 重叠率的计算
(1)假设该图片的gt数量为3,则计算每一个anchor与gt的重叠率,得到overlaps,overlaps = (9340,3)
(2)计算每一个anchor与哪个gt有最大重叠,即argmax_overlaps。argmax_overlaps = (9340,)。argmax_overlaps的值为gt的序号:如0、1...
(3)得到上述的重叠率max_overlaps。max_overlaps = (9340,)
(4)返回与每一个gt重叠率最高的anchor的序号gt_argmax_overlaps,gt_argmax_overlaps = (3,)
(5)得到上述的重叠率gt_max_overlaps。gt_max_overlaps = (3,)
(6)因为(4)只选出了与每一个gt重叠率最高的其中一个anchor,还存在其他anchor也有相同的重叠率,返回这些anchor的序号,gt_argmax_overlaps。gt_argmax_overlaps = (171, )
1.1.3 labels的计算
(1)首先将与每一个gt重叠率最高的anchor设置为fg(这些anchor的序号为gt_argmax_overlaps ),labels = 1
(2)将重叠率(max_overlaps)大于0.5的anchor设置为fg,labels = 1
(3)规定的fg数量为256*0.5=128。
① 如果1、2步骤得到的fg>128,则采样出128个,则剩余的labels=-1
② 如果1、2步骤得到的fg<=128,则无需采样
(4)将重叠率(max_overlaps)小于0.3的anchor设置为bg,labels = 0
(5)规定的bg数量为256 - fg_num.
① 如果(4)步骤得到的bg > 256 - fg_num,则进行采样,剩余的labels = -1
② 如果(4)步骤得到的bg <= 256 - fg_num,则无需采样
ps:一般情况fg的数量都会<=128,因为fg的要求重叠率>0.5,一般很难完全找满128个。而重叠率<0.3也就是标记为bg的数量远远会多,因此经常发生的情况是fg不进行下采样,而bg进行下采样,也就是256个anchor中,fg的数量一般会小于bg的数量。
1.1.4 计算偏移参数
(1)计算每一个anchor与其重叠率最大的gt之间的偏移参数(dx,dy,dw,dh),记为bbox_targets。bbox_targets = (9340,4)
(2)对于每一个anchor生成bbox_inside_weights,bbox_inside_weights = (9340,4)。其中fg为1;bg为0;忽略的anchor为0
(3)对于每一个anchor生成bbox_outside_weights,bbox_outside_weights = (9340,4),fg和bg的值都为1/256,忽略的anchor为0
1.1.5 map up to original set of anchors
因为上面的labels、bbox_targets、bbox_inside_weights、bbox_outside_weights的行数都是9340,也就是说它们都是以落在图片内的anchor为基础编号的,需要将其恢复到原始的17100个anchor下的编号,此步骤需要用到inds_inside。
(1)恢复labels,那些落在图像外的anchor的label为-1
(2)恢复bbox_targets,那些落在图像外的anchor的位置为0
(3)恢复bbox_inside_weights,那些落在图像外的anchor的位置为0
(4)恢复bbox_outside_weights,那些落在图像外的anchor的位置为0
1.1.6 变换成需要的形式
(1)label(17100,)reshape(1,38,50,9)transpose(1,9,38,50)reshape(1,1,342,50),将其赋予给rpn_labels
(2)bbox_targets(17100,4)reshape(1,38,50,36),将其赋予给rpn_bbox_targets
(3)bbox_inside_weights(17100,4)reshape(1,38,50,36),将其赋予给rpn_bbox_inside_weights
(4)bbox_outside_weights(17100,4)reshape(1,38,50,36),将其赋予给rpn_bbox_outside_weights
anchor_target_layer的各个输出及维度为:
顺便生成训练RPN网络的标签信息:
# Keep the RPN training targets produced by the anchor target layer.
self._anchor_targets.update({
    'rpn_labels': rpn_labels,
    'rpn_bbox_targets': rpn_bbox_targets,
    'rpn_bbox_inside_weights': rpn_bbox_inside_weights,
    'rpn_bbox_outside_weights': rpn_bbox_outside_weights,
})
代码:
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Select region proposals from the RPN outputs.

    A simplified version compared to fast/er RCNN; for details please see the
    technical report. Anchors are refined with the predicted offsets, clipped
    to the image, ranked by foreground score, cut to the pre-NMS limit, passed
    through NMS and finally cut to the post-NMS limit. The original notes give
    17100 -> 12000 -> 2000 for training and 17100 -> 6000 -> 300 for testing;
    the real limits are read from ``cfg.FLAGS``.

    Args:
        rpn_cls_prob: RPN class probabilities, e.g. (1, 38, 50, 18). Along the
            last axis the first ``num_anchors`` entries are treated as
            background and the rest as foreground.
        rpn_bbox_pred: RPN box predictions; reshaped to (-1, 4).
        im_info: Image info; ``im_info[0][:2]`` is passed to ``clip_boxes``.
        cfg_key: "TRAIN" selects the training limits, anything else the test
            limits. May be ``str`` or UTF-8 encoded ``bytes``.
        _feat_stride: Not used by this function.
        anchors: Anchor boxes, passed to ``bbox_transform_inv``.
        num_anchors: Number of anchors per feature-map location.

    Returns:
        Tuple ``(blob, scores)``: ``blob`` holds one row per kept proposal
        with a leading batch-index column of zeros followed by the box, and
        ``scores`` holds the matching foreground scores.
    """
    # isinstance (rather than an exact type comparison) also accepts bytes subclasses.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh

    im_info = im_info[0]

    # Get the scores and bounding boxes: keep only the foreground half of the
    # last axis, then flatten both to one row per anchor.
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    # Anchors corrected by the predicted offsets become the proposals.
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals (highest scores first)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
主要的输入:rpn_cls_prob(1,38,50,18)、rpn_bbox_pred(1,38,50,36)、anchors(17100,4)
步骤:
(1)rpn_cls_prob中第四维度,前9位是背景的概率,后9位是前景的概率,所以首先要取出前景的概率,即scores = (1,38,50,9) ,之后reshape成(1×38×50×9,1)即(17100,1)
(2)将rpn_bbox_pred = (1,38,50,36) reshape成为(1×38×50×9,4),即rpn_bbox_pred=(17100,4)
(3)根据产生的anchors和rpn_bbox_pred,对anchor进行修正,得到proposals=(17100,4)
(4)对scores进行降序排列
① 首先选出12000个概率最高的,此时proposals = (12000,4),scores =(12000,1)
② 利用proposals 和scores进行非极大值抑制,结果为proposals = (1214,4),scores =(1214,1)
(5)最后返回rois,rois在proposals 的基础上多了一列,为第一列,默认全为0,rois = (1214,5)
rois_scores = scores =(1214,1)
最终proposal_layer的输出及维度为:
代码
def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
    """Pair proposals with ground truth to build detection-head training data.

    Produces proposal classification labels and bounding-box regression
    targets for a sampled subset of the RoIs.

    Args:
        rpn_rois: Proposal RoIs (0, x1, y1, x2, y2) coming from the RPN or any
            other source.
        rpn_scores: Scores belonging to ``rpn_rois``.
        gt_boxes: Ground-truth boxes; the last column is dropped when the
            boxes are appended as extra candidates.
        _num_classes: Number of classes; fixes the width (``4 * _num_classes``)
            of the regression arrays.

    Returns:
        Tuple ``(rois, roi_scores, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)``.
    """
    candidate_rois, candidate_scores = rpn_rois, rpn_scores

    # Include ground-truth boxes in the set of candidate rois
    if cfg.FLAGS.proposal_use_gt:
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        gt_as_rois = np.hstack((zeros, gt_boxes[:, :-1]))
        candidate_rois = np.vstack((candidate_rois, gt_as_rois))
        # not sure if it a wise appending, but anyway i am not using it
        candidate_scores = np.vstack((candidate_scores, zeros))

    images_per_batch = 1
    rois_per_image = cfg.FLAGS.batch_size / images_per_batch
    # Foreground quota for each image.
    fg_quota = np.round(cfg.FLAGS.proposal_fg_fraction * rois_per_image)

    # Sample rois with classification labels and bounding box regression targets
    sampled = _sample_rois(
        candidate_rois, candidate_scores, gt_boxes, fg_quota,
        rois_per_image, _num_classes)
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights = sampled

    box_width = _num_classes * 4
    rois = rois.reshape(-1, 5)
    roi_scores = roi_scores.reshape(-1)
    labels = labels.reshape(-1, 1)
    bbox_targets = bbox_targets.reshape(-1, box_width)
    bbox_inside_weights = bbox_inside_weights.reshape(-1, box_width)
    # Outside weights are 1.0 wherever an inside weight is positive, else 0.0.
    bbox_outside_weights = (bbox_inside_weights > 0).astype(np.float32)

    return rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: Candidate RoIs; columns 1-4 hold the box coordinates.
        all_scores: Scores belonging to ``all_rois``.
        gt_boxes: Ground-truth boxes; columns 0-3 are the box and column 4 is
            the class label.
        fg_rois_per_image: Maximum number of foreground RoIs to sample.
        rois_per_image: Total number of RoIs to sample.
        num_classes: Number of classes, forwarded to
            ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs, foreground first.

    Raises:
        ValueError: If no RoI qualifies as foreground or background, even
            with the relaxed lower background threshold.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the np.float alias, which newer NumPy no longer has.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # best-matching gt index per RoI
    max_overlaps = overlaps.max(axis=1)  # the corresponding overlap value
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                       (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # Sample with replacement only when there are too few backgrounds.
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Nothing matched the regular thresholds: retry the background
        # selection with a relaxed lower bound.
        bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                           (max_overlaps >= 0.01))[0]
        if bg_inds.size == 0:
            # Fail loudly instead of dropping into an interactive debugger,
            # which would hang an unattended training run.
            raise ValueError(
                "No foreground or background RoIs available for sampling")
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
输入:proposal_layer 输出的rois(1214,5)和rois_scores(1214,1),以及gt_boxes(3,5)最后一列为标签
步骤:
1.3.1 准备
计算正负样本的数量。正样本 = batch_size×proposal_fg_fraction = 128×0.25 = 32,负样本 = 128-32 = 96
1.3.2 进行样本的采样过程_sample_rois
(1)计算重叠率和标签
① 计算rois与gt的重叠率overlaps,overlaps = (1214,3)
② 返回每一个roi与第几个gt的重叠率最大,gt_assignment = (1214,)。gt_assignment的值为gt的序号:如0、1...
③ 返回上述的重叠率,即max_overlaps = (1214,)
④ 生成labels,labels = (1214,),其值是对应的gt的label,也就是为每一个roi打上了标签
(2)从rois中选择出正负样本
① 正样本的数量为fg_inds = overlaps>=0.5,负样本的数量为bg_inds = 0.5>overlaps>0.1
② 根据正负样本的数量进行抽样
if 正样本数量>32,从中抽样出32个,剩余96个为负样本
if 正样本数量<=32,则全部保留,此时负样本抽样出96个。
最终得到的fg_inds + bg_inds = 128.这里假设fg_inds=14,则bg_inds = 114
③ 根据fg_inds、bg_inds,得到最终样本。其中labels = (128,)(负样本的label置为0),rois = (128,5)(第一列为0),rois_scores = (128,)
(3)计算bbox_target_data和bbox_target
① 计算rois和gt的bbox_target_data。bbox_target_data = (128,5) 第一列为label
② 根据训练的类别数K和bbox_target_data,计算bbox_target=(128,4K)和bbox_inside_weights=(128,4K)
这里假定K=3(包括背景),bbox_target = (128,3×4) = (128,12),相当于前4列为背景,中间4列为第1个类别,最后4列为第2个类别。而其中只有对应类的bbox_targets才为非0,同理只有对应类的bbox_inside_weights为1。
proposal_target_layer的输出及维度为:
生成训练分类和回归网络的RoI以及对应的标签信息:
# Keep the sampled RoIs and their training targets for the detection head;
# the labels are cast to int32 on the way in.
self._proposal_targets.update({
    'rois': rois,
    'labels': tf.to_int32(labels, name="to_int32"),
    'bbox_targets': bbox_targets,
    'bbox_inside_weights': bbox_inside_weights,
    'bbox_outside_weights': bbox_outside_weights,
})
def build_predictions(self, net, rois, is_training, initializer, initializer_bbox):
    """Build the detection head that classifies and regresses each RoI.

    Args:
        net: Feature map the RoIs are cropped from.
        rois: RoIs to crop and pool.
        is_training: Enables dropout and makes the two output layers trainable.
        initializer: Weight initializer of the classification layer.
        initializer_bbox: Weight initializer of the box-regression layer.

    Returns:
        Tuple ``(cls_score, cls_prob, bbox_prediction)``.
    """
    # Crop image ROIs and flatten them for the fully connected layers.
    cropped = self._crop_pool_layer(net, rois, "pool5")
    features = slim.flatten(cropped, scope='flatten')

    # Two 4096-unit fully connected layers, each followed by dropout during training.
    for fc_scope, dropout_scope in (('fc6', 'dropout6'), ('fc7', 'dropout7')):
        features = slim.fully_connected(features, 4096, scope=fc_scope)
        if is_training:
            features = slim.dropout(features, keep_prob=0.5, is_training=True, scope=dropout_scope)

    # Scores and predictions: both output layers are linear (no activation).
    output_options = {'trainable': is_training, 'activation_fn': None}
    cls_score = slim.fully_connected(
        features, self._num_classes,
        weights_initializer=initializer, scope='cls_score', **output_options)
    cls_prob = self._softmax_layer(cls_score, "cls_prob")
    bbox_prediction = slim.fully_connected(
        features, self._num_classes * 4,
        weights_initializer=initializer_bbox, scope='bbox_pred', **output_options)

    return cls_score, cls_prob, bbox_prediction
最后返回:
最后基于本篇博客和上篇博客给出基于vgg的faster rcnn构建的总体过程代码,
def build_network(self, sess, is_training=True):
    """Assemble the VGG-based Faster R-CNN graph.

    Builds the head, the RPN, the proposal stage and the prediction stage
    inside the 'vgg_16' variable scope, and records all intermediate outputs
    in ``self._predictions`` and ``self._score_summaries``.

    Args:
        sess: Not used by this method.
        is_training: Forwarded to every build step.

    Returns:
        Tuple ``(rois, cls_prob, bbox_pred)``.
    """
    with tf.variable_scope('vgg_16', 'vgg_16'):
        # Select the initializer family; the box-regression layer gets a
        # smaller standard deviation than the other layers.
        if cfg.FLAGS.initializer == "truncated":
            make_initializer = tf.truncated_normal_initializer
        else:
            make_initializer = tf.random_normal_initializer
        initializer = make_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

        # Build head
        net = self.build_head(is_training)

        # Build rpn: returns the two-class (foreground/background) scores
        # (cls_score), probabilities (cls_prob) and the box correction
        # parameters (bbox_pred). The original note says the anchor settings
        # live in that function and can be changed there.
        rpn_outputs = self.build_rpn(net, is_training, initializer)
        rpn_cls_prob, rpn_bbox_pred, rpn_cls_score, rpn_cls_score_reshape = rpn_outputs

        # Build proposals. Per the original notes: in training a subset of
        # anchors trains the RPN and 128 RoIs are returned for Fast R-CNN; in
        # testing 300 RoIs are returned for detection.
        rois = self.build_proposals(is_training, rpn_cls_prob, rpn_bbox_pred, rpn_cls_score)

        # Build predictions
        cls_score, cls_prob, bbox_pred = self.build_predictions(
            net, rois, is_training, initializer, initializer_bbox)

        self._predictions.update({
            "rpn_cls_score": rpn_cls_score,
            "rpn_cls_score_reshape": rpn_cls_score_reshape,
            "rpn_cls_prob": rpn_cls_prob,
            "rpn_bbox_pred": rpn_bbox_pred,
            "cls_score": cls_score,
            "cls_prob": cls_prob,
            "bbox_pred": bbox_pred,
            "rois": rois,
        })
        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred