(一)生成anchor
/lib/rpn/generate_anchors.py
代码生成的9个anchor,分别为:
[[ -84. -40. 99. 55.]
[-176. -88. 191. 103.]
[-360. -184. 375. 199.]
[ -56. -56. 71. 71.]
[-120. -120. 135. 135.]
[-248. -248. 263. 263.]
[ -36. -80. 51. 95.]
[ -80. -168. 95. 183.]
[-168. -344. 183. 359.]]
尺度分别是 base_size (16) *(8,16,32)
[x1,y1,x2,y2]表示的是矩形框的左上角,右下角坐标;
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=2**np.arange(3, 6)):
    """Generate anchor (reference) windows.

    Enumerates every aspect ratio in ``ratios`` and, for each resulting
    window, every scale in ``scales``, starting from the reference window
    ``(0, 0, base_size - 1, base_size - 1)``.

    Args:
        base_size: Side length of the square reference window.
        ratios: Aspect ratios, forwarded unchanged to ``_ratio_enum``.
            The default list is only read here, never mutated.
        scales: Scale multipliers, forwarded unchanged to ``_scale_enum``.

    Returns:
        The ``_scale_enum`` result for each ratio anchor, stacked
        vertically with ``np.vstack`` (rows are ``[x1, y1, x2, y2]``).
    """
    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    # Iterating a 2-D array yields its rows, so no index loop is needed;
    # this also avoids ``xrange``, which does not exist on Python 3.
    return np.vstack([_scale_enum(ratio_anchor, scales)
                      for ratio_anchor in ratio_anchors])
ratios = [0.5, 1, 2] 是 anchor 的高宽比(h/w,见 _ratio_enum 中的 hs = ws * ratios),对应的 宽:高 分别为 2:1、1:1、1:2;
scales=2**np.arange(3, 6)
array([ 8, 16, 32])
代表了三种倍数 , 8,16,32 .
3 种比例与 3 种尺度两两组合,得到 9 种 anchor;
(1)ratio 操作:
def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor.

    Every returned anchor shares the centre of ``anchor``. Its width is
    ``round(sqrt(area / ratio))`` and its height is ``round(width * ratio)``,
    so ``ratio`` is height / width and the area is preserved up to rounding.

    Example: for the anchor ``[0, 0, 15, 15]`` (w = h = 16, area 256) and
    ratios ``[0.5, 1, 2]`` the widths are ``[23, 16, 11]`` and the heights
    are ``[12, 16, 22]``, i.e. width:height of roughly 2:1, 1:1 and 1:2.

    Args:
        anchor: Window as ``[x1, y1, x2, y2]``.
        ratios: Sequence or array of height / width ratios.

    Returns:
        Whatever ``_mkanchors`` builds from the computed widths and heights
        around the centre of ``anchor``.
    """
    # Convert once: lists/tuples become arrays, and integer ratios cannot
    # trigger floor division on Python 2.
    ratios = np.asarray(ratios, dtype=float)
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    return _mkanchors(ws, hs, x_ctr, y_ctr)
#得到x,y center 与w,h 函数:
def _whctrs(anchor):
"""
Return width, height, x center, and y center for an anchor (window).
"""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)))
return anchors
经过2:1,1:1,1:2 的比例后,原始的array 为:
[[ -3.5 2. 18.5 13. ]
[ 0. 0. 15. 15. ]
[ 2.5 -3. 12.5 18. ]]
该矩阵再经过乘不同倍数得到
- _scale_enum()函数用来将 w,h分别乘 [scale]倍数,得到最终的anchors
def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor.

    The width and height of ``anchor`` are each multiplied by every value
    in ``scales``; the centre is left unchanged.

    Args:
        anchor: Window as ``[x1, y1, x2, y2]``.
        scales: Sequence or array of scale multipliers.

    Returns:
        Whatever ``_mkanchors`` builds from the scaled widths and heights
        around the centre of ``anchor``.
    """
    # Convert once so a plain list multiplies element-wise instead of
    # raising (float * list) when the anchor holds Python scalars.
    scales = np.asarray(scales)
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    return _mkanchors(w * scales, h * scales, x_ctr, y_ctr)
最终anchors 示意图:
(二)AnchorTargetLayer
该部分代码位于lib/rpn/anchor_target_layer.py
该层的定义:
# Caffe layer 'rpn-data': a Python layer backed by the class
# AnchorTargetLayer in the module rpn.anchor_target_layer.
layer {
name: 'rpn-data'
type: 'Python'
# Inputs, in order: the layer code reads them as bottom[0] .. bottom[3].
bottom: 'rpn_cls_score'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'data'
# Outputs, in order: the layer code writes them as top[0] .. top[3].
top: 'rpn_labels'
top: 'rpn_bbox_targets'
top: 'rpn_bbox_inside_weights'
top: 'rpn_bbox_outside_weights'
python_param {
module: 'rpn.anchor_target_layer'
layer: 'AnchorTargetLayer'
# Handed to the Python layer as a string; sets feat_stride to 16.
param_str: "'feat_stride': 16"
}
}
该层接收 RoiDataLayer 传来的 gt_boxes、im_info、data 等数据;
此处的 feat_stride 参数是特征图上相邻两个位置在原图上对应的像素步长(这里为 16);
- bottom[0] 指的就是 rpn_cls_score
- bottom[1] 指的是 gt_boxes
- bottom[2] 指的是 im_info
- bottom[3] 指的是 data
forward
对每一个(h,w),生成9种不同形状的anchor,然后仅仅保留范围在原图中的anchor.
'''
当 width 取 61,height 取 36 的时候:
shift_x = np.arange(0, width) * self._feat_stride
shift_x
array([ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192,
208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400,
416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608,
624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816,
832, 848, 864, 880, 896, 912, 928, 944, 960])
shift_y = np.arange(0, height) * self._feat_stride
>>> shift_y
array([ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192,
208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400,
416, 432, 448, 464, 480, 496, 512, 528, 544, 560])
#组合成grid
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
>>> shift_x
array([[ 0, 16, 32, ..., 928, 944, 960],
[ 0, 16, 32, ..., 928, 944, 960],
[ 0, 16, 32, ..., 928, 944, 960],
...,
[ 0, 16, 32, ..., 928, 944, 960],
[ 0, 16, 32, ..., 928, 944, 960],
[ 0, 16, 32, ..., 928, 944, 960]])
>>> shift_y
array([[ 0, 0, 0, ..., 0, 0, 0],
[ 16, 16, 16, ..., 16, 16, 16],
[ 32, 32, 32, ..., 32, 32, 32],
...,
[528, 528, 528, ..., 528, 528, 528],
[544, 544, 544, ..., 544, 544, 544],
[560, 560, 560, ..., 560, 560, 560]])
#最后生成四个坐标(x1,y1,x2,y2)的偏移值:
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),\
shift_x.ravel(), shift_y.ravel())).transpose()
>>> shifts
array([[ 0, 0, 0, 0],
[ 16, 0, 16, 0],
[ 32, 0, 32, 0],
...,
[928, 560, 928, 560],
[944, 560, 944, 560],
[960, 560, 960, 560]])
'''
A = anchors 的数量,为9
K = width * height
将偏移值与 anchor 相加,得到每个位置上所有 anchor 在原图中的坐标
# A anchor templates are placed at each of the K shift positions.
A = self._num_anchors
K = shifts.shape[0]
# Broadcasting (1, A, 4) templates against (K, 1, 4) shifts gives a
# (K, A, 4) array, which is then flattened to one anchor per row.
anchor_templates = self._anchors.reshape((1, A, 4))
per_cell_shifts = shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = (anchor_templates + per_cell_shifts).reshape((K * A, 4))
total_anchors = int(K * A)
inds_inside是得到那些在图片内的框的ID
# only keep anchors inside the image
# An anchor is kept when all four coordinates fall within the image
# extended by self._allowed_border on every side; im_info[1] is used as
# the width and im_info[0] as the height.
inds_inside = np.where(
    (all_anchors[:, 0] >= -self._allowed_border) &
    (all_anchors[:, 1] >= -self._allowed_border) &
    (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
    (all_anchors[:, 3] < im_info[0] + self._allowed_border)    # height
)[0]
# keep only inside anchors
anchors = all_anchors[inds_inside, :]
if DEBUG:
    # Single-argument form: valid on Python 2 and 3, same output text.
    print('anchors.shape %s' % (anchors.shape,))
计算各个预设 anchor 框与真实框 GT 的 IOU,目的是找出与 GT 最接近的 anchors 并设置为前景(FG)用于计算 loss;IOU 过低的 anchor 则标记为背景(label 为 0),而不是被丢弃。
# Compute the IoU between the kept anchors and the ground-truth boxes.
# The indexing below treats rows as anchors and columns as gt boxes.
# np.float was only an alias of the builtin float and was removed in
# NumPy 1.24, so the builtin is used directly.
overlaps = bbox_overlaps(
    np.ascontiguousarray(anchors, dtype=float),
    np.ascontiguousarray(gt_boxes, dtype=float))
argmax_overlaps = overlaps.argmax(axis=1)
# max_overlaps: for every anchor, its largest overlap with any gt box.
max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
# gt_max_overlaps: for every gt box, the largest overlap with any anchor.
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
                           np.arange(overlaps.shape[1])]
# Re-derive the anchor indices so that every anchor tying for a gt box's
# maximum overlap is included, not only the first one argmax returned.
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# NOTE(review): `labels` is not created anywhere in this excerpt, and no
# statement here assigns background (0) when RPN_CLOBBER_POSITIVES is
# false - presumably both were left out of the excerpt; confirm against
# the full source.
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
    # assign bg labels last so that negative labels can clobber positives
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
正负样本分别采样:正样本最多取 RPN_FG_FRACTION * RPN_BATCHSIZE 个,其余名额由负样本补足(当 RPN_FG_FRACTION 为 0.5 且正样本充足时,正负比例为 1:1)
# subsample positive labels if we have too many
# Positives are capped at RPN_FG_FRACTION * RPN_BATCHSIZE. Negatives then
# fill the rest of the batch, so the split is not forced to be 1:1 when
# there are fewer positives than the cap.
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
    # Too many positives: pick the surplus at random, without replacement ...
    disable_inds = npr.choice(
        fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    # ... and relabel it -1 so it is no longer counted as a positive.
    labels[disable_inds] = -1
# subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
    disable_inds = npr.choice(
        bg_inds, size=(len(bg_inds) - num_bg), replace=False)
    labels[disable_inds] = -1
还原target大小,方便将数据传递上一层网络:
# map up to original set of anchors
# Only the anchors selected by inds_inside were labelled above. _unmap is
# called with the full anchor count, the kept indices and a fill value
# (-1 for labels, 0 for targets and weights), then each array is reshaped
# for the next layer.
labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)
if DEBUG:
    # NOTE(review): the source lost its indentation; everything up to the
    # last print is assumed to belong to this DEBUG guard - confirm.
    # Single-argument print is valid on Python 2 and 3 with the same text.
    print('rpn: max max_overlap %s' % np.max(max_overlaps))
    print('rpn: num_positive %s' % np.sum(labels == 1))
    print('rpn: num_negative %s' % np.sum(labels == 0))
    self._fg_sum += np.sum(labels == 1)
    self._bg_sum += np.sum(labels == 0)
    self._count += 1
    # float() keeps the running averages from being floor-divided when
    # both operands are integers on Python 2.
    print('rpn: num_positive avg %s' % (float(self._fg_sum) / self._count))
    print('rpn: num_negative avg %s' % (float(self._bg_sum) / self._count))
# labels
# (1, height, width, A) -> (1, A, height, width) -> (1, 1, A * height, width)
labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
labels = labels.reshape((1, 1, A * height, width))
top[0].reshape(*labels.shape)
top[0].data[...] = labels
# bbox_targets
# (1, height, width, A * 4) -> (1, A * 4, height, width)
bbox_targets = bbox_targets \
    .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
top[1].reshape(*bbox_targets.shape)
top[1].data[...] = bbox_targets
# bbox_inside_weights
bbox_inside_weights = bbox_inside_weights \
    .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
assert bbox_inside_weights.shape[2] == height
assert bbox_inside_weights.shape[3] == width
top[2].reshape(*bbox_inside_weights.shape)
top[2].data[...] = bbox_inside_weights
# bbox_outside_weights
bbox_outside_weights = bbox_outside_weights \
    .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
assert bbox_outside_weights.shape[2] == height
assert bbox_outside_weights.shape[3] == width
top[3].reshape(*bbox_outside_weights.shape)
top[3].data[...] = bbox_outside_weights
target bounding-box的回归计算公式:
def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    Args:
        ex_rois: Array with 4 columns, one row per example box.
        gt_rois: Array with 5 columns and as many rows as ``ex_rois``;
            only the first four columns are used.

    Returns:
        The result of ``bbox_transform(ex_rois, gt_rois[:, :4])`` as
        ``float32``, without copying if it already has that dtype.

    Raises:
        AssertionError: If the row counts differ or a column count is
            not the expected 4 / 5.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5
    targets = bbox_transform(ex_rois, gt_rois[:, :4])
    return targets.astype(np.float32, copy=False)
def bbox_transform(ex_rois, gt_rois):
    """Compute regression targets that map example boxes onto gt boxes.

    Both inputs are 2-D arrays whose first four columns are
    ``[x1, y1, x2, y2]``; row ``i`` of one is paired with row ``i`` of the
    other.

    Args:
        ex_rois: Example (anchor / proposal) boxes.
        gt_rois: Ground-truth boxes.

    Returns:
        Array of shape ``(N, 4)`` with columns ``(dx, dy, dw, dh)``:
        centre offsets divided by the example box's width / height, and
        the natural log of the width / height ratios.
    """
    # Widths and heights include both end coordinates, hence the + 1.0.
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)

    # Stack as four rows, then transpose to one (dx, dy, dw, dh) row per box.
    return np.vstack(
        (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
总结:AnchorTargetLayer 该类的作用是 :
- 通过预设步长feat_stride,遍历图像,得到每个小格子的坐标
- 然后将预设的 anchors [x1,y1,x2,y2] 作用到每个格子上,形成 A * K 个 anchors
- 去掉处于图像边缘外面的anchors
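- 分别计算每个 anchor 与 GT 的 IOU:IOU 不低于正样本阈值的 anchor(以及与每个 GT 的 IOU 最大的 anchor)的 label 设置为 1(前景),IOU 低于负样本阈值的设置为 0(背景)
- 采样:限制正样本数量,并用负样本补足一个 batch(当 RPN_FG_FRACTION 为 0.5 且正样本充足时,正负样本比例为 1:1)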