IMAGE_SHAPE [1024 1024 3]
BACKBONE_STRIDES [4, 8, 16, 32, 64]
RPN_ANCHOR_SCALES (32, 64, 128, 256, 512)
RPN_ANCHOR_RATIOS [0.5, 1, 2]
[[256, 256], 相对于原图的stride: 1024/256 = 4
[128, 128], 相对于原图的stride: 8
[ 64, 64], 相对于原图的stride: 16
[ 32, 32], 相对于原图的stride: 32
[ 16, 16]] 相对于原图的stride: 64
(原图大小为1024*1024, 这些特征图大小相对原图大小的strides就是上面的 BACKBONE_STRIDES [4, 8, 16, 32, 64])
生成anchors的时候,是针对每个特征图上的每个像素点(特征点)生成3个anchors(对应3个ratios)!
scales 就是: RPN_ANCHOR_SCALES (32, 64, 128, 256, 512) 就是要生成的anchors的大小!
先对anchor大小为(32*32)的生成anchors,然后是(64*64)的, append到一起。
举例:(当 i = 0时, 即 scales[i] = 32, ratios = [0.5,1,2], feature_shape[i] = (256,256), feature_stride[i] = 4)
当特征图大小为256时,feature_shape就是(256,256),对应的BACKBONE_STRIDES为4, ratios就是RPN_ANCHOR_RATIOS [0.5, 1, 2]因为一个特征图的每个特征点都有3个anchors。
下面将这些参数传入generate_anchors这个函数:
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
    """Generate anchor boxes for a single feature map, in image coordinates.

    Every combination of scale and ratio yields one anchor size, and one
    anchor of each size is centred on every sampled feature-map position.

    Args:
        scales: Scalar or 1D array-like of anchor sizes in pixels
            (e.g. 32, or [32, 64, 128]).
        ratios: 1D array-like of width/height ratios (e.g. [0.5, 1, 2]).
            For ratio r the anchor has width = scale * sqrt(r) and
            height = scale / sqrt(r), so its area stays scale ** 2.
        shape: (height, width) of the feature map.
        feature_stride: Multiplier that converts a feature-map index into
            an image pixel coordinate (feature-map stride w.r.t. the image).
        anchor_stride: Step, in feature-map cells, between sampled anchor
            positions (1 places anchors on every cell).

    Returns:
        Array of shape (N, 4) with rows (y1, x1, y2, x2), where
        N = number_of_positions * len(scales) * len(ratios). Rows are
        ordered position by position (row-major over the feature map),
        with all scale/ratio combinations grouped per position.
    """
    # Get all combinations of scales and ratios
    scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
    scales = scales.flatten()
    ratios = ratios.flatten()

    # Enumerate heights and widths from scales and ratios
    heights = scales / np.sqrt(ratios)
    widths = scales * np.sqrt(ratios)

    # Enumerate shifts in feature space; multiplying by feature_stride
    # maps the feature-map indices onto image pixel coordinates.
    shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
    shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
    shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)

    # Enumerate combinations of shifts, widths, and heights.
    # np.meshgrid flattens its inputs, so each result has shape
    # (number_of_positions, number_of_anchor_sizes).
    box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
    box_heights, box_centers_y = np.meshgrid(heights, shifts_y)

    # Reshape to get a list of (y, x) and a list of (h, w)
    box_centers = np.stack(
        [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
    box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])

    # Convert to corner coordinates (y1, x1, y2, x2)
    boxes = np.concatenate([box_centers - 0.5 * box_sizes,
                            box_centers + 0.5 * box_sizes], axis=1)
    return boxes
现在,传到这个函数的 scales = 32,ratios = [0.5,1,2],shape=(256,256), feature_stride = 4, anchor_stride 现在一直=1
然后运行每一步:
scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
>>> scales
array([[32],
[32],
[32]])
>>> ratios.shape
(3, 1)
>>> ratios
array([[ 0.5],
[ 1. ],
[ 2. ]])
scales = scales.flatten()
>>> scales
array([32, 32, 32])
ratios = ratios.flatten()
>>> ratios
array([ 0.5, 1. , 2. ])
>>> heights = scales / np.sqrt(ratios)
>>> heights
array([ 45.254834, 32. , 22.627417]) #对应三个ratio
>>> widths = scales * np.sqrt(ratios)
>>> widths
array([ 22.627417, 32. , 45.254834])
>>> shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride #anchor_stride=1 之前当成4了所以就没想通。
>>> shifts_y
array([ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40,
44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84,
88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128,
132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172,
176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216,
220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260,
264, 268, 272, 276, 280, 284, 288, 292, 296, 300, 304,
308, 312, 316, 320, 324, 328, 332, 336, 340, 344, 348,
352, 356, 360, 364, 368, 372, 376, 380, 384, 388, 392,
396, 400, 404, 408, 412, 416, 420, 424, 428, 432, 436,
440, 444, 448, 452, 456, 460, 464, 468, 472, 476, 480,
484, 488, 492, 496, 500, 504, 508, 512, 516, 520, 524,
528, 532, 536, 540, 544, 548, 552, 556, 560, 564, 568,
572, 576, 580, 584, 588, 592, 596, 600, 604, 608, 612,
616, 620, 624, 628, 632, 636, 640, 644, 648, 652, 656,
660, 664, 668, 672, 676, 680, 684, 688, 692, 696, 700,
704, 708, 712, 716, 720, 724, 728, 732, 736, 740, 744,
748, 752, 756, 760, 764, 768, 772, 776, 780, 784, 788,
792, 796, 800, 804, 808, 812, 816, 820, 824, 828, 832,
836, 840, 844, 848, 852, 856, 860, 864, 868, 872, 876,
880, 884, 888, 892, 896, 900, 904, 908, 912, 916, 920,
924, 928, 932, 936, 940, 944, 948, 952, 956, 960, 964,
968, 972, 976, 980, 984, 988, 992, 996, 1000, 1004, 1008,
1012, 1016, 1020]) #这里生成的是纵坐标(y方向,即高度方向上的坐标),之后乘以feature_stride=4就是为了扩展到原图(1024*1024)上面对应的坐标!!!!!这个至关重要。
>>> shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
>>> shifts_x
array([ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40,
44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84,
88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128,
132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172,
176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216,
220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260,
264, 268, 272, 276, 280, 284, 288, 292, 296, 300, 304,
308, 312, 316, 320, 324, 328, 332, 336, 340, 344, 348,
352, 356, 360, 364, 368, 372, 376, 380, 384, 388, 392,
396, 400, 404, 408, 412, 416, 420, 424, 428, 432, 436,
440, 444, 448, 452, 456, 460, 464, 468, 472, 476, 480,
484, 488, 492, 496, 500, 504, 508, 512, 516, 520, 524,
528, 532, 536, 540, 544, 548, 552, 556, 560, 564, 568,
572, 576, 580, 584, 588, 592, 596, 600, 604, 608, 612,
616, 620, 624, 628, 632, 636, 640, 644, 648, 652, 656,
660, 664, 668, 672, 676, 680, 684, 688, 692, 696, 700,
704, 708, 712, 716, 720, 724, 728, 732, 736, 740, 744,
748, 752, 756, 760, 764, 768, 772, 776, 780, 784, 788,
792, 796, 800, 804, 808, 812, 816, 820, 824, 828, 832,
836, 840, 844, 848, 852, 856, 860, 864, 868, 872, 876,
880, 884, 888, 892, 896, 900, 904, 908, 912, 916, 920,
924, 928, 932, 936, 940, 944, 948, 952, 956, 960, 964,
968, 972, 976, 980, 984, 988, 992, 996, 1000, 1004, 1008,
1012, 1016, 1020])
>>> shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
#这样组合之后,通过特征图上的(j,i)索引(第j行第i列),就能找到该位置anchor中心在原图上的坐标: x = shifts_x[j,i], y = shifts_y[j,i]
>>> shifts_x
array([[ 0, 4, 8, ..., 1012, 1016, 1020],
[ 0, 4, 8, ..., 1012, 1016, 1020],
[ 0, 4, 8, ..., 1012, 1016, 1020],
...,
[ 0, 4, 8, ..., 1012, 1016, 1020],
[ 0, 4, 8, ..., 1012, 1016, 1020],
[ 0, 4, 8, ..., 1012, 1016, 1020]])
>>> shifts_x.shape
(256, 256)
>>> shifts_y
array([[ 0, 0, 0, ..., 0, 0, 0],
[ 4, 4, 4, ..., 4, 4, 4],
[ 8, 8, 8, ..., 8, 8, 8],
...,
[1012, 1012, 1012, ..., 1012, 1012, 1012],
[1016, 1016, 1016, ..., 1016, 1016, 1016],
[1020, 1020, 1020, ..., 1020, 1020, 1020]])
>>> box_widths, box_centers_x = np.meshgrid(widths, shifts_x) #(x,y)位置,anchor的大小
>>> box_widths
array([[ 22.627417, 32. , 45.254834],
[ 22.627417, 32. , 45.254834],
[ 22.627417, 32. , 45.254834],
...,
[ 22.627417, 32. , 45.254834],
[ 22.627417, 32. , 45.254834],
[ 22.627417, 32. , 45.254834]])
>>> box_centers_x
array([[ 0, 0, 0],
[ 4, 4, 4],
[ 8, 8, 8],
...,
[1012, 1012, 1012],
[1016, 1016, 1016],
[1020, 1020, 1020]])
>>> box_widths.shape
(65536, 3) # 256*256 = 65536, 每个像素点3个anchors
>>> box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
>>> box_heights
array([[ 45.254834, 32. , 22.627417],
[ 45.254834, 32. , 22.627417],
[ 45.254834, 32. , 22.627417],
...,
[ 45.254834, 32. , 22.627417],
[ 45.254834, 32. , 22.627417],
[ 45.254834, 32. , 22.627417]])
>>> box_centers_y
array([[ 0, 0, 0],
[ 0, 0, 0],
[ 0, 0, 0],
...,
[1020, 1020, 1020],
[1020, 1020, 1020],
[1020, 1020, 1020]])
>>> box_centers = np.stack(
... [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
>>> box_centers #得到anchors中心在原图上的(y, x)坐标
array([[ 0, 0],
[ 0, 0],
[ 0, 0],
...,
[1020, 1020],
[1020, 1020],
[1020, 1020]])
>>> box_centers.shape
(196608, 2)
>>> box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
>>> box_sizes
array([[ 45.254834, 22.627417],
[ 32. , 32. ],
[ 22.627417, 45.254834],
...,
[ 45.254834, 22.627417],
[ 32. , 32. ],
[ 22.627417, 45.254834]])
>>> box_sizes.shape
(196608, 2)
这就是featuremap大小为(256*256)时生成的对应于原图的anchors! 最后的196608就是256*256*3。函数的最后一步np.concatenate再把中心坐标和(h, w)转换成角点坐标(y1, x1, y2, x2),得到的boxes形状为(196608, 4)。其他大小的featuremap与此类似.