一、anchor部分参数的配置
上面是faster_rcnn中关于rpn anchor的参数配置:strides = [4, 8, 16, 32, 64] 是5个feature map相对于输入尺寸的下采样倍数;ratios是同一个位置上anchor长宽比(高/宽)的三种变换;目前scales这个参数不是很明白,从下文的代码来看,它是在base_size(即stride)基础上的放大倍数,即anchor边长 = stride × scale。
二、生成基础anchors
在这个部分,anchors的大小已经决定了,这里anchors的大小是指映射到输入原图上的大小。
| 层索引 | stride | scales | anchor大小(基于输入原图) |
|---|---|---|---|
| 0 | 4 | 8 | 4 x 8 = 32 |
| 1 | 8 | 8 | 8 x 8 = 64 |
| 2 | 16 | 8 | 16 x 8 = 128 |
| 3 | 32 | 8 | 32 x 8 = 256 |
| 4 | 64 | 8 | 64 x 8 = 512 |
def gen_single_level_base_anchors(self,
                                  base_size,
                                  scales,
                                  ratios,
                                  center=None):
    """Generate base anchors of a single level.

    Args:
        base_size (int | float): Basic size of an anchor.
        scales (torch.Tensor): Scales of the anchor.
        ratios (torch.Tensor): The ratio between the height
            and width of anchors in a single level.
        center (tuple[float], optional): The center of the base anchor
            related to a single feature grid. Defaults to None, in which
            case the center is ``self.center_offset * base_size`` on
            both axes.

    Returns:
        torch.Tensor: Anchors in a single-level feature map, one row per
            (ratio, scale) combination, laid out as (x1, y1, x2, y2).
    """
    # The base anchor is square; e.g. base_size is 4 for the level whose
    # downsampling factor is 4.
    w = base_size
    h = base_size
    if center is None:
        # e.g. center_offset = 0.0 gives x_center = y_center = 0.0
        x_center = self.center_offset * w
        y_center = self.center_offset * h
    else:
        x_center, y_center = center

    # Scaling height by sqrt(r) and width by 1 / sqrt(r) yields h / w == r
    # while leaving the area h * w unchanged.
    # e.g. ratios = [0.5, 1.0, 2.0] -> h_ratios = [0.7071, 1.0000, 1.4142]
    h_ratios = torch.sqrt(ratios)
    # ... and w_ratios = [1.4142, 1.0000, 0.7071]
    w_ratios = 1 / h_ratios
    if self.scale_major:
        # Flattened with the ratio index varying slowest.
        # e.g. base_size = 4, scales = [8]:
        #   ws = [45.2548, 32.0000, 22.6274]
        #   hs = [22.6274, 32.0000, 45.2548]
        ws = (w * w_ratios[:, None] * scales[None, :]).view(-1)
        hs = (h * h_ratios[:, None] * scales[None, :]).view(-1)
    else:
        # Flattened with the scale index varying slowest.
        ws = (w * scales[:, None] * w_ratios[None, :]).view(-1)
        hs = (h * scales[:, None] * h_ratios[None, :]).view(-1)

    # use float anchor and the anchor's center is aligned with the
    # pixel center
    # Corner coordinates of every box around (x_center, y_center).
    base_anchors = [
        x_center - 0.5 * ws, y_center - 0.5 * hs, x_center + 0.5 * ws,
        y_center + 0.5 * hs
    ]
    base_anchors = torch.stack(base_anchors, dim=-1)

    return base_anchors
三、将每一层的基础anchors映射到对应的层的坐标
def single_level_grid_priors(self,
                             featmap_size,
                             level_idx,
                             dtype=torch.float32,
                             device='cuda'):
    """Generate grid anchors of a single level.

    Note:
        This function is usually called by method ``self.grid_priors``.

    Args:
        featmap_size (tuple[int]): Size of the feature maps, given as
            (height, width).
        level_idx (int): The index of corresponding feature map level.
        dtype (obj:`torch.dtype`): Data type of points. Defaults to
            ``torch.float32``.
        device (str, optional): The device the tensor will be put on.
            Defaults to 'cuda'.

    Returns:
        torch.Tensor: Anchors in the overall feature maps, reshaped to
            four columns (one row per shift/base-anchor pair).
    """
    base_anchors = self.base_anchors[level_idx].to(device).to(dtype)
    feat_h, feat_w = featmap_size  # e.g. 80 x 152
    stride_w, stride_h = self.strides[level_idx]

    # First create Range with the default dtype, then convert to
    # target `dtype` for onnx exporting.
    # Every cell of the feature grid is mapped back onto the input image:
    # with stride = 4, grid cell (1, 1) corresponds to input position
    # (4, 4), so shift_x = [0, 4, 8, 12, 16, ...].
    shift_x = torch.arange(0, feat_w, device=device).to(dtype) * stride_w
    shift_y = torch.arange(0, feat_h, device=device).to(dtype) * stride_h

    # Grid point coordinates on the input image.
    # NOTE(review): assumes ``self._meshgrid`` returns flattened x/y
    # tensors of equal length (defined outside this block) -- confirm.
    shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
    # The (x, y) offset is repeated so it applies to both box corners.
    shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)

    # first feat_w elements correspond to the first row of shifts
    # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
    # shifted anchors (K, A, 4), reshape to (K*A, 4)
    # This places the level's base anchors (e.g. 32 x 32 when the stride
    # is 4 and the scale is 8) at every grid point, in input-image
    # coordinates.
    all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
    all_anchors = all_anchors.view(-1, 4)
    # first A rows correspond to A anchors of (0, 0) in feature map,
    # then (0, 1), (0, 2), ...
    return all_anchors