class BaseBBoxCoder(metaclass=ABCMeta):
    """Base bounding box coder.

    Declares the ``encode`` / ``decode`` pair that concrete coders are
    expected to override. The methods defined here carry no implementation
    of their own and simply return ``None``.
    """

    def __init__(self, **kwargs):
        """Accept arbitrary keyword arguments; none are stored here."""

    def encode(self, bboxes, gt_bboxes):
        """Encode deltas between bboxes and ground truth boxes."""

    def decode(self, bboxes, bboxes_pred):
        """Decode the predicted bboxes according to prediction and base boxes."""
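其中 [x, y, w, h] 表示 gt bbox 的中心坐标和宽、高;[xa, ya, wa, ha] 表示 anchor 的中心坐标和宽、高。编码公式为 $t_x^* = (x - x_a) / w_a$,$t_y^* = (y - y_a) / h_a$,$t_w^* = \log(w / w_a)$,$t_h^* = \log(h / h_a)$。简单来说,$t_x^*$、$t_y^*$ 是二者中心坐标之差除以 anchor 的宽、高做归一化;$t_w^*$、$t_h^*$ 则是二者宽、高之比取对数。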
import torch
from mmdet.core.bbox import build_bbox_coder
if __name__ == '__main__':
    # Demo: build a bbox coder from a config dict and encode two proposals
    # against their ground-truth boxes.
    #
    # The config needs a `type` entry naming the coder class to build;
    # `target_means` / `target_stds` are the normalization parameters of the
    # delta (dx, dy, dw, dh) coder.
    bbox_coder = dict(
        type='DeltaXYWHBBoxCoder',
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0])
    coder = build_bbox_coder(bbox_coder)
    # Build two proposal boxes and the matching ground-truth boxes
    # (one gt box per proposal).
    proposals = torch.tensor([[1, 1, 3, 3], [4, 4, 6, 6]])
    gt = torch.tensor([[2, 2, 3, 3], [2, 2, 5, 5]])
    target_t = coder.encode(proposals, gt)  # call the encode method
这部分代码我已经加了注释。总体思路是:首先将 proposals 和 gt bbox 由 [xmin, ymin, xmax, ymax] 转换成 [cx, cy, w, h],之后计算 t*,最后将 t* 减去均值并除以标准差。
# Encoding fragment: converts proposal / ground-truth box pairs into
# normalized regression targets (dx, dy, dw, dh).
# NOTE(review): `proposals`, `gt`, `means` and `stds` are not defined in this
# snippet -- presumably they are the enclosing function's parameters; confirm.
# The number of proposals must match the number of gt boxes
assert proposals.size() == gt.size() # [N,4]
proposals = proposals.float()
gt = gt.float()
# proposals: [xmin, ymin, xmax, ymax] --> [cx, cy, w, h]
px = (proposals[..., 0] + proposals[..., 2]) * 0.5 # [N]
py = (proposals[..., 1] + proposals[..., 3]) * 0.5
pw = proposals[..., 2] - proposals[..., 0]
ph = proposals[..., 3] - proposals[..., 1]
# gt: [xmin, ymin, xmax, ymax] --> [cx, cy, w, h]
gx = (gt[..., 0] + gt[..., 2]) * 0.5
gy = (gt[..., 1] + gt[..., 3]) * 0.5
gw = gt[..., 2] - gt[..., 0]
gh = gt[..., 3] - gt[..., 1]
# Compute the targets t*: center offsets are scaled by the proposal size,
# width/height are expressed as log ratios
dx = (gx - px) / pw
dy = (gy - py) / ph
dw = torch.log(gw / pw)
dh = torch.log(gh / ph)
deltas = torch.stack([dx, dy, dw, dh], dim=-1) # [N] --> [N,4]
# Normalize: subtract the mean, then divide by the standard deviation
means = deltas.new_tensor(means).unsqueeze(0) # [1,4]
stds = deltas.new_tensor(stds).unsqueeze(0) # [1,4]
# sub_ / div_ modify `deltas` in place
deltas = deltas.sub_(means).div_(stds) # [N,4]
# Decoding fragment: turns normalized deltas back into
# [xmin, ymin, xmax, ymax] boxes relative to `rois`.
# NOTE(review): `deltas`, `rois`, `means`, `stds`, `wh_ratio_clip` and
# `max_shape` are not defined in this snippet -- presumably they are the
# enclosing function's parameters; confirm.
# Tile means / stds along dim 1 so they line up with every group of four
# delta columns: [4] --> [1,4] when deltas is [N,4]
means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
# Undo the normalization applied at encoding time
denorm_deltas = deltas * stds + means  # [N,4]
# Pick out dx, dy, dw, dh: every 4th column starting at 0, 1, 2, 3
dx = denorm_deltas[:, 0::4]  # [N,1] when deltas is [N,4]
dy = denorm_deltas[:, 1::4]
dw = denorm_deltas[:, 2::4]
dh = denorm_deltas[:, 3::4]
# Bound dw / dh to +-|log(wh_ratio_clip)| before they are exponentiated below
max_ratio = np.abs(np.log(wh_ratio_clip))
dw = dw.clamp(min=-max_ratio, max=max_ratio)
dh = dh.clamp(min=-max_ratio, max=max_ratio)
# Convert rois / proposals to center form and broadcast to the delta shape:
# [N] --> [N,1] --> same shape as dx / dy
px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
# Compute width/height of each roi
pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw)
ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh)
# Decode: scale the roi size by exp(dw), exp(dh) and shift its center by
# dx, dy measured in units of the roi width / height
gw = pw * dw.exp()
gh = ph * dh.exp()
gx = px + pw * dx
gy = py + ph * dy
# Convert [cx, cy, w, h] --> [xmin, ymin, xmax, ymax]
x1 = gx - gw * 0.5
y1 = gy - gh * 0.5
x2 = gx + gw * 0.5
y2 = gy + gh * 0.5
# Clip the boxes only when a bound is supplied. x is limited by
# max_shape[1] and y by max_shape[0] -- presumably (height, width); confirm.
if max_shape is not None:
    x1 = x1.clamp(min=0, max=max_shape[1])
    y1 = y1.clamp(min=0, max=max_shape[0])
    x2 = x2.clamp(min=0, max=max_shape[1])
    y2 = y2.clamp(min=0, max=max_shape[0])
# Interleave the corners and reshape to the layout of `deltas`
bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)