def generate_labels(all_anchors, target_bbox):
    """Build classification and regression labels for one training pair.

    SiamRPN-style label assignment: anchors whose IoU with the ground-truth
    box exceeds ``cfg.TRAIN.THR_HIGH`` become positives, anchors below
    ``cfg.TRAIN.THR_LOW`` become negatives, and everything in between is
    ignored. Positives/negatives are randomly subsampled so one pair
    contributes at most ``POS_NUM`` positives and ``TOTAL_NUM`` samples.

    Args:
        all_anchors: pair ``(corner, center)`` of anchor tensors scattered
            over the detection frame (from ``scatter_anchors``), each of
            shape (4, K, ft_size, ft_size).
        target_bbox: ground-truth bbox in the detection frame, xyxy format.

    Returns:
        cls: (K, ft_size, ft_size) int64 labels — 1 positive, 0 negative,
            -1 ignored.
        delta: (4, K, ft_size, ft_size) regression targets computed for
            every anchor.
        delta_weight: (K, ft_size, ft_size) regression-loss weights;
            non-zero only at positive anchors (1 / num_positives).
    """
    corners, centers = all_anchors
    num_anchor, grid = corners.shape[1], corners.shape[2]
    gt_cx, gt_cy, gt_w, gt_h = corner2center(target_bbox)

    # Everything starts as "ignore" (-1) until assigned below.
    cls = -1 * np.ones((num_anchor, grid, grid), dtype=np.int64)
    delta = np.zeros((4, num_anchor, grid, grid), dtype=np.float32)
    delta_weight = np.zeros((num_anchor, grid, grid), dtype=np.float32)

    ax1, ay1, ax2, ay2 = corners[0], corners[1], corners[2], corners[3]
    acx, acy, aw, ah = centers[0], centers[1], centers[2], centers[3]

    # Regression targets, broadcast over the whole (K, grid, grid) lattice.
    delta[0] = (gt_cx - acx) / aw
    delta[1] = (gt_cy - acy) / ah
    delta[2] = np.log(gt_w / aw)
    delta[3] = np.log(gt_h / ah)

    # Split anchors by IoU against the ground-truth box.
    overlap = IoU([ax1, ay1, ax2, ay2], target_bbox)
    pos = np.where(overlap > cfg.TRAIN.THR_HIGH)
    neg = np.where(overlap < cfg.TRAIN.THR_LOW)

    def subsample(indices, keep_num=16):
        # Randomly keep at most `keep_num` entries of an np.where-style
        # index tuple; returns (indices, count_kept).
        total = indices[0].shape[0]
        if total <= keep_num:
            return indices, total
        order = np.arange(total)
        np.random.shuffle(order)
        order = order[:keep_num]
        return tuple(idx[order] for idx in indices), keep_num

    # Cap at POS_NUM positives and TOTAL_NUM total samples per training
    # pair, as in the SiamRPN paper.
    pos, pos_num = subsample(pos, cfg.TRAIN.POS_NUM)
    neg, neg_num = subsample(neg, cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

    cls[pos] = 1
    cls[neg] = 0
    # Normalize so the positive regression losses sum to ~1; the epsilon
    # guards against a pair with zero positives.
    delta_weight[pos] = 1. / (pos_num + 1e-6)
    return cls, delta, delta_weight
当只有一张search图像时：
对于regression部分的label而言，形状为(4, K, 17, 17)，在这段代码里就是delta。其实就是对全部 K × 17 × 17 个锚框统一用论文中的offset公式计算回归目标，这样理论上无论最终选中哪个锚框，都能正确地平移缩放到物体位置。
delta_weight形状是(K,17,17),作用是屏蔽负样本和模糊样本,只取正样本的regression进行求loss,实现上就是正样本的点有对应非0系数,代码里是正样本数目的倒数,负样本和模糊样本对应点系数为0
下面是regression的求loss过程
先求全部锚框的regression的loss
然后沿regression维（大小为4的那一维）求和，得到的feature map形状为(N, K, 17, 17)，每个位置存放该锚框4个offset的loss之和
最后乘上mask,只计算正样本,也就是iou大的锚框的regression的loss
模糊样本的label为-1，正样本为1，负样本为0。计算分类分支的loss时只计算正样本和负样本（label为-1的模糊样本被忽略），并且正、负样本分别求loss后各占50%权重，二者使用同一种loss函数。