对于目标检测算法而言,正负样本的分配,采样策略以及正负样本的数量和比例的设置等,对算法的精度有着显著的影响,了解其机制和原理便于我们加深对算法的了解和后续对算法的优化改进.
在检测算法中正负样本的设置主要包含两个关键的步骤:
1.assigner:为每一个先验框[anchor]分配属性
- 正样本
- 负样本
- 其它[既不当成正样本也不当成负样本处理,忽略]
2.sampler:采取某种策略[如随机采样]从分配好的正负样本中选出对应数量的正负样本进行训练
以下相关参数的设置和代码的讲解,来源于mmdetection
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7, #正样本阈值
neg_iou_thr=0.3, #负样本阈值
min_pos_iou=0.2, #最低正样本阈值
ignore_iof_thr=0.3),
sampler=dict(
type='RandomSampler',
num=256, #总数量
pos_fraction=0.5, #比例
neg_pos_ub=-1,
add_gt_as_proposals=False), #是否将gt添加到正样本中
allowed_border=128, #允许anchor尺寸超过图像边缘的最大像素
pos_weight=-1, #正样本的权重,如果小于0则为1,大于0则为对应设置的值
debug=False),
2.1.处理掉一些中心点坐标超过边界过多、宽高尺寸过大的anchor
inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
img_meta['img_shape'][:2],
cfg.allowed_border) #128
anchors = flat_anchors[inside_flags.type(torch.bool), :]
def anchor_inside_flags(flat_anchors,
valid_flags,
img_shape,
allowed_border=0):
img_h, img_w = img_shape[:2]
if allowed_border >= 0:
inside_flags = valid_flags & \
(flat_anchors[:, 0] >= -allowed_border).type(torch.uint8) & \
(flat_anchors[:, 1] >= -allowed_border).type(torch.uint8) & \
(flat_anchors[:, 2] < img_w + allowed_border).type(torch.uint8) & \
(flat_anchors[:, 3] < img_h + allowed_border).type(torch.uint8)
else:
inside_flags = valid_flags
return inside_flags
2.2.assign :为每一个Anchor分配属性:
0–负样本
-1—忽略,既不是正样本也不是负样本
positive number— assigned gt[与哪一个gt_bbox对应,存放的是对应gt_bbox的索引]
计算步骤:
1)计算gt_bbox【g,4】与anchor_bbox[k,4]的iou,得到一个【g,k】的矩阵。
overlaps = bbox_overlaps(gt_bboxes, bboxes)
2)初始化属性矩阵,将所有anchor置为-1
assigned_gt_inds =overlaps.new_full((num_bboxes, ),-1,dtype=torch.long)
3)计算每一个Anchor与gt的最大iou和索引
max_overlaps, argmax_overlaps = overlaps.max(dim=0)
4)计算每一个gt与anchor的最大iou和索引
gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)
5)遍历所有anchor,若其最大iou小于neg_iou_thr[0.3]则置为负样本(0),若大于等于pos_iou_thr[0.7]则置为正样本(对应gt索引+1)
assigned_gt_inds[(max_overlaps >= 0) & (max_overlaps < self.neg_iou_thr)] = 0
pos_inds = max_overlaps >= self.pos_iou_thr
assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1 #[**此处对索引进行了加1操作]**
6)遍历所有的gt,若最大iou大于min_pos_iou[0.2],将其置为正样本(一个GT可能有多个或者一个anchor与之匹配)
if gt_max_overlaps[i] >= self.min_pos_iou:
if self.gt_max_assign_all: #true
max_iou_inds = overlaps[i, :] == gt_max_overlaps[i] #(与该GT的iou等于最大iou的bbox都分配到该gt)
assigned_gt_inds[max_iou_inds] = i + 1
else:
assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1
7)将输出结果写入AssignResult类中。
class AssignResult(object):
    """Result of assigning ground-truth boxes to anchors.

    Per-anchor attribute convention (see the assign step above):
    -1 = ignored, 0 = negative, >0 = 1-based index of the matched gt box.
    """

    def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
        # number of ground-truth boxes (NOT the number of positive samples)
        self.num_gts = num_gts
        # per-anchor assignment: -1 ignore, 0 negative, >0 1-based gt index
        self.gt_inds = gt_inds
        # max IoU of each anchor over all gt boxes
        self.max_overlaps = max_overlaps
        # optional per-anchor class label
        self.labels = labels
2.3.sampler: 在分好属性的anchor中按照一定的策略挑选出对应数量的正负样本框
计算步骤
1)计算期望的正样本数量:总数量*比例
num_expected_pos = int(self.num * self.pos_fraction)
2)sample_pos: 生成正样本,如果正样本数量 < 预期数量,则输出所有正样本索引【一般达不到预期】;如果正样本 > 期望数,则在正样本中随机选取对应数量的正样本[此处采用的随机抽样的方法选取正样本]
pos_inds = self.pos_sampler._sample_pos(assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
def _sample_pos(self, assign_result, num_expected, **kwargs):
"""Randomly sample some positive samples."""
pos_inds = torch.nonzero(assign_result.gt_inds > 0) #取出正样本的索引
if pos_inds.numel() != 0:
pos_inds = pos_inds.squeeze(1)
if pos_inds.numel() <= num_expected:
return pos_inds
else:
return self.random_choice(pos_inds, num_expected)
3)计算负样本数量。总样本数-正样本数【此处可以通过比例控制负样本的数量】
num_expected_neg = self.num - num_sampled_pos
if self.neg_pos_ub >= 0: # -1
_pos = max(1, num_sampled_pos)
neg_upper_bound = int(self.neg_pos_ub * _pos)
if num_expected_neg > neg_upper_bound:
num_expected_neg = neg_upper_bound
4)sample_neg:生成负样本,规则同正样本
neg_inds = self.neg_sampler._sample_neg(assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
def _sample_neg(self, assign_result, num_expected, **kwargs):
"""Randomly sample some negative samples."""
neg_inds = torch.nonzero(assign_result.gt_inds == 0)
if neg_inds.numel() != 0:
neg_inds = neg_inds.squeeze(1)
if len(neg_inds) <= num_expected:
return neg_inds
else:
return self.random_choice(neg_inds, num_expected)
5)将最后的结果写入到SamplingResult类中
class SamplingResult(object):
    """Holds the sampled positive/negative boxes of one image together with
    their matched ground-truth boxes and labels."""

    def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
                 gt_flags):
        self.pos_inds = pos_inds  # anchor indices of the sampled positives
        self.neg_inds = neg_inds  # anchor indices of the sampled negatives
        self.pos_bboxes = bboxes[pos_inds]  # coordinates of positive boxes
        self.neg_bboxes = bboxes[neg_inds]  # coordinates of negative boxes
        # 1 where the positive was an injected gt proposal (add_gt_as_proposals)
        self.pos_is_gt = gt_flags[pos_inds]
        # number of ground-truth boxes (NOT the number of positive samples)
        self.num_gts = gt_bboxes.shape[0]
        # back to 0-based gt indices (the assigner stored them 1-based)
        self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
        self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
        if assign_result.labels is not None:
            self.pos_gt_labels = assign_result.labels[pos_inds]
        else:
            self.pos_gt_labels = None

    @property
    def bboxes(self):
        # positives first, then negatives; downstream code relies on this order
        return torch.cat([self.pos_bboxes, self.neg_bboxes])
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=0.3),
sampler=dict(
type='OHEMSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)
2.1.assign :为每一个Anchor分配属性,其方法和步骤同RPN的操作【参数设置不同】
2.2.sampler: 在分好属性的anchor中按照一定的策略挑选出对应数量的正负样本框
不同于rpn的操作,此处采用的是OHEMSampler,引入在线困难样例挖掘,即:求得所有的正负样本与gt的loss值,选取loss值最大的前num_expected个正负样本
def hard_mining(self, inds, num_expected, bboxes, labels, feats):
    """Keep the ``num_expected`` samples with the largest classification loss (OHEM)."""
    with torch.no_grad():
        rois = bbox2roi([bboxes])
        # re-extract RoI features and score the candidate boxes
        roi_feats = self.bbox_roi_extractor(
            feats[:self.bbox_roi_extractor.num_inputs], rois)
        cls_score, _ = self.bbox_head(roi_feats)
        # per-sample (unreduced) classification loss
        loss_cls = self.bbox_head.loss(
            cls_score=cls_score,
            bbox_pred=None,
            labels=labels,
            label_weights=cls_score.new_ones(cls_score.size(0)),
            bbox_targets=None,
            bbox_weights=None,
            reduction_override='none')['loss_cls']
        _, hard_inds = loss_cls.topk(num_expected)
    return inds[hard_inds]
计算步骤
1)计算期望的正样本数量:总数量*比例
num_expected_pos = int(self.num * self.pos_fraction)
2)sample_pos: 生成正样本,如果正样本数量 < 预期数量,则输出所有正样本索引【一般达不到预期】;如果正样本 > 期望数,则通过困难样例挖掘的方法选出对应数量的正样本.
pos_inds = self.pos_sampler._sample_pos(
assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
def _sample_pos(self,
assign_result,
num_expected,
bboxes=None,
feats=None,
**kwargs):
# Sample some hard positive samples
pos_inds = torch.nonzero(assign_result.gt_inds > 0)
if pos_inds.numel() != 0:
pos_inds = pos_inds.squeeze(1)
if pos_inds.numel() <= num_expected: #如果正样本数量少于期望值,直接返回相应的正样本
return pos_inds
else:
return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],
assign_result.labels[pos_inds], feats)
#如果正样本数量大于期望值,引入困难样例挖掘
3)计算负样本数量。总样本数-正样本数【此处可以通过比例控制负样本的数量】
pos_inds = pos_inds.unique()
num_sampled_pos = pos_inds.numel()
num_expected_neg = self.num - num_sampled_pos
if self.neg_pos_ub >= 0:
_pos = max(1, num_sampled_pos)
neg_upper_bound = int(self.neg_pos_ub * _pos)
if num_expected_neg > neg_upper_bound:
num_expected_neg = neg_upper_bound
4)sample_neg:生成负样本,规则同正样本
neg_inds = self.neg_sampler._sample_neg(
assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
def _sample_neg(self,
assign_result,
num_expected,
bboxes=None,
feats=None,
**kwargs):
# Sample some hard negative samples
neg_inds = torch.nonzero(assign_result.gt_inds == 0)
if neg_inds.numel() != 0:
neg_inds = neg_inds.squeeze(1)
if len(neg_inds) <= num_expected:
return neg_inds
else:
return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],
assign_result.labels[neg_inds], feats)
5)将最后的结果写入到SamplingResult类中
return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
assign_result, gt_flags)
class SamplingResult(object):
    """Holds the sampled positive/negative boxes of one image together with
    their matched ground-truth boxes and labels."""

    def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
                 gt_flags):
        self.pos_inds = pos_inds  # anchor indices of the sampled positives
        self.neg_inds = neg_inds  # anchor indices of the sampled negatives
        self.pos_bboxes = bboxes[pos_inds]  # coordinates of positive boxes
        self.neg_bboxes = bboxes[neg_inds]  # coordinates of negative boxes
        # 1 where the positive was an injected gt proposal (add_gt_as_proposals)
        self.pos_is_gt = gt_flags[pos_inds]
        # number of ground-truth boxes (NOT the number of positive samples)
        self.num_gts = gt_bboxes.shape[0]
        # back to 0-based gt indices (the assigner stored them 1-based)
        self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
        self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
        if assign_result.labels is not None:
            self.pos_gt_labels = assign_result.labels[pos_inds]
        else:
            self.pos_gt_labels = None

    @property
    def bboxes(self):
        # positives first, then negatives; downstream code relies on this order
        return torch.cat([self.pos_bboxes, self.neg_bboxes])
注意:
1.对于每一个img,我们将正负样本的结果和与gt的匹配情况存放在一个SamplingResult中,对于一个batch我们将结果存放在SamplingResults中
sampling_results = []
for i in range(num_imgs):
assign_result = bbox_assigner.assign(proposal_list[i],
gt_bboxes[i],
gt_bboxes_ignore[i],
gt_labels[i])
sampling_result = bbox_sampler.sample(
assign_result,
proposal_list[i],
gt_bboxes[i],
gt_labels[i],
feats=[lvl_feat[i][None] for lvl_feat in feat_roi])
sampling_results.append(sampling_result)
2.对于每一个sampling_result里面存放的bbox顺序都是先存正样本然后放负样本,后面索取的时候利用了这一特征
@property
def bboxes(self):
return torch.cat([self.pos_bboxes, self.neg_bboxes]) #前面都是正样本后面都是负样本
mlvl_featmap_sizes = [featmap.size()[-2:] for featmap in mlvl_preds] 各特征图的尺寸
# multi level anchors
mlvl_anchors_num = torch.Tensor([anchors.size(0) for anchors in mlvl_anchors]).long().to(device)
mlvl_anchors_cusum = torch.cumsum(mlvl_anchors_num, dim=0).to(device)#[3, 6, 9]
mlvl_anchors_cusum_ = torch.cat([torch.Tensor([0]).long().to(device), mlvl_anchors_cusum])#[0, 3, 6, 9]
# multi level grids
mlvl_grids_num = torch.Tensor([grids.size(0) for grids in mlvl_grids]).long().to(device) #各层anchor的总数量【不等于3】
num_gts = gt_bboxes.size(0) #gt的数量
# concat all level anchors to a single tensor
flat_anchors = torch.cat(mlvl_anchors) 拼接所有的anchor到一个维度 【9,2】
gt_cxy = (gt_bboxes[:, :2] + gt_bboxes[:, 2:4]) / 2
gt_wh = gt_bboxes[:, 2:4] - gt_bboxes[:, :2]
gt_xywh = torch.cat([gt_cxy, gt_wh], dim=1)
#将gt和anchor的中心点都归一到原点,通过长宽求得顶点坐标
pesudo_gt_bboxes = torch.cat([-0.5 * gt_wh, 0.5*gt_wh], dim=1)
pesudo_anchors = torch.cat([-0.5 * flat_anchors, 0.5 * flat_anchors], dim=1)
overlaps = bbox_overlaps(pesudo_gt_bboxes, pesudo_anchors)求gt与anchor的iou [gt, 9]
2.计算得到每一个gt的anchor索引【9个中的一个】和特征图索引【分配到哪一层】
_, gt_argmax_overlaps = overlaps.max(dim=1) #计算每一个gt与那个anchor匹配【9个中的一个】
argmax_level = torch.stack([torch.nonzero(mlvl_anchors_cusum > argmax, as_tuple=False)[0][0] for argmax in gt_argmax_overlaps])#计算每一个gt与那一层的anchor匹配
gt_inds = torch.arange(0, num_gts, dtype=torch.long).to(device) #gt的索引
3.逐特征图进行遍历:
for level_idx in range(self.num_levels): #遍历每一层特征图
stride = mlvl_strides[level_idx] #每一层的步长
anchors = mlvl_anchors[level_idx] #每一层的anchor尺寸【3个】
grids = mlvl_grids[level_idx] #该层所有网格点的索引 【w*h, 2】
feat_h, feat_w = mlvl_featmap_sizes[level_idx] #特征图尺寸
grid_anchors = torch.cat((grids[:, None, :] * stride - anchors[None, :, :] / 2 + stride / 2,
grids[:, None, :] * stride + anchors[None, :, :] / 2 + stride / 2), dim=-1).view(-1, 4) #该层所有网格点上的所有anchor的坐标【w*h*3,4】每个网格点3个anchor
grids_num_level = mlvl_grids_num[level_idx] #该层网格点数量 w*h
anchors_num_level = mlvl_anchors_num[level_idx] #该层的anchor数量【3】
# initialize assigned gt inds by assume all sample is negtive
assigned_gt_inds_level = overlaps.new_full((grids_num_level, anchors_num_level),
0,
dtype=torch.long) #初始化每个网格点的每一个anchor属性 【-1:忽略,0:负样本,>0:与对应gt匹配的正样本】
# 初始化操作
bbox_targets_level = overlaps.new_full((grids_num_level, anchors_num_level, 4), 0)
reg_weights_level = overlaps.new_full((grids_num_level, anchors_num_level, 2), 0)
assigned_labels_level = overlaps.new_full((grids_num_level, anchors_num_level), -1, dtype=torch.long)
vedges_flag_level = overlaps.new_full((grids_num_level, anchors_num_level, 4), 0)
vedges_level = overlaps.new_full((grids_num_level, anchors_num_level, 8), 0)
vedges_flag_weights_level = overlaps.new_full((grids_num_level, anchors_num_level, 4), 0)
vedges_weights_level = overlaps.new_full((grids_num_level, anchors_num_level, 8), 0)
valid_cube_inds = overlaps.new_full((grids_num_level, anchors_num_level), 0, dtype=torch.bool)
# whether to ignore the sample which is overlapped with ground truth bboxes
if self.ignore_iou_thr > 0:
ovelaps_level = bbox_overlaps(gt_bboxes, grid_anchors)#对应到输入分辨率计算每一个anchor与gt的iou 【gt, w*h*3】
# for each anchor, which gt best overlaps with it
# for each anchor, the max iou of all gts
max_overlaps, _ = ovelaps_level.max(dim=0) #计算每一个anchor与gt的最大iou
assigned_gt_inds_level = assigned_gt_inds_level.view(-1) #维度变化
# assigne gt inds with -1 when max overlaps between sample and gt bboxes > igore_iou_thr
assigned_gt_inds_level[max_overlaps > self.ignore_iou_thr] = -1 #将iou大于阈值的anchor属性置为-1
assigned_gt_inds_level = assigned_gt_inds_level.view(grids_num_level, anchors_num_level) #恢复维度
# assinged gt inds 判断是否有gt分配到该层特征图上的anchor,并拿到对应gt的索引
matched_gt_inds = torch.nonzero(argmax_level == level_idx, as_tuple=False).squeeze(1)
if matched_gt_inds.numel() > 0: 如果有gt分配到该层,进行进一步处理
matched_anchor_inds = gt_argmax_overlaps[matched_gt_inds] - mlvl_anchors_cusum_[level_idx] #拿到匹配的anchor索引
matched_gt_xywhs = gt_xywh[matched_gt_inds]
matched_gt_locx = (matched_gt_xywhs[:, 0] / stride).clamp(min=0).long()
matched_gt_locy = (matched_gt_xywhs[:, 1] / stride).clamp(min=0).long()
matched_grid_index = matched_gt_locy * feat_w + matched_gt_locx #拿到匹配的网格点
#[matched_grid_index, matched_anchor_inds] 就是对应网格点的对应anchor
assigned_gt_inds_level[matched_grid_index, matched_anchor_inds] = gt_inds[matched_gt_inds] + 1
bbox_targets_level[matched_grid_index, matched_anchor_inds, 0] = (matched_gt_xywhs[:, 0] / stride - matched_gt_locx).clamp(self.eps, 1 - self.eps)
bbox_targets_level[matched_grid_index, matched_anchor_inds, 1] = (matched_gt_xywhs[:, 1] / stride - matched_gt_locy).clamp(self.eps, 1 - self.eps)
matched_gt_bbox_wh = matched_gt_xywhs[:, 2:4]
matched_anchor = mlvl_anchors[level_idx][matched_anchor_inds]
bbox_targets_level[matched_grid_index, matched_anchor_inds, 2:4] = torch.log((matched_gt_bbox_wh / matched_anchor).clamp(min=self.eps))
reg_weights_level[matched_grid_index, matched_anchor_inds, 0] = 2.0 - matched_gt_bbox_wh.prod(1) / pad_w / pad_h
reg_weights_level[matched_grid_index, matched_anchor_inds, 1] = 2.0 - matched_gt_bbox_wh.prod(1) / pad_w / pad_h
assigned_labels_level[matched_grid_index, matched_anchor_inds] = gt_labels[matched_gt_inds]
grid_anchors_level = torch.cat((grids[:, None, :] * stride - anchors[None, :, :] / 2 + stride / 2,
grids[:, None, :] * stride + anchors[None, :, :] / 2 + stride / 2), dim=-1)
pos_proposals = grid_anchors_level[matched_grid_index, matched_anchor_inds]
matched_gt_cubes = gt_cubes[matched_gt_inds]
vedges_flag_level[matched_grid_index, matched_anchor_inds] = matched_gt_cubes[:, :4]
vedges_level[matched_grid_index, matched_anchor_inds] = edge2delta(pos_proposals, matched_gt_cubes[:, 4:12],
means=self.target_means, stds=self.target_stds)
# 计算有效的cube
# NumPos * 4
vedges_flag_level_valid = (vedges_flag_level[matched_grid_index, matched_anchor_inds] > 0).cuda()
# # NumPos * 1, where valid cube is 1
valid_cube_inds[matched_grid_index, matched_anchor_inds] = torch.Tensor([item.any() for item in vedges_flag_level_valid]). \
cuda(device=device).bool()
vedges_flag_weights_level[valid_cube_inds, :] = 1
detail_vedges_flag = vedges_flag_level.reshape((-1, 3, 4, 1)).repeat(1, 1, 1, 2).reshape(-1, 3, 8)
vedges_weights_level[detail_vedges_flag != 0] = 1
vedges_flag.append(vedges_flag_level)
vedges.append(vedges_level)
vedges_flag_weights.append(vedges_flag_weights_level)
vedges_weights.append(vedges_weights_level)
assigned_gt_inds.append(assigned_gt_inds_level)
bbox_targets.append(bbox_targets_level)
reg_weights.append(reg_weights_level)
assigned_labels.append(assigned_labels_level)