box_selector_test
box_selector_test → make_fcos_postprocessor(), called from fcos_core.modeling.rpn.fcos.inference:
def make_fcos_postprocessor(config):
    pre_nms_thresh = config.MODEL.FCOS.INFERENCE_TH
    pre_nms_top_n = config.MODEL.FCOS.PRE_NMS_TOP_N
    nms_thresh = config.MODEL.FCOS.NMS_TH
    fpn_post_nms_top_n = config.TEST.DETECTIONS_PER_IMG
    bbox_aug_enabled = config.TEST.BBOX_AUG.ENABLED
The corresponding defaults are found in fcos_core.config.defaults:
_C.MODEL.FCOS.INFERENCE_TH = 0.05
_C.MODEL.FCOS.PRE_NMS_TOP_N = 1000
_C.MODEL.FCOS.NMS_TH = 0.6
# Number of detections per image
_C.TEST.DETECTIONS_PER_IMG = 100
# Enable test-time augmentation for bounding box detection if True
_C.TEST.BBOX_AUG.ENABLED = False
    box_selector = FCOSPostProcessor(
        pre_nms_thresh=pre_nms_thresh,
        pre_nms_top_n=pre_nms_top_n,
        nms_thresh=nms_thresh,
        fpn_post_nms_top_n=fpn_post_nms_top_n,
        min_size=0,
        num_classes=config.MODEL.FCOS.NUM_CLASSES,
        bbox_aug_enabled=bbox_aug_enabled
    )
    return box_selector
Scrolling up in the same file we find the class FCOSPostProcessor. Its __init__() merely stores the arguments passed in, so we go straight to the methods that follow.
class FCOSPostProcessor(torch.nn.Module):
    """
    Performs post-processing on the outputs of the RetinaNet boxes.
    This is only used in the testing.
    """
    def __init__(
        # omit
    def forward_for_single_feature_map(
            self, locations, box_cls,
            box_regression, centerness,
            image_sizes):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        N, C, H, W = box_cls.shape
        # put in the same format as locations
        # view, permute and reshape flatten the conv2d output from H×W down to one dimension
        box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, C).sigmoid()
        box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)
        centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1)
        centerness = centerness.reshape(N, -1).sigmoid()
        candidate_inds = box_cls > self.pre_nms_thresh  # N×H*W×C, 0/1
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)  # N counts, one per image
        # clamp(): Clamp all elements in input into the range [min, max],
        # i.e. cap the number of positive samples at the configured pre_nms_top_n (1000)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)
As described in the Inference part of Section 3.1 of the paper, the network produces classification scores; any location whose score exceeds 0.05 is treated as a positive sample and decoded into a predicted bounding box (see FCOS论文及源码详解(一)).
Note that N, read from box_cls.shape, is the batch dimension, i.e. the number of images, not the number of channels (C is the channel/class count). The loop below handles images 0 to N-1 one at a time and appends a separate BoxList per image to results, so boxes from different images are indeed kept apart.
So candidate_inds (N×H*W×C, values 0/1) records whether each class at each location of each image is a positive sample; .view reshapes (N×H*W×C) → (N×(H*W*C)), and .sum(1) adds up the 1s along each row, so pre_nms_top_n holds the number of positive samples the network produced for each image.
.clamp() clamps all elements of the input into the range [min, max]; here it caps the per-image number of positive samples at the configured pre_nms_top_n (1000).
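A minimal sketch of this thresholding and counting step, with made-up shapes (N=2 images, C=3 classes, an 8×8 feature map) and the default threshold values standing in for the config entries:

import torch

N, C, H, W = 2, 3, 8, 8
box_cls = torch.rand(N, H * W, C)        # stand-in for the scores after reshape(N, -1, C).sigmoid()

candidate_inds = box_cls > 0.05          # N×(H*W)×C boolean mask of positive samples
pre_nms_top_n = candidate_inds.view(N, -1).sum(1)  # positives per image, shape (N,)
pre_nms_top_n = pre_nms_top_n.clamp(max=1000)      # cap each count at PRE_NMS_TOP_N
print(pre_nms_top_n)                     # e.g. tensor([182, 185])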
        # multiply the classification scores with centerness scores
        box_cls = box_cls * centerness[:, :, None]
As described in Section 3.3, Center-ness for FCOS, the centerness score weights the final score so as to down-rank the low-quality boxes produced by locations far from an object's center (see FCOS论文及源码详解(一)).
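The indexing centerness[:, :, None] just adds a trailing axis so that the per-location centerness broadcasts over all C class scores; a toy illustration with assumed shapes:

import torch

box_cls = torch.rand(2, 64, 3)     # N×(H*W)×C classification scores
centerness = torch.rand(2, 64)     # N×(H*W) centerness, one value per location

weighted = box_cls * centerness[:, :, None]  # centerness broadcast over the C classes
print(weighted.shape)              # torch.Size([2, 64, 3])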
        results = []
        for i in range(N):
            per_box_cls = box_cls[i]  # H*W×C scores
            per_candidate_inds = candidate_inds[i]  # H*W×C 0/1
            # boolean-mask indexing flattens: a 1-D tensor of the n_positive_samples scores
            per_box_cls = per_box_cls[per_candidate_inds]
            # nonzero(): returns a 2-D tensor where each row is the index for a nonzero value.
            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            # each column corresponds to one class; class 0 is the background, hence +1
            per_class = per_candidate_nonzeros[:, 1] + 1
            per_box_regression = box_regression[i]  # H*W×4 distances
            per_box_regression = per_box_regression[per_box_loc]  # likewise, keep only the positive samples
            per_locations = locations[per_box_loc]
            per_pre_nms_top_n = pre_nms_top_n[i]
            # if the number of positives exceeds the cap, keep the top n
            # by classification score (already weighted by centerness)
            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                # topk(): Returns the k largest elements of the given input tensor along a given dimension.
                # A namedtuple of (values, indices) is returned.
                # sorted (bool, optional) – controls whether to return the elements in sorted order
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]
            # compute the top-left and bottom-right corner coordinates of each positive sample's box
            # stack(): Concatenates a sequence of tensors along a new dimension.
            # dim (int) – dimension to insert.
            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ], dim=1)
            h, w = image_sizes[i]
            boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", torch.sqrt(per_box_cls))
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)
        return results
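The (l, t, r, b) decoding is easy to check by hand; a standalone sketch with two hypothetical positive locations:

import torch

per_locations = torch.tensor([[100., 100.], [40., 60.]])      # (x, y) of each positive location
per_box_regression = torch.tensor([[10., 20., 30., 40.],      # predicted distances (l, t, r, b)
                                   [ 5.,  5., 15., 25.]])

detections = torch.stack([
    per_locations[:, 0] - per_box_regression[:, 0],   # xmin = x - l
    per_locations[:, 1] - per_box_regression[:, 1],   # ymin = y - t
    per_locations[:, 0] + per_box_regression[:, 2],   # xmax = x + r
    per_locations[:, 1] + per_box_regression[:, 3],   # ymax = y + b
], dim=1)
print(detections)  # tensor([[ 90.,  80., 130., 140.], [ 35.,  55.,  55.,  85.]])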
BoxList is imported from fcos_core.structures.bounding_box:
class BoxList(object):
    """
    # omit
    """
    def __init__(self, bbox, image_size, mode="xyxy"):
        device = bbox.device if isinstance(bbox, torch.Tensor) else torch.device("cpu")
        bbox = torch.as_tensor(bbox, dtype=torch.float32, device=device)
        if bbox.ndimension() != 2:
            # omit, error reminder
        self.bbox = bbox
        self.size = image_size  # (image_width, image_height)
        self.mode = mode
        self.extra_fields = {}
    def add_field(self, field, field_data):
        self.extra_fields[field] = field_data
    def fields(self):
        return list(self.extra_fields.keys())
    def convert(self, mode):
        # omit, verify mode
        if mode == self.mode:
            return self
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        # _split_into_xyxy() omitted; it does what its name says
        if mode == "xyxy":
            bbox = torch.cat((xmin, ymin, xmax, ymax), dim=-1)
            bbox = BoxList(bbox, self.size, mode=mode)
        else:
            TO_REMOVE = 1
            # cat(): Concatenates the given sequence of seq tensors in the given dimension.
            # dim (int, optional) – the dimension over which the tensors are concatenated
            bbox = torch.cat(
                (xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1
            )
            bbox = BoxList(bbox, self.size, mode=mode)
        bbox._copy_extra_fields(self)
        return bbox
From this we can see that the instance attribute self.bbox of BoxList stores either the top-left and bottom-right corner coordinates ("xyxy") or the top-left corner plus width and height ("xywh").
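A quick sketch of the conversion arithmetic with a made-up box; the + TO_REMOVE follows the old convention that a box's width counts both end pixels:

import torch

TO_REMOVE = 1
xyxy = torch.tensor([[10., 20., 30., 60.]])   # xmin, ymin, xmax, ymax
xmin, ymin, xmax, ymax = xyxy.split(1, dim=-1)

xywh = torch.cat(
    (xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1
)
print(xywh)  # tensor([[10., 20., 21., 41.]])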
    def clip_to_image(self, remove_empty=True):
        TO_REMOVE = 1
        # clamp_(): In-place version of clamp()
        self.bbox[:, 0].clamp_(min=0, max=self.size[0] - TO_REMOVE)
        self.bbox[:, 1].clamp_(min=0, max=self.size[1] - TO_REMOVE)
        self.bbox[:, 2].clamp_(min=0, max=self.size[0] - TO_REMOVE)
        self.bbox[:, 3].clamp_(min=0, max=self.size[1] - TO_REMOVE)
        if remove_empty:
            box = self.bbox
            keep = (box[:, 3] > box[:, 1]) & (box[:, 2] > box[:, 0])
            return self[keep]
        return self
Note: the clamp to self.size - TO_REMOVE keeps every coordinate within the last valid pixel index, and since the comparison is strict, a box with box[:, 3] == box[:, 1] or box[:, 2] == box[:, 0] gets keep == False, so empty bounding boxes are removed.
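For example, clipping two hypothetical boxes to a 100×100 image, where the second lies entirely outside the image and collapses to zero width:

import torch

TO_REMOVE = 1
w = h = 100
bbox = torch.tensor([[ 50.,  50., 120.,  90.],   # sticks out on the right; gets clipped
                     [-30.,  10.,  -5.,  40.]])  # entirely outside; collapses to xmin == xmax

bbox[:, 0].clamp_(min=0, max=w - TO_REMOVE)
bbox[:, 1].clamp_(min=0, max=h - TO_REMOVE)
bbox[:, 2].clamp_(min=0, max=w - TO_REMOVE)
bbox[:, 3].clamp_(min=0, max=h - TO_REMOVE)

keep = (bbox[:, 3] > bbox[:, 1]) & (bbox[:, 2] > bbox[:, 0])
print(bbox)  # tensor([[50., 50., 99., 90.], [ 0., 10.,  0., 40.]])
print(keep)  # tensor([ True, False])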
remove_small_boxes is imported from fcos_core.structures.boxlist_ops:
def remove_small_boxes(boxlist, min_size):
    """
    # omit
    """
    # TODO maybe add an API for querying the ws / hs
    xywh_boxes = boxlist.convert("xywh").bbox
    # unbind(): Removes a tensor dimension.
    # dim (int) – dimension to remove
    _, _, ws, hs = xywh_boxes.unbind(dim=1)
    keep = (
        (ws >= min_size) & (hs >= min_size)
    ).nonzero().squeeze(1)
    return boxlist[keep]
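The unbind/nonzero/squeeze idiom in isolation, with made-up widths and heights:

import torch

xywh_boxes = torch.tensor([[0., 0., 10., 10.],
                           [0., 0.,  0.,  5.],
                           [0., 0.,  8.,  0.]])
_, _, ws, hs = xywh_boxes.unbind(dim=1)   # four 1-D tensors, one per column

min_size = 1
keep = ((ws >= min_size) & (hs >= min_size)).nonzero().squeeze(1)
print(keep)  # tensor([0]): only the first box survives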
So the closing lines of forward_for_single_feature_map() pack the positive samples' regression results into a BoxList: the top-left and bottom-right box coordinates, the image size, each box's class label, and the final score (classification score × centerness, square-rooted). After clipping to the image and filtering out small boxes, all boxes are returned as the result.
Next, look at the forward() method of FCOSPostProcessor:
    def forward(self, locations, box_cls, box_regression, centerness, image_sizes):
        """
        # omit
        """
        sampled_boxes = []
        for _, (l, o, b, c) in enumerate(zip(locations, box_cls, box_regression, centerness)):
            sampled_boxes.append(
                self.forward_for_single_feature_map(
                    l, o, b, c, image_sizes
                )
            )
        boxlists = list(zip(*sampled_boxes))
        boxlists = [cat_boxlist(boxlist) for boxlist in boxlists]
        if not self.bbox_aug_enabled:
            boxlists = self.select_over_all_levels(boxlists)
        return boxlists
cat_boxlist() is imported from fcos_core.structures.boxlist_ops; it does what its name says, so the code is omitted. Just note these two lines:
size = bboxes[0].size
assert all(bbox.size == size for bbox in bboxes)
So what exactly does cat_boxlist() receive? In forward(), boxlists = list(zip(*sampled_boxes)) transposes the nesting; here is a small example of how zip(*...) works:
a = [1, 5, 7]; b = [2, 3, 4]; c = [a, b]
d = list(zip(*c))
print(d)
>>> [(1, 2), (5, 3), (7, 4)]
forward_for_single_feature_map() returns a list with one BoxList per image, so sampled_boxes is a list of lists: FPN levels on the outside, images on the inside. After zip(*sampled_boxes), each boxlist passed to cat_boxlist() is a tuple containing one BoxList per feature level, all belonging to the same image. Hence inside cat_boxlist() the argument bboxes is that tuple, bboxes[0] is an ordinary BoxList, and bbox.size really is BoxList.size, i.e. the image size: the assert merely checks that the per-level BoxLists being concatenated all refer to the same image.
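A sketch of that transposition with hypothetical counts (5 FPN levels, a batch of 2 images), using strings to stand in for the BoxLists:

# sampled_boxes: one inner list per FPN level, one entry per image
sampled_boxes = [
    [f"BoxList(img{i}, P{level})" for i in range(2)]  # 2 images
    for level in (3, 4, 5, 6, 7)                      # 5 feature levels
]

boxlists = list(zip(*sampled_boxes))  # now: one tuple per image, one entry per level
print(len(boxlists))   # 2 (one per image)
print(boxlists[0])     # ('BoxList(img0, P3)', ..., 'BoxList(img0, P7)')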