本文详解 maskrcnn_benchmark 代码中 RoI 层边框预测模块的损失函数计算代码。本文详解的 loss.py 包含了预测边框筛选函数:通过该函数可以排除原预测边框中不符合标准的边框,重新选择背景边框和目标边框,并使用由这些边框构成的新预测边框集合来计算 loss 值。其代码详解如下:
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
from torch.nn import functional as F
from maskrcnn_benchmark.layers import smooth_l1_loss
from maskrcnn_benchmark.modeling.box_coder import BoxCoder
from maskrcnn_benchmark.modeling.matcher import Matcher
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou
from maskrcnn_benchmark.modeling.balanced_positive_negative_sampler import (
BalancedPositiveNegativeSampler
)
from maskrcnn_benchmark.modeling.utils import cat
class FastRCNNLossComputation(object):
    """
    Computes the loss for Faster R-CNN.
    Also supports FPN.

    Usage is two-step: ``subsample`` must be called first (it caches the
    sampled proposals on the instance as ``self._proposals``), after which
    the instance can be called with the head outputs to obtain the
    classification loss and the box regression loss.
    """

    def __init__(
        self,
        proposal_matcher,
        fg_bg_sampler,
        box_coder,
        cls_agnostic_bbox_reg=False
    ):
        """
        Arguments:
            proposal_matcher (Matcher)
            fg_bg_sampler (BalancedPositiveNegativeSampler)
            box_coder (BoxCoder)
            cls_agnostic_bbox_reg (bool): when True, the regression loss
                reads the same four output columns for every foreground
                box instead of selecting columns by class label.
        """
        self.proposal_matcher = proposal_matcher
        self.fg_bg_sampler = fg_bg_sampler
        self.box_coder = box_coder
        self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg

    def match_targets_to_proposals(self, proposal, target):
        """
        Pair every proposal of one image with a ground-truth box.

        Arguments:
            proposal (BoxList): proposals of a single image
            target (BoxList): ground-truth boxes of the same image

        Returns:
            BoxList with one ground-truth entry per proposal, carrying the
            "labels" field plus a "matched_idxs" field that holds the raw
            matcher output (including its negative sentinel values).
        """
        # IoU between the ground-truth boxes and the proposals.
        match_quality_matrix = boxlist_iou(target, proposal)
        # Per-proposal index of the matched ground-truth box; proposals
        # without a valid match receive a negative sentinel (see
        # Matcher.BELOW_LOW_THRESHOLD / Matcher.BETWEEN_THRESHOLDS).
        matched_idxs = self.proposal_matcher(match_quality_matrix)
        # Fast RCNN only need "labels" field for selecting the targets
        target = target.copy_with_fields("labels")
        # get the targets corresponding GT for each proposal
        # NB: need to clamp the indices because we can have a single
        # GT in the image, and matched_idxs can be negative, which goes
        # out of bounds. As a consequence every unmatched proposal is
        # temporarily paired with the first ground-truth box; the caller
        # fixes up its label using "matched_idxs".
        matched_targets = target[matched_idxs.clamp(min=0)]
        # Keep the unclamped indices so unmatched proposals stay identifiable.
        matched_targets.add_field("matched_idxs", matched_idxs)
        return matched_targets

    def prepare_targets(self, proposals, targets):
        """
        Compute classification labels and regression targets per image.

        Arguments:
            proposals (list[BoxList]): proposals, one BoxList per image
            targets (list[BoxList]): ground-truth boxes, one BoxList per image

        Returns:
            labels (list[Tensor]): per image, an int64 label per proposal;
                0 marks background and -1 marks proposals to be ignored
            regression_targets (list[Tensor]): per image, the output of
                ``box_coder.encode`` for each (matched GT, proposal) pair
        """
        labels = []
        regression_targets = []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            matched_targets = self.match_targets_to_proposals(
                proposals_per_image, targets_per_image
            )
            matched_idxs = matched_targets.get_field("matched_idxs")

            labels_per_image = matched_targets.get_field("labels")
            labels_per_image = labels_per_image.to(dtype=torch.int64)

            # Label background (below the low threshold)
            bg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
            labels_per_image[bg_inds] = 0

            # Label ignore proposals (between low and high thresholds)
            ignore_inds = matched_idxs == Matcher.BETWEEN_THRESHOLDS
            labels_per_image[ignore_inds] = -1  # -1 is ignored by sampler

            # compute regression targets
            # Targets are encoded for every proposal, including unmatched
            # ones (those were paired with the first GT box by the clamp).
            regression_targets_per_image = self.box_coder.encode(
                matched_targets.bbox, proposals_per_image.bbox
            )

            labels.append(labels_per_image)
            regression_targets.append(regression_targets_per_image)

        return labels, regression_targets

    def subsample(self, proposals, targets):
        """
        This method performs the positive/negative sampling, and return
        the sampled proposals.
        Note: this function keeps a state: the sampled proposals are stored
        in ``self._proposals`` for the subsequent loss computation. It also
        adds the "labels" and "regression_targets" fields to the BoxList
        objects passed in.

        Arguments:
            proposals (list[BoxList]): proposals, one BoxList per image
            targets (list[BoxList]): ground-truth boxes, one BoxList per image

        Returns:
            list[BoxList]: per image, only the sampled proposals
        """
        labels, regression_targets = self.prepare_targets(proposals, targets)
        # Per-image selection masks for the sampled positives and negatives.
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)

        # Copy the container so that replacing entries below does not
        # modify the caller's sequence.
        proposals = list(proposals)
        # add corresponding label and regression_targets information to the bounding boxes
        for labels_per_image, regression_targets_per_image, proposals_per_image in zip(
            labels, regression_targets, proposals
        ):
            proposals_per_image.add_field("labels", labels_per_image)
            proposals_per_image.add_field(
                "regression_targets", regression_targets_per_image
            )

        # distributed sampled proposals, that were obtained on all feature maps
        # concatenated via the fg_bg_sampler, into individual feature map levels
        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(
            zip(sampled_pos_inds, sampled_neg_inds)
        ):
            # Indices of every proposal selected as either positive or negative.
            img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1)
            proposals_per_image = proposals[img_idx][img_sampled_inds]
            proposals[img_idx] = proposals_per_image

        self._proposals = proposals
        return proposals

    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)

        Raises:
            RuntimeError: if ``subsample`` has not been called on this
                instance yet.
        """
        # Validate the instance state before touching the inputs, so that a
        # missing subsample() call always surfaces as this error.
        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        proposals = self._proposals

        # Gather labels and regression targets of all images into one tensor each.
        labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0)
        regression_targets = cat(
            [proposal.get_field("regression_targets") for proposal in proposals], dim=0
        )

        classification_loss = F.cross_entropy(class_logits, labels)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[sampled_pos_inds_subset]
        if self.cls_agnostic_bbox_reg:
            # Class-agnostic regression: every foreground box reads columns
            # 4..7 of box_regression.
            # NOTE(review): presumably the single foreground slot after the
            # background slot - confirm against the box predictor.
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            # Class-specific regression: a box with label c reads columns
            # 4*c .. 4*c+3 of box_regression.
            map_inds = 4 * labels_pos[:, None] + torch.tensor(
                [0, 1, 2, 3], device=device)

        # Only foreground boxes contribute to the regression loss.
        # NOTE(review): size_average=False presumably requests a summed
        # loss - confirm against maskrcnn_benchmark.layers.smooth_l1_loss.
        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds_subset[:, None], map_inds],
            regression_targets[sampled_pos_inds_subset],
            size_average=False,
            beta=1,
        )
        # Normalized by the number of all sampled proposals (foreground and
        # background), not by the number of foreground boxes.
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
# Factory: builds the loss evaluator for the RoI box head from the config.
def make_roi_box_loss_evaluator(cfg):
    """
    Build the FastRCNNLossComputation used by the RoI box head.

    Arguments:
        cfg: configuration node; the function reads
            MODEL.ROI_HEADS.{FG_IOU_THRESHOLD, BG_IOU_THRESHOLD,
            BBOX_REG_WEIGHTS, BATCH_SIZE_PER_IMAGE, POSITIVE_FRACTION}
            and MODEL.CLS_AGNOSTIC_BBOX_REG.

    Returns:
        FastRCNNLossComputation wired with a Matcher, a
        BalancedPositiveNegativeSampler and a BoxCoder built from cfg.
    """
    roi_heads_cfg = cfg.MODEL.ROI_HEADS

    # Matcher between proposals and ground-truth boxes, configured with the
    # foreground / background IoU thresholds; low-quality matches disabled.
    proposal_matcher = Matcher(
        roi_heads_cfg.FG_IOU_THRESHOLD,
        roi_heads_cfg.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )

    # Box coder parameterized by the configured regression weights.
    coder = BoxCoder(weights=roi_heads_cfg.BBOX_REG_WEIGHTS)

    # Sampler configured with the per-image batch size and positive fraction.
    sampler = BalancedPositiveNegativeSampler(
        roi_heads_cfg.BATCH_SIZE_PER_IMAGE,
        roi_heads_cfg.POSITIVE_FRACTION,
    )

    # Whether box regression is shared across classes.
    agnostic = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG

    return FastRCNNLossComputation(proposal_matcher, sampler, coder, agnostic)