maskrcnn_benchmark Code Explained: modeling/rpn/loss.py

Preface:

  In the RPN of maskrcnn_benchmark, selecting proposals and computing the objectness and box regression losses are two separate processes, and their procedures differ. Proposal selection picks a number of high-objectness boxes from each feature level separately and then chooses the final proposals among them, or, in the single-feature-level (non-FPN) case, simply takes the top-objectness boxes from that level. Loss computation works differently: it first computes the IoU between every anchor and every ground truth box, and from that matrix determines the ground truth box matched to each anchor.
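
  To make the matching step concrete, here is a minimal sketch of the IoU computation and thresholding. It assumes plain (N, 4) tensors in (x1, y1, x2, y2) format; pairwise_iou and match_anchors are illustrative stand-ins for the library's boxlist_iou and Matcher, and 0.7 / 0.3 are the default RPN IoU thresholds:

import torch

def pairwise_iou(gt, anchors):
    # Intersection corners: max of the top-left, min of the bottom-right
    lt = torch.max(gt[:, None, :2], anchors[None, :, :2])  # (M, N, 2)
    rb = torch.min(gt[:, None, 2:], anchors[None, :, 2:])  # (M, N, 2)
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]                        # (M, N)
    area_gt = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])
    area_anchor = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    return inter / (area_gt[:, None] + area_anchor[None, :] - inter)

def match_anchors(iou, fg_thresh=0.7, bg_thresh=0.3):
    # For every anchor, take the gt box with the highest IoU
    matched_vals, matched_idxs = iou.max(dim=0)  # both of shape (N,)
    # Mirror the Matcher's sentinel values: -1 = background, -2 = discard
    matched_idxs[matched_vals < bg_thresh] = -1
    matched_idxs[(matched_vals >= bg_thresh) & (matched_vals < fg_thresh)] = -2
    return matched_idxs

  The real Matcher is additionally constructed with allow_low_quality_matches=True, which keeps, for every ground truth box, the anchor with the highest IoU even when that IoU falls below the foreground threshold.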

  Once each anchor has its matched ground truth box, suitable anchors can be chosen from the full set to compute the loss. First every anchor is labeled: anchors to be ignored get -1, background anchors get 0, and anchors containing a target get 1. From the anchors labeled 0 and 1, a fixed number of background and target anchors are then randomly sampled at a fixed ratio, and the loss is computed on this sample.
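
  The labeling and sampling logic then looks roughly as follows; a minimal sketch using the RPN defaults (256 sampled anchors per image, half of them positive), where sample_anchors is an illustrative stand-in for generate_rpn_labels plus BalancedPositiveNegativeSampler rather than the library's actual API:

import torch

def sample_anchors(matched_idxs, batch_size=256, positive_fraction=0.5):
    # Label every anchor: 1 = contains a target, 0 = background, -1 = discarded
    labels = torch.full_like(matched_idxs, -1, dtype=torch.float32)
    labels[matched_idxs >= 0] = 1    # matched to a gt box
    labels[matched_idxs == -1] = 0   # background
    # matched_idxs == -2 (between thresholds) keeps label -1 and is ignored

    pos = torch.nonzero(labels == 1).squeeze(1)
    neg = torch.nonzero(labels == 0).squeeze(1)
    num_pos = min(pos.numel(), int(batch_size * positive_fraction))
    num_neg = min(neg.numel(), batch_size - num_pos)
    # Random permutations give the random subsets described above
    pos = pos[torch.randperm(pos.numel())[:num_pos]]
    neg = neg[torch.randperm(neg.numel())[:num_neg]]
    return labels, pos, neg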

  The loss has two parts. The first is the objectness loss: its reference is the anchor label, where a label of 1 means the probability that the anchor contains a target is 1, so the loss is obtained by comparing the objectness score predicted by the network against the anchor's label. The second is the box regression loss, obtained by comparing the box regression output of the network against the offsets actually computed between each anchor and its matched ground truth box.
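
  In code, the two terms look roughly like this; a sketch assuming flattened per-anchor tensors, with the smooth L1 loss re-implemented inline rather than imported from maskrcnn_benchmark.layers:

import torch
from torch.nn import functional as F

def rpn_losses(objectness, box_regression, labels, regression_targets,
               pos_inds, sampled_inds, beta=1.0 / 9):
    # Objectness: binary cross-entropy over both positive and negative samples
    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], labels[sampled_inds]
    )
    # Box regression: smooth L1 over the positive samples only,
    # normalized by the total number of sampled anchors
    diff = (box_regression[pos_inds] - regression_targets[pos_inds]).abs()
    smooth_l1 = torch.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta)
    box_loss = smooth_l1.sum() / sampled_inds.numel()
    return objectness_loss, box_loss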

  The annotated code follows:

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""
This file contains specific functions for computing losses on the RPN
file
"""

import torch
from torch.nn import functional as F

from .utils import concat_box_prediction_layers

from ..balanced_positive_negative_sampler import BalancedPositiveNegativeSampler
from ..utils import cat

from maskrcnn_benchmark.layers import smooth_l1_loss
from maskrcnn_benchmark.modeling.matcher import Matcher
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou
from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist


class RPNLossComputation(object):
    """
    This class computes the RPN loss.
    """

    def __init__(self, proposal_matcher, fg_bg_sampler, box_coder,
                 generate_labels_func):
        """
        Arguments:
            proposal_matcher (Matcher)
            fg_bg_sampler (BalancedPositiveNegativeSampler)
            box_coder (BoxCoder)
        """
        # self.target_preparator = target_preparator
        # Box matcher, used to find the ground truth (gt) box matched to each anchor
        self.proposal_matcher = proposal_matcher
        # Foreground/background sampler, used to pick background and target anchors
        # with a fixed total count and positive fraction
        self.fg_bg_sampler = fg_bg_sampler
        # Box coder, used to encode/decode boxes for box regression and proposals
        self.box_coder = box_coder
        # Fields to copy from the targets; the RPN needs none
        self.copied_fields = []
        # Label-generating function, used to derive per-anchor labels from the matched gt indices
        self.generate_labels_func = generate_labels_func
        # Which anchor cases to discard
        self.discard_cases = ['not_visibility', 'between_thresholds']

    # todo: find the ground truth (gt) box matched to every anchor
    def match_targets_to_anchors(self, anchor, target, copied_fields=[]):
        # Compute the IoU between every gt box and every anchor
        match_quality_matrix = boxlist_iou(target, anchor)
        # For each anchor, get the index of its matched gt box
        # (Matcher.BELOW_LOW_THRESHOLD = -1 for background,
        # Matcher.BETWEEN_THRESHOLDS = -2 for anchors between the two thresholds)
        matched_idxs = self.proposal_matcher(match_quality_matrix)
        # RPN doesn't need any fields from target
        # for creating the labels, so clear them all
        # Copy only the required fields onto the gt box list
        target = target.copy_with_fields(copied_fields)
        # get the targets corresponding GT for each anchor
        # NB: need to clamp the indices because we can have a single
        # GT in the image, and matched_idxs can be -2, which goes
        # out of bounds
        # Index the targets with the matched indices; anchors without a valid
        # match (negative indices) are all clamped to the first gt box
        matched_targets = target[matched_idxs.clamp(min=0)]
        # Store the matched indices on the matched-targets box list
        matched_targets.add_field("matched_idxs", matched_idxs)
        return matched_targets

    # todo: label each anchor (-1 = discard, 0 = background, 1 = contains a target)
    # and compute the offsets between every anchor and its matched gt box,
    # i.e. the box regression targets
    def prepare_targets(self, anchors, targets):
        # Labels for the anchors
        labels = []
        # Offsets (regression targets) between the anchors and their gt boxes
        regression_targets = []
        # Loop over the images, processing each image's anchors and gt boxes
        for anchors_per_image, targets_per_image in zip(anchors, targets):
            # Get the gt box matched to each anchor
            matched_targets = self.match_targets_to_anchors(
                anchors_per_image, targets_per_image, self.copied_fields
            )
            # Get the index of the gt box matched to each anchor
            matched_idxs = matched_targets.get_field("matched_idxs")
            # Generate the per-anchor labels: 1 for anchors matched to a gt box,
            # 0 otherwise (refined below)
            labels_per_image = self.generate_labels_func(matched_targets)
            labels_per_image = labels_per_image.to(dtype=torch.float32)

            # Background (negative examples): find the anchors matched as
            # background and set their label to 0
            bg_indices = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
            labels_per_image[bg_indices] = 0

            # discard anchors that go out of the boundaries of the image
            # Set the label of anchors that extend past the image border to -1
            if "not_visibility" in self.discard_cases:
                labels_per_image[~anchors_per_image.get_field("visibility")] = -1

            # discard indices that are between thresholds
            # Discard anchors whose IoU lies between the background and foreground thresholds
            if "between_thresholds" in self.discard_cases:
                inds_to_discard = matched_idxs == Matcher.BETWEEN_THRESHOLDS
                labels_per_image[inds_to_discard] = -1
            # compute regression targets: for each image, the offsets between
            # every anchor and its matched gt box
            regression_targets_per_image = self.box_coder.encode(
                matched_targets.bbox, anchors_per_image.bbox
            )
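            # For reference, box_coder.encode implements the standard Faster R-CNN
            # box parameterization (scaled by the BoxCoder's configurable weights,
            # which are all 1.0 for the RPN):
            #   tx = (x_gt - x_a) / w_a,   ty = (y_gt - y_a) / h_a,
            #   tw = log(w_gt / w_a),      th = log(h_gt / h_a)
            # where (x, y) are box centers and (w, h) box sizes.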
            # Append this image's labels and regression targets to the lists initialized above
            labels.append(labels_per_image)
            regression_targets.append(regression_targets_per_image)

        return labels, regression_targets


    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList]): all generated anchors, per image and FPN level
            objectness (list[Tensor]): the per-level objectness maps from the RPN head
            box_regression (list[Tensor]): the per-level box regression maps from the RPN head
            targets (list[BoxList]): the ground truth (gt) boxes of each image

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        # For each image, concatenate the anchors generated at the different FPN levels into a single BoxList
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        # For each image, get the labels and regression targets of all of its anchors
        labels, regression_targets = self.prepare_targets(anchors, targets)
        # Sample background and target anchors according to the labels; in the
        # returned masks, 1 marks a selected anchor and 0 an unselected one
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        # Convert the sampled masks to anchor indices and squeeze out the extra dimension
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)
        # Concatenate the sampled positive and negative anchor indices
        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        # Concatenate the RPN head's per-level objectness and box regression outputs
        # across all images into two tensors of shape (total_anchors, 1) and (total_anchors, 4)
        objectness, box_regression = \
                concat_box_prediction_layers(objectness, box_regression)
        # Squeeze the extra dimension out of the objectness tensor
        objectness = objectness.squeeze()
        # Concatenate the labels of all anchors across all images
        labels = torch.cat(labels, dim=0)
        # Concatenate the regression targets of all anchors across all images
        regression_targets = torch.cat(regression_targets, dim=0)
        # Box regression loss, computed only on the randomly sampled target (positive)
        # anchors and normalized by the total number of sampled anchors
        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())
        # Objectness loss, computed on both the sampled background and target anchors
        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds]
        )
        # Return the objectness loss and the box regression loss
        return objectness_loss, box_loss


# This function should be overwritten in RetinaNet
# todo: generate the anchor labels: 1 for anchors matched to a target, 0 otherwise
def generate_rpn_labels(matched_targets):
    # Get the index of the gt box matched to each anchor
    matched_idxs = matched_targets.get_field("matched_idxs")
    # Anchors with a valid match (index >= 0) get label 1
    labels_per_image = matched_idxs >= 0
    return labels_per_image


def make_rpn_loss_evaluator(cfg, box_coder):
    # Box matcher used to find the gt box matched to each anchor, configured
    # with the foreground and background IoU thresholds
    matcher = Matcher(
        cfg.MODEL.RPN.FG_IOU_THRESHOLD,
        cfg.MODEL.RPN.BG_IOU_THRESHOLD,
        allow_low_quality_matches=True,
    )
    # Foreground/background sampler used to pick background and target anchors,
    # configured with the number of sampled anchors per image and the positive fraction
    fg_bg_sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE, cfg.MODEL.RPN.POSITIVE_FRACTION
    )
    # Construct the RPN loss evaluator
    loss_evaluator = RPNLossComputation(
        matcher,
        fg_bg_sampler,
        box_coder,
        generate_rpn_labels
    )
    return loss_evaluator
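
For context, the evaluator built here is used inside the RPN module (rpn.py) roughly as follows. This is a hypothetical wiring sketch: cfg and the per-batch anchors, objectness, box_regression and targets are assumed to come from the surrounding module.

from maskrcnn_benchmark.modeling.box_coder import BoxCoder

rpn_box_coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))
loss_evaluator = make_rpn_loss_evaluator(cfg, rpn_box_coder)
loss_objectness, loss_rpn_box_reg = loss_evaluator(
    anchors, objectness, box_regression, targets
)
losses = {
    "loss_objectness": loss_objectness,
    "loss_rpn_box_reg": loss_rpn_box_reg,
}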

 
