SSD-box_utils逐行解释

# -*- coding: utf-8 -*-
import torch


def point_form(boxes):
    """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
    representation for comparison to point form ground truth data.
    Args:
        boxes: (tensor) center-size default boxes from priorbox layers.
    Return:
        boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
    """
    return torch.cat((boxes[:, :2] - boxes[:, 2:]/2,     # xmin, ymin
                     boxes[:, :2] + boxes[:, 2:]/2), 1)  # xmax, ymax

def center_size(boxes):
    """ Convert prior_boxes to (cx, cy, w, h)
    representation for comparison to center-size form ground truth data.
    Args:
        boxes: (tensor) point_form boxes
    Return:
        boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
    """
    return torch.cat((boxes[:, 2:] + boxes[:, :2])/2,  # cx, cy
                     boxes[:, 2:] - boxes[:, :2], 1)  # w, h


def intersect(box_a, box_b):  #boxa 为GT框集  boxb 为默认anchor集
    """ We resize both tensors to [A,B,2] without new malloc:
    [A,2] -> [A,1,2] -> [A,B,2]
    [B,2] -> [1,B,2] -> [A,B,2]
    Then we compute the area of intersect between box_a and box_b.
    Args:
      box_a: (tensor) bounding boxes, Shape: [A,4].
      box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
      (tensor) intersection area, Shape: [A,B].
    """
    A = box_a.size(0) #A abox的数量
    B = box_b.size(0)
    max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), #调整格式 使 A B box可以逐项比较 dim0中有不同的abox dim1中将该box值扩充了B份
                       box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) #dim0中扩充了A份 Bbox集合  dim1中 每个里面有全部的Bbox 一共B个
    #以上步骤中找到了右上角点中的最小值  Abox 与 Bbox中 对比后的最小值
    min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
                       box_b[:, :2].unsqueeze(0).expand(A, B, 2))
    # 以上步骤中找到了左下角点中的最大值  Abox 与 Bbox中 对比后的最大值
    inter = torch.clamp((max_xy - min_xy), min=0) #右上-左下 求出了交集的长和宽
    return inter[:, :, 0] * inter[:, :, 1]  #返回面积 shape [A,B]

def jaccard(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes.  The jaccard overlap
    is simply the intersection over union of two boxes.  Here we operate on
    ground truth boxes and default boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
        box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
    """
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2]-box_a[:, 0]) *
              (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter)  # [A,B] 求出Abox的面积并且扩展至交集返回值同形
    area_b = ((box_b[:, 2]-box_b[:, 0]) *
              (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter)  # [A,B]
    union = area_a + area_b - inter  #求出并集面积
    return inter / union  # [A,B] 交/并

def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the bounding boxes, then return the matched indices
    corresponding to both confidence and location preds.
    Args:
        threshold: (float) The overlap threshold used when mathing boxes.
        truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors].
        priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
        variances: (tensor) Variances corresponding to each prior coord,
            Shape: [num_priors, 4].
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        loc_t: (tensor) Tensor to be filled w/ endcoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
        idx: (int) current batch index
    Return:
        The matched indices corresponding to 1)location and 2)confidence preds.
    """
    # jaccard index
    overlaps = jaccard(
        truths,
        point_form(priors)
    )
    # (Bipartite Matching)
    # [1,num_objects] best prior for each ground truth  dim1中都是相同的Abox信息,目的是为GT匹配最佳的 priors 并记录索引
    best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)  #index[A,1]
    # [1,num_priors] best ground truth for each prior
    best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)  # dim0 为 priors匹配GT  并记录索引[1,B]
    best_truth_idx.squeeze_(0)  # 压缩最匹配的GT id [B]
    best_truth_overlap.squeeze_(0)  #压缩最匹配的GT匹配值  [B]
    best_prior_idx.squeeze_(1) #[A,1] →[A]
    best_prior_overlap.squeeze_(1)
    best_truth_overlap.index_fill_(0, best_prior_idx, 2)  # ensure best prior  将匹配最好的GT重叠值设置为2
    #是否会存在1配多的情况?导致索引序列中有重复?
    # TODO refactor: index  best_prior_idx with long tensor
    # ensure every gt matches with its prior of max overlap
    for j in range(best_prior_idx.size(0)):  #range(A)
        best_truth_idx[best_prior_idx[j]] = j   #这一步应该是相对比较难理解的了,best_prior_idx[0]找到第一个与GT1匹配最好的
                                                #priorsid 并将对应的 这个prior原来匹配到最好的GT更换成 GT1 也就是优先GT的匹配0
    matches = truths[best_truth_idx]          # Shape: [num_priors,4] 提取出匹配上的truths数据
    conf = labels[best_truth_idx] + 1         # Shape: [num_priors]  提取类别标签  [A,classes]
    conf[best_truth_overlap < threshold] = 0  # label as background  将匹配重叠值小于一定阈值的认定为背景 conf 与 best_truth_overlap 同形,返回同形bool阵
    loc = encode(matches, priors, variances)
    loc_t[idx] = loc    # [num_priors,4] encoded offsets to learn
    conf_t[idx] = conf  # [num_priors] top class label for each prior


def encode(matched, priors, variances):
    """Encode the variances from the priorbox layers into the ground truth boxes
    we have matched (based on jaccard overlap) with the prior boxes.
    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """
    #编码将绝对坐标转换为相对anchor的相对坐标
    # dist b/t match center and prior's center
    g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2]   #求出中心偏移量
    # encode variance
    g_cxcy /= (variances[0] * priors[:, 2:])     #转换比例
    # match wh / prior wh
    g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]   #求出宽高信息并转换比例
    #anchor 与 ground的中心对齐了吗
    g_wh = torch.log(g_wh) / variances[1]
    # return target for smooth_l1_loss
    return torch.cat([g_cxcy, g_wh], 1)  # [num_priors,4]


# Adapted from https://github.com/Hakuyume/chainer-ssd
def decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """

    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes


def log_sum_exp(x):
    """Utility function for computing log_sum_exp while determining
    This will be used to determine unaveraged confidence loss across
    all examples in a batch.
    Args:
        x (Variable(tensor)): conf_preds from conf layers
    """
    x_max = x.data.max()
    return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max


# Original author: Francisco Massa:
# https://github.com/fmassa/object-detection.torch
# Ported to PyTorch by Max deGroot (02/01/2017)
"""
def nms(boxes, scores, overlap=0.5, top_k=200):
    # Apply non-maximum suppression at test time to avoid detecting too many
    # overlapping bounding boxes for a given object.
    # Args:
    #     boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
    #     scores: (tensor) The class predscores for the img, Shape:[num_priors].
    #     overlap: (float) The overlap thresh for suppressing unnecessary boxes.
    #     top_k: (int) The Maximum number of box preds to consider.
    # Return:
    #     The indices of the kept boxes with respect to num_priors.
    

    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0:
        return keep
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    v, idx = scores.sort(0)  # sort in ascending order
    # I = I[v >= 0.01]
    idx = idx[-top_k:]  # indices of the top-k largest vals
    xx1 = boxes.new()
    yy1 = boxes.new()
    xx2 = boxes.new()
    yy2 = boxes.new()
    w = boxes.new()
    h = boxes.new()

    # keep = torch.Tensor()
    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        # keep.append(i)
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view
        # load bboxes of next highest vals
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        # store element-wise max with next highest score
        xx1 = torch.clamp(xx1, min=x1[i])
        yy1 = torch.clamp(yy1, min=y1[i])
        xx2 = torch.clamp(xx2, max=x2[i])
        yy2 = torch.clamp(yy2, max=y2[i])
        w.resize_as_(xx2)
        h.resize_as_(yy2)
        w = xx2 - xx1
        h = yy2 - yy1
        # check sizes of xx1 and xx2.. after each iteration
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)
        inter = w*h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas)
        union = (rem_areas - inter) + area[i]
        IoU = inter/union  # store result in iou
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count
"""
def nms(boxes, scores, overlap=0.5, top_k=200):##参数:边界框精确位置,边界框类别的分数、nms阈值、前200个边界框
    '''(1)构建keep张量:初始值为0,形状与预测框的数量相同(预测框的数量为该类,类别置信度大于阈值的预测边界框的数量)'''
    keep = scores.new(scores.size(0)).zero_().long()

    if boxes.numel() == 0:
        return keep

    '''(2)计算预测边界框的面积'''
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)

    '''(3)获取 类别置信度分数最高的top_k个 预测边界框的索引'''
    v, idx = scores.sort(0)  #对类别置信度分数升序排序,返回 按照类别置信度分数排序后的   预测边界框的索引
    # I = I[v >= 0.01]
    '''类别置信度分数最高的前top_k个预测框的索引:idx '''
    idx = idx[-top_k:]  # indices of the top-k largest vals
    xx1 = boxes.new()
    yy1 = boxes.new()
    xx2 = boxes.new()
    yy2 = boxes.new()
    w = boxes.new()
    h = boxes.new()
    '''(4)将nms后的预测边界框的索引,存入keep'''
    count = 0
    while idx.numel() > 0:
        ''''#1.类别置信度分数最高的预测边界框————————索引逐一写入keep'''
        i = idx[-1]  # index of current largest val
        # keep.append(i)
        keep[count] = i
        count += 1

        if idx.size(0) == 1:
            break
        '''#2.剩余预测边界框的索引'''
        idx = idx[:-1]  # remove kept element from view
        '''#3.计算剩余预测边界框与,分数最高的边界框之间的iou值'''
        #####################################添加代码##########################################
        #否者出错RuntimeError: index_select(): functions with out=... arguments don't support automatic differentiation, but one of the arguments requires grad.
        idx= torch.autograd.Variable(idx, requires_grad=False)
        idx = idx.data
        x1 = torch.autograd.Variable(x1, requires_grad=False)
        x1 = x1.data
        y1 = torch.autograd.Variable(y1, requires_grad=False)
        y1 = y1.data
        x2 = torch.autograd.Variable(x2, requires_grad=False)
        x2 = x2.data
        y2 = torch.autograd.Variable(y2, requires_grad=False)
        y2 = y2.data
        ######################################添加代码#################################################
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        # store element-wise max with next highest score
        xx1 = torch.clamp(xx1, min=x1[i])
        yy1 = torch.clamp(yy1, min=y1[i])
        xx2 = torch.clamp(xx2, max=x2[i])
        yy2 = torch.clamp(yy2, max=y2[i])
        w.resize_as_(xx2)
        h.resize_as_(yy2)
        w = xx2 - xx1
        h = yy2 - yy1
        # check sizes of xx1 and xx2.. after each iteration
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)
        inter = w*h
        # IoU = i / (area(a) + area(b) - i)
        #####################################添加代码##########################################
        #否者出错RuntimeError: index_select(): functions with out=... arguments don't support automatic differentiation, but one of the arguments requires grad.
        area = torch.autograd.Variable(area, requires_grad=False)
        area = area.data
        idx= torch.autograd.Variable(idx, requires_grad=False)
        idx = idx.data
        ######################################添加代码#################################################
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas)
        union = (rem_areas - inter) + area[i]
        IoU = inter/union  # store result in iou
        # keep only elements with an IoU <= overlap
        '''4.保留iou值小于nms阈值的预测边界框的索引'''
        idx = idx[IoU.le(overlap)]#保留交并比小于阈值的预测边界框的id
    return keep, count

你可能感兴趣的:(ssd,计算机视觉)