计算机视觉基础之IoU和mIoU

IoU

IoU(Intersection over Union),又称重叠度/交并比。在目标检测中,IoU主要用于以下两个场景:
1、NMS:当在图像中预测多个proposals、pred bboxes时,由于预测的结果间可能存在高冗余(即同一个目标可能被预测多个矩形框),因此可以过滤掉一些彼此间高重合度的结果;具体操作就是根据各个bbox的score降序排序,剔除与高score bbox有较高重合度的低score bbox,其中重合度的度量指标就是IoU
2、mAP:得到检测算法的预测结果后,需要将pred bbox与gt bbox一起用来评估检测算法的性能,涉及到的评估指标为mAP。当一个pred bbox与gt bbox的重合度较高(如IoU score > 0.5),且分类结果也正确时,就可以认为该pred bbox预测正确,其中也涉及IoU的概念。
计算机视觉基础之IoU和mIoU_第1张图片
绿框:gt bbox
红框: pred bbox
计算机视觉基础之IoU和mIoU_第2张图片
IoU = gt bbox、pred bbox交集的面积 / 二者并集的面积

# -*- coding: utf-8 -*-
#
# This is the python code for calculating bbox IoU,
# By running the script, we can get the IoU score between pred / gt bboxes
#
# copyright @ netease, AI group

from __future__ import print_function, absolute_import
import numpy as np

def get_IoU(pred_bbox, gt_bbox):
    """
    compute the intersection-over-union of one predicted box and one ground truth box

    both boxes are indexed as [xmin, ymin, xmax, ymax]; extents are measured as
    ``max - min + 1``, i.e. both corner coordinates are treated as inclusive.
    no validity check is performed on the boxes.

    :param pred_bbox: predict bbox coordinate
    :param gt_bbox: ground truth bbox coordinate
    :return: iou score
    """
    p_x1, p_y1, p_x2, p_y2 = pred_bbox[0], pred_bbox[1], pred_bbox[2], pred_bbox[3]
    g_x1, g_y1, g_x2, g_y2 = gt_bbox[0], gt_bbox[1], gt_bbox[2], gt_bbox[3]

    # overlap rectangle: the extent is clamped at 0 when the boxes are disjoint on an axis
    overlap_w = np.maximum(min(p_x2, g_x2) - max(p_x1, g_x1) + 1., 0.)
    overlap_h = np.maximum(min(p_y2, g_y2) - max(p_y1, g_y1) + 1., 0.)
    intersection = overlap_w * overlap_h

    # union = area(pred) + area(gt) - intersection
    pred_area = (p_x2 - p_x1 + 1.) * (p_y2 - p_y1 + 1.)
    gt_area = (g_x2 - g_x1 + 1.) * (g_y2 - g_y1 + 1.)
    union = pred_area + gt_area - intersection

    return intersection / union


def get_max_IoU(pred_bboxes, gt_bbox):
    """
    given 1 gt bbox and N pred bboxes, return the iou of every pred bbox with the
    gt bbox together with the best score and its index

    boxes are indexed as [xmin, ymin, xmax, ymax]; extents are measured as
    ``max - min + 1``, i.e. both corner coordinates are treated as inclusive.

    :param pred_bboxes: predict bboxes coordinates, array-like of shape (N, 4)
    :param gt_bbox: ground truth bbox coordinate
    :return: tuple ``(overlaps, ovmax, jmax)`` where ``overlaps`` is the array of
             N iou scores, ``ovmax`` the max iou score and ``jmax`` the index of
             the pred bbox reaching it; for N == 0 the result is
             ``(empty array, 0., -1)``
    """
    # accept nested lists as well as ndarrays
    pred_bboxes = np.asarray(pred_bboxes)

    # without predictions there is nothing to match: return explicit sentinels
    # instead of falling through to undefined local variables
    if pred_bboxes.shape[0] == 0:
        return np.zeros((0,), dtype=float), 0., -1

    # -----0---- get coordinates of inters, vectorised over all predict bboxes
    ixmin = np.maximum(pred_bboxes[:, 0], gt_bbox[0])
    iymin = np.maximum(pred_bboxes[:, 1], gt_bbox[1])
    ixmax = np.minimum(pred_bboxes[:, 2], gt_bbox[2])
    iymax = np.minimum(pred_bboxes[:, 3], gt_bbox[3])
    # clamp at 0 so that disjoint boxes contribute no intersection
    iw = np.maximum(ixmax - ixmin + 1., 0.)
    ih = np.maximum(iymax - iymin + 1., 0.)

    # -----1----- intersection
    inters = iw * ih

    # -----2----- union, uni = S1 + S2 - inters
    uni = ((gt_bbox[2] - gt_bbox[0] + 1.) * (gt_bbox[3] - gt_bbox[1] + 1.) +
           (pred_bboxes[:, 2] - pred_bboxes[:, 0] + 1.) * (pred_bboxes[:, 3] - pred_bboxes[:, 1] + 1.) -
           inters)

    # -----3----- iou, get max score and max iou index
    overlaps = inters / uni
    ovmax = np.max(overlaps)
    jmax = np.argmax(overlaps)

    return overlaps, ovmax, jmax

if __name__ == "__main__":

    # test1: one predicted box against one ground truth box
    gt_bbox = np.array([70, 80, 120, 150])
    pred_bbox = np.array([50, 50, 90, 100])   # top-left: <50, 50>, bottom-right: <90, 100>
    print(get_IoU(pred_bbox, gt_bbox))

    # test2: several predicted boxes against the same ground truth box
    gt_bbox = np.array([70, 80, 120, 150])
    pred_bboxes = np.array([
        [15, 18, 47, 60],
        [50, 50, 90, 100],
        [70, 80, 120, 145],
        [130, 160, 250, 280],
        [25.6, 66.1, 113.3, 147.8],
    ])
    print(get_max_IoU(pred_bboxes, gt_bbox))

转载自 https://zhuanlan.zhihu.com/p/47189358

mIoU

Mean Intersection over Union(mIoU, 均交并比),为语义分割的标准度量。它计算两个集合的交集与并集之比,在语义分割问题中,这两个集合分别为真实值(ground truth)和预测值(predicted segmentation)。这个比例可以变形为TP(交集)比上TP、FP、FN之和(并集),即 $\mathrm{IoU} = \frac{TP}{TP + FP + FN}$。先在每个类上计算IoU,然后对所有类取平均,即得到mIoU。
tf源码解析
第一步:计算混淆矩阵

# 主要代码
def confusion_matrix(labels, predictions, num_classes=None, dtype=dtypes.int32,
                     name=None, weights=None): 
    """
    build a [num_classes, num_classes] confusion matrix from label / prediction pairs

    every (label, prediction) pair is used as a (row, column) index, so the
    row index is the label and the column index is the prediction.
    assumes labels and predictions are 1-D integer tensors of equal length -- TODO confirm

    :param labels: ground truth class ids
    :param predictions: predicted class ids
    :param num_classes: number of classes; when None it is derived from the data
    :param dtype: dtype of the values stored in the matrix
    :param name: not used in this excerpt
    :param weights: optional per-element weights, used instead of 1 for each pair
    :return: result of adding the sparse pair counts onto a zero matrix
    """
    # example: labels =     [0, 1, 2, 0, 3]
    #          predictions =[0, 1, 1, 3, 3]
    # when num_classes is not given, use the largest id found in labels or
    # predictions plus one (4 for the example above)
    if num_classes is None:
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1 
    else:
      # attach assert_less checks so that ids >= num_classes are reported
      num_classes_int64 = math_ops.cast(num_classes, dtypes.int64)
      labels = control_flow_ops.with_dependencies(
          [check_ops.assert_less(
              labels, num_classes_int64, message='`labels` out of bound')],
          labels)
      predictions = control_flow_ops.with_dependencies(
          [check_ops.assert_less(
              predictions, num_classes_int64,
              message='`predictions` out of bound')],
          predictions)

    if weights is not None:
      # weights must be shape-compatible with predictions; they become the cell values
      predictions.get_shape().assert_is_compatible_with(weights.get_shape())
      weights = math_ops.cast(weights, dtype)

    shape = array_ops.stack([num_classes, num_classes])
    indices = array_ops.stack([labels, predictions], axis=1) 
    # indices = [[0,0],[1,1],[2,1],[0,3],[3,3]]
    # one value per index pair: all ones unless weights are given
    values = (array_ops.ones_like(predictions, dtype)
              if weights is None else weights)
    # sparse tensor holding `values` at `indices`; every other position is 0
    cm_sparse = sparse_tensor.SparseTensor(
        indices=indices, values=values, dense_shape=math_ops.to_int64(shape))
    # when the same position is listed several times, its values are summed
    # by the sparse_add below
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

    return sparse_ops.sparse_add(zero_matrix, cm_sparse)

SparseTensor例子:

import tensorflow as tf
# the snippet calls TensorFlow's internal op modules directly, so they have to
# be imported explicitly; `import tensorflow as tf` alone does not define them
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import sparse_ops

# index [0, 0] is listed twice on purpose: the result below shows that the
# values of a repeated index are summed when added onto the dense zero matrix
a = tf.SparseTensor(indices=[[0,0], [1,2], [0, 0]], values=[1, 1, 1], dense_shape=[3, 4])
zero_m = array_ops.zeros(math_ops.to_int32([3,4]),dtype=tf.int32)
r = sparse_ops.sparse_add(zero_m, a)
sess = tf.Session(config=tf.ConfigProto(device_count={'cpu':0}))
sess.run(r)
# array([[2, 0, 0, 0],
#       [0, 0, 1, 0],
#       [0, 0, 0, 0]], dtype=int32)

第二步:计算mIoU

def compute_mean_iou(total_cm, name):
  """Compute the mean intersection-over-union via the confusion matrix.

  For every class the IoU is diagonal / (axis-0 sum + axis-1 sum - diagonal);
  the mean is taken over the classes whose denominator is non-zero.

  :param total_cm: the accumulated confusion matrix
  :param name: name given to the op that sums the per-class IoU values
  :return: the mean IoU, or 0 when no class has a non-zero denominator
  """
  sum_over_row = math_ops.to_float(math_ops.reduce_sum(total_cm, 0))
  sum_over_col = math_ops.to_float(math_ops.reduce_sum(total_cm, 1))
  cm_diag = math_ops.to_float(array_ops.diag_part(total_cm)) # intersection
  denominator = sum_over_row + sum_over_col - cm_diag # denominator, i.e. the union

  # The mean is only computed over classes that appear in the
  # label or prediction tensor. If the denominator is 0, we need to
  # ignore the class.
  num_valid_entries = math_ops.reduce_sum(
      math_ops.cast(
          math_ops.not_equal(denominator, 0), dtype=dtypes.float32)) # number of classes with a non-zero denominator

  # If the value of the denominator is 0, set it to 1 to avoid
  # zero division.
  denominator = array_ops.where(
      math_ops.greater(denominator, 0), denominator,
      array_ops.ones_like(denominator))
  iou = math_ops.div(cm_diag, denominator) # per-class IoU

  # If the number of valid entries is 0 (no classes) we return 0.
  result = array_ops.where(
      math_ops.greater(num_valid_entries, 0),
      math_ops.reduce_sum(iou, name=name) / num_valid_entries, 0) # mIoU
  return result

通过tf.metrics.mean_iou的API可以得到mIoU,但它并没有返回各类的IoU。为了计算各类IoU,可以修改上面的代码以获取IoU中间结果,也可以借助weights参数变相计算。

基本思路就是只保留某一类的IoU,将其他类的IoU置零,最后将得到的mIoU乘以num_classes,即可得到该类的IoU。

# True at every position where the prediction equals the label
tp_position = tf.equal(tf.to_int32(labels), tf.to_int32(predictions))
# weight 0 for correctly predicted positions whose label is not 0, weight 1 everywhere else
label_0_weight = tf.where((tp_position & tf.not_equal(labels, 0)), tf.zeros_like(labels),
                                  tf.ones_like(labels))
## on the confusion-matrix diagonal only one class (class 0) stays non-zero, all other classes are set to 0
# NOTE(review): predictions is passed before labels here -- confirm against the
# parameter order documented for tf.metrics.mean_iou
metric_map['IOU/class_0_iou'] = tf.metrics.mean_iou(
            predictions, labels, dataset.num_classes, weights=label_0_weight)
## the result is the class-0 IoU / num_classes

pytorch源码解析:

class IOUMetric:
    """
    Class to calculate mean-iou using fast_hist method

    A confusion matrix is accumulated over batches with ``add_batch`` and the
    metrics are derived from it with ``evaluate``.
    """

    def __init__(self, num_classes):
        """
        :param num_classes: number of classes; valid labels are 0 .. num_classes - 1
        """
        self.num_classes = num_classes
        # hist[t, p] counts the elements whose true label is t and predicted label is p
        self.hist = np.zeros((num_classes, num_classes))

    def _fast_hist(self, label_pred, label_true):
        """
        Confusion matrix of one flattened prediction / ground truth pair.

        :param label_pred: 1-D array-like of predicted labels
        :param label_true: 1-D array-like of true labels; entries outside
                           [0, num_classes) are ignored
        :return: integer array of shape (num_classes, num_classes)
        """
        label_pred = np.asarray(label_pred)
        label_true = np.asarray(label_true)
        mask = (label_true >= 0) & (label_true < self.num_classes)
        # Encode each (true, pred) pair as the single index true * num_classes + pred.
        # Both arrays are cast to int because np.bincount rejects float input,
        # which is what a float-typed prediction array would otherwise produce.
        flat_index = (self.num_classes * label_true[mask].astype(int) +
                      label_pred[mask].astype(int))
        hist = np.bincount(flat_index, minlength=self.num_classes ** 2)
        return hist.reshape(self.num_classes, self.num_classes)

    def add_batch(self, predictions, gts):
        """
        Add every (prediction, ground truth) pair of a batch to the running matrix.

        :param predictions: iterable of predicted label maps
        :param gts: iterable of ground truth label maps, aligned with predictions
        """
        for lp, lt in zip(predictions, gts):
            self.hist += self._fast_hist(np.asarray(lp).flatten(),
                                         np.asarray(lt).flatten())

    def evaluate(self):
        """
        Derive the metrics from the accumulated confusion matrix.

        :return: tuple ``(acc, acc_cls, iu, mean_iu, fwavacc)``: overall accuracy,
                 mean per-class accuracy, per-class IoU array, mean IoU and
                 frequency-weighted IoU
        """
        diag = np.diag(self.hist)
        row_sum = self.hist.sum(axis=1)
        col_sum = self.hist.sum(axis=0)
        total = self.hist.sum()
        # classes that never occur give 0 / 0 = nan, which nanmean skips below;
        # silence the floating point warnings these divisions would raise
        with np.errstate(divide='ignore', invalid='ignore'):
            acc = diag.sum() / total
            acc_cls = diag / row_sum
            iu = diag / (row_sum + col_sum - diag)
            freq = row_sum / total
        acc_cls = np.nanmean(acc_cls)
        mean_iu = np.nanmean(iu)
        fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
        return acc, acc_cls, iu, mean_iu, fwavacc

pytorch简化版:

#RT:RightTop
#LB:LeftBottom
def IOU(A, B):
    """
    return the iou of two axis-aligned rectangles

    each rectangle exposes ``LB`` (left-bottom corner) and ``RT`` (right-top
    corner), and each corner exposes ``x`` and ``y``.

    :param A: first rectangle
    :param B: second rectangle
    :return: iou score, 0.0 when the rectangles do not overlap
    """
    W = min(A.RT.x, B.RT.x) - max(A.LB.x, B.LB.x)
    H = min(A.RT.y, B.RT.y) - max(A.LB.y, B.LB.y)
    # a non-positive extent on either axis means there is no overlapping area
    if W <= 0 or H <= 0:
        return 0.0
    SA = (A.RT.x - A.LB.x) * (A.RT.y - A.LB.y)
    SB = (B.RT.x - B.LB.x) * (B.RT.y - B.LB.y)
    cross = W * H
    # float() keeps the ratio fractional for integer coordinates under Python 2
    return cross / float(SA + SB - cross)

你可能感兴趣的:(cv,知识,python,深度学习,算法)