Caffe2 - (二十四) Detectron 之 utils 函数(2)

Caffe2 - (二十四) Detectron 之 utils 函数(2)

1. env.py


"""Environment helper functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import imp
import os
import sys


def get_runtime_dir():
    """Return the runtime working directory.

    Returns:
        The first entry of ``sys.path``.
    """
    runtime_dir = sys.path[0]
    return runtime_dir


def get_py_bin_ext():
    """Return the file extension of Python binaries (always ``'.py'``)."""
    extension = '.py'
    return extension


def set_up_matplotlib():
    """Configure matplotlib to use the non-interactive ``Agg`` backend."""
    # Imported lazily so that matplotlib is only needed when this is called.
    import matplotlib as mpl

    mpl.use('Agg')


def exit_on_error():
    """Exit the Detectron tool with status code 1 after an error.

    Raises:
        SystemExit: always, via ``sys.exit(1)``.
    """
    failure_status = 1
    sys.exit(failure_status)


def import_nccl_ops():
    """Import the NCCL ops (intentionally a no-op).

    The NCCL dependency is already compiled into the Caffe2 GPU library, so
    there is nothing left to load here.
    """
    return None


def get_caffe2_dir():
    """Locate the directory that contains the ``caffe2`` package.

    Returns:
        The absolute parent directory of the path reported by
        ``imp.find_module('caffe2')``.

    Raises:
        AssertionError: if the reported path does not exist on disk.
    """
    module_info = imp.find_module('caffe2')
    # find_module returns (file, pathname, description); only the path is used.
    caffe2_path = module_info[1]
    assert os.path.exists(caffe2_path), \
        'Caffe2 not found at \'{}\''.format(caffe2_path)
    return os.path.dirname(os.path.abspath(caffe2_path))


def get_detectron_ops_lib():
    """Return the path of the Detectron ops shared library.

    The library is looked up at ``lib/libcaffe2_detectron_ops_gpu.so`` under
    the directory returned by ``get_caffe2_dir()``.

    Raises:
        AssertionError: if the library file does not exist.
    """
    lib_path = os.path.join(
        get_caffe2_dir(), 'lib/libcaffe2_detectron_ops_gpu.so')
    assert os.path.exists(lib_path), \
        ('Detectron ops lib not found at \'{}\'; make sure that your Caffe2 '
         'version includes Detectron module').format(lib_path)
    return lib_path


def get_custom_ops_lib():
    """Return the path of the custom ops shared library.

    The library is looked up at ``build/libcaffe2_detectron_custom_ops_gpu.so``
    under the parent of the directory that contains this file.

    Raises:
        AssertionError: if the library file does not exist.
    """
    this_dir = os.path.dirname(__file__)
    lib_dir = os.path.dirname(this_dir)
    lib_path = os.path.join(
        lib_dir, 'build/libcaffe2_detectron_custom_ops_gpu.so')
    assert os.path.exists(lib_path), \
        'Custom ops lib not found at \'{}\''.format(lib_path)
    return lib_path

2. c2.py

"""Helpful utilities for working with Caffe2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from six import string_types
import contextlib

from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python import dyndep
from caffe2.python import scope

import utils.env as envu


def import_contrib_ops():
    """Import the contrib ops Detectron needs.

    Delegates to ``envu.import_nccl_ops()``.
    """
    load_nccl_ops = envu.import_nccl_ops
    load_nccl_ops()


def import_detectron_ops():
    """Import the Detectron ops.

    Passes the library path from ``envu.get_detectron_ops_lib()`` to
    ``dyndep.InitOpsLibrary``.
    """
    dyndep.InitOpsLibrary(envu.get_detectron_ops_lib())


def import_custom_ops():
    """Import the custom ops.

    Passes the library path from ``envu.get_custom_ops_lib()`` to
    ``dyndep.InitOpsLibrary``.
    """
    dyndep.InitOpsLibrary(envu.get_custom_ops_lib())


def SuffixNet(name, net, prefix_len, outputs):
    """Return a new net holding `net`'s ops after the first `prefix_len` ops.

    The new net is a suffix of `net`. The blobs in `outputs` are registered as
    its external output blobs.

    Args:
        name: name given to the cloned net.
        net: source net; every blob in `outputs` must be defined in it.
        prefix_len: number of leading ops of `net` to drop.
        outputs: a BlobReference or a list/tuple of BlobReferences.

    Returns:
        A tuple `(new_net, output_refs)`, where `output_refs` are the blob
        references obtained from `new_net` for each output name.

    Raises:
        AssertionError: if an output blob is not defined in `net`.
    """
    outputs = BlobReferenceList(outputs)
    for output in outputs:
        assert net.BlobIsDefined(output)
    new_net = net.Clone(name)

    # Start from an empty proto: drop the cloned ops and external blob lists.
    del new_net.Proto().op[:]
    del new_net.Proto().external_input[:]
    del new_net.Proto().external_output[:]

    # Add the suffix ops
    new_net.Proto().op.extend(net.Proto().op[prefix_len:])
    # Add the external input blobs:
    # any op input that is not defined in the new net becomes an external input
    input_names = [
        i for op in new_net.Proto().op for i in op.input
        if not new_net.BlobIsDefined(i)]
    new_net.Proto().external_input.extend(input_names)
    # Add the external output blobs
    output_names = [str(o) for o in outputs]
    new_net.Proto().external_output.extend(output_names)
    return new_net, [new_net.GetBlobRef(o) for o in output_names]


def BlobReferenceList(blob_ref_or_list):
    """Return the argument as a list of BlobReferences.

    A single BlobReference is wrapped in a new list. A list or tuple is
    returned unchanged after checking each element.

    Raises:
        TypeError: if the argument is neither a BlobReference nor a
            list/tuple.
        AssertionError: if a list/tuple element is not a BlobReference.
    """
    if isinstance(blob_ref_or_list, core.BlobReference):
        return [blob_ref_or_list]
    if type(blob_ref_or_list) not in (list, tuple):
        raise TypeError('blob_ref_or_list must be a BlobReference or a list/tuple of BlobReferences')
    for item in blob_ref_or_list:
        assert isinstance(item, core.BlobReference)
    return blob_ref_or_list


def UnscopeName(possibly_scoped_name):
    """Remove any name scoping from a (possibly) scoped name.

    For example, 'gpu_0/foo' becomes 'foo'.

    Raises:
        AssertionError: if the argument is not a string.
    """
    assert isinstance(possibly_scoped_name, string_types)
    # rfind returns -1 when no separator is present, so the slice then starts
    # at 0 and the whole name is returned.
    last_sep = possibly_scoped_name.rfind(scope._NAMESCOPE_SEPARATOR)
    return possibly_scoped_name[last_sep + 1:]


@contextlib.contextmanager
def NamedCudaScope(gpu_id):
    """Enter both the GPU name scope and the CUDA device scope for `gpu_id`.

    A convenience wrapper that saves one level of `with ...` nesting.
    """
    with GpuNameScope(gpu_id), CudaScope(gpu_id):
        yield


@contextlib.contextmanager
def GpuNameScope(gpu_id):
    """Enter the name scope 'gpu_<gpu_id>' for GPU device `gpu_id`."""
    scope_name = 'gpu_{:d}'.format(gpu_id)
    with core.NameScope(scope_name):
        yield


@contextlib.contextmanager
def CudaScope(gpu_id):
    """Enter a CUDA device scope for GPU device `gpu_id`."""
    with core.DeviceScope(CudaDevice(gpu_id)):
        yield


@contextlib.contextmanager
def CpuScope():
    """Enter a CPU device scope."""
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
        yield


def CudaDevice(gpu_id):
    """Create a CUDA device option that selects device `gpu_id`."""
    device_option = core.DeviceOption(caffe2_pb2.CUDA, gpu_id)
    return device_option


def gauss_fill(std):
    """Gaussian fill helper that cuts down on boilerplate.

    Returns:
        The tuple ``('GaussianFill', {'std': std})``.
    """
    fill_args = {'std': std}
    return ('GaussianFill', fill_args)


def const_fill(value):
    """Constant fill helper that cuts down on boilerplate.

    Returns:
        The tuple ``('ConstantFill', {'value': value})``.
    """
    fill_args = {'value': value}
    return ('ConstantFill', fill_args)

3. io.py

"""IO utilities."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cPickle as pickle
import hashlib
import logging
import os
import re
import sys
import urllib2

logger = logging.getLogger(__name__)

# Base URL from which Detectron models are downloaded.
_DETECTRON_S3_BASE_URL = 'https://s3-us-west-2.amazonaws.com/detectron'


def save_object(obj, file_name):
    """Pickle a Python object to disk.

    Args:
        obj: the object to serialize.
        file_name: destination path; it is made absolute before opening.
    """
    target_path = os.path.abspath(file_name)
    with open(target_path, 'wb') as out_file:
        pickle.dump(obj, out_file, pickle.HIGHEST_PROTOCOL)


def cache_url(url_or_file, cache_dir):
    """Download a URL into `cache_dir` and return the cached file's path.

    If the argument is not an http(s) URL it is returned unchanged.

    Args:
        url_or_file: an http(s) URL or a local path.
        cache_dir: directory that replaces the Detectron S3 base URL prefix
            to form the local cache path.

    Returns:
        `url_or_file` itself for non-URLs, otherwise the local cache path.

    Raises:
        AssertionError: if the URL is outside the Detectron S3 bucket, or if
            the md5 check of the cached file fails.
    """
    if re.match(r'^(?:http)s?://', url_or_file, re.IGNORECASE) is None:
        # Not a URL: hand the argument back untouched.
        return url_or_file

    url = url_or_file
    assert url.startswith(_DETECTRON_S3_BASE_URL), \
        ('Detectron only automatically caches URLs in the Detectron S3 '
         'bucket: {}').format(_DETECTRON_S3_BASE_URL)

    cache_file_path = url.replace(_DETECTRON_S3_BASE_URL, cache_dir)
    if not os.path.exists(cache_file_path):
        # Not cached yet: create the parent directory and download.
        parent_dir = os.path.dirname(cache_file_path)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)

        logger.info('Downloading remote file {} to {}'.format(url, cache_file_path))
        download_url(url, cache_file_path)

    # The hash is checked both for an existing cache file and a fresh download.
    assert_cache_file_is_ok(url, cache_file_path)
    return cache_file_path


def assert_cache_file_is_ok(url, file_path):
    """Check that a cached file's md5 hash matches the reference for `url`.

    Args:
        url: the URL the file was downloaded from.
        file_path: local path of the cached file.

    Raises:
        AssertionError: if the local md5 differs from the reference md5.
    """
    actual_md5 = _get_file_md5sum(file_path)
    expected_md5 = _get_reference_md5sum(url)
    assert actual_md5 == expected_md5, \
        ('Target URL {} appears to be downloaded to the local cache file '
         '{}, but the md5 hash of the local file does not match the '
         'reference (actual: {} vs. expected: {}). You may wish to delete '
         'the cached file and try again to trigger automatic '
         'download.').format(url, file_path, actual_md5, expected_md5)


def _progress_bar(count, total):
    """Print a one-line download progress bar to stdout.

    Reference:
    https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113

    Args:
        count: number of bytes received so far.
        total: total number of bytes expected.
    """
    total_f = float(total)
    width = 60
    n_filled = int(round(width * count / total_f))
    pct = round(100.0 * count / total_f, 1)
    size_mb = total / 1024 / 1024

    bar = ''.join(['=' * n_filled, '-' * (width - n_filled)])
    line = '  [{}] {}% of {:.1f}MB file  \r'.format(bar, pct, size_mb)

    sys.stdout.write(line)
    sys.stdout.flush()
    # Move to a new line once the download is complete.
    if count >= total:
        sys.stdout.write('\n')


def download_url(url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar):
    """Download `url` and write its content to `dst_file_path`.

    Reference:
    https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook

    Args:
        url: the URL to fetch.
        dst_file_path: destination file, opened in binary write mode.
        chunk_size: number of bytes requested per read.
        progress_hook: optional callable `(bytes_so_far, total_size)` invoked
            after each chunk is received. It is skipped when the server does
            not report a usable Content-Length, since progress is then
            undefined.

    Returns:
        The total number of bytes downloaded.
    """
    response = urllib2.urlopen(url)
    try:
        # The header may be absent (e.g. chunked transfer); do not crash on it.
        content_length = response.info().getheader('Content-Length')
        if content_length is None:
            total_size = None
        else:
            total_size = int(content_length.strip())
        bytes_so_far = 0

        with open(dst_file_path, 'wb') as f:
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                bytes_so_far += len(chunk)
                if progress_hook and total_size:
                    progress_hook(bytes_so_far, total_size)
                f.write(chunk)
    finally:
        # Always release the connection, even if reading or writing fails.
        response.close()

    return bytes_so_far


def _get_file_md5sum(file_name):
    """Compute the md5 hash of a file.

    The file is opened in binary mode so the hash covers the exact bytes on
    disk, and is read in fixed-size chunks so large model files need not fit
    in memory.

    Args:
        file_name: path of the file to hash.

    Returns:
        The hexadecimal md5 digest string.
    """
    hash_obj = hashlib.md5()
    with open(file_name, 'rb') as f:
        # iter() with a sentinel stops at the empty read that marks EOF.
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            hash_obj.update(chunk)
    return hash_obj.hexdigest()


def _get_reference_md5sum(url):
    """Fetch the reference md5 hash for `url`.

    By convention the hash is stored at `url + '.md5sum'`.

    Returns:
        The response body with surrounding whitespace stripped.
    """
    md5_url = url + '.md5sum'
    response = urllib2.urlopen(md5_url)
    return response.read().strip()

4. keypoints.py

"""Keypoint utilities (somewhat specific to COCO keypoints)."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import numpy as np

from core.config import cfg
import utils.blob as blob_utils


def get_keypoints():
    """Return the COCO keypoint names and their left-to-right flip map.

    The keypoints are not part of the COCO json for the test sets, so they are
    provided here.

    Returns:
        A tuple `(keypoints, keypoint_flip_map)`: the list of 17 keypoint
        names ('nose' followed by left/right pairs) and a dict that maps each
        'left_*' name to its 'right_*' counterpart.
    """
    paired_parts = [
        'eye',
        'ear',
        'shoulder',
        'elbow',
        'wrist',
        'hip',
        'knee',
        'ankle',
    ]
    # 'nose' has no counterpart; every other part appears as left then right.
    keypoints = ['nose']
    for part in paired_parts:
        keypoints.extend(['left_' + part, 'right_' + part])
    keypoint_flip_map = {
        'left_' + part: 'right_' + part for part in paired_parts
    }
    return keypoints, keypoint_flip_map


def get_person_class_index():
    """Return the index of the person class in COCO, which is 1."""
    person_index = 1
    return person_index


def flip_keypoints(keypoints, keypoint_flip_map, keypoint_coords, width):
    """Flip keypoint coordinates left/right.

    Args:
        keypoints: keypoint names, as returned by get_keypoints().
        keypoint_flip_map: left-to-right name map from get_keypoints().
        keypoint_coords: array indexed as [instance, row, keypoint], where
            row 0 holds x and row 2 is compared against 0 as visibility.
        width: image width used to mirror the x coordinates.

    Returns:
        A flipped copy; `keypoint_coords` itself is not modified.
    """
    flipped = keypoint_coords.copy()
    for left_name, right_name in keypoint_flip_map.items():
        left_idx = keypoints.index(left_name)
        right_idx = keypoints.index(right_name)
        # Swap the left and right channels, reading from the untouched input.
        flipped[:, :, [left_idx, right_idx]] = \
            keypoint_coords[:, :, [right_idx, left_idx]]

    # Mirror the x coordinates
    flipped[:, 0, :] = width - flipped[:, 0, :] - 1
    # Keep the COCO convention that visibility == 0 implies x, y = 0.
    # Only x is reset here because y is not changed by the flip.
    inst_idx, kp_idx = np.where(flipped[:, 2, :] == 0)
    flipped[inst_idx, 0, kp_idx] = 0
    return flipped


def flip_heatmaps(heatmaps):
    """Flip heatmaps horizontally.

    Left and right keypoint channels (axis 1) are swapped according to
    get_keypoints(), then the last axis is reversed.

    Returns:
        The flipped heatmaps; the input array is not modified.
    """
    names, left_to_right = get_keypoints()
    swapped = heatmaps.copy()
    for left_name, right_name in left_to_right.items():
        left_idx = names.index(left_name)
        right_idx = names.index(right_name)
        # Swap the paired channels, reading from the untouched input.
        swapped[:, [right_idx, left_idx], :, :] = \
            heatmaps[:, [left_idx, right_idx], :, :]
    return swapped[:, :, :, ::-1]


def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps.

    The output has shape (#rois, 4, #keypoints); the 4 rows are
    (x, y, logit, prob) for each keypoint.

    Discrete heatmap coordinates are converted to continuous keypoint
    coordinates with the Heckbert 1990 convention c = d + 0.5 (d discrete,
    c continuous), to stay consistent with keypoints_to_heatmap_labels.

    Args:
        maps: heatmaps in NCHW layout, one set per roi.
        rois: array whose columns 0..3 are read as (x1, y1, x2, y2).

    Returns:
        A float32 array of shape
        (len(rois), 4, cfg.KRCNN.NUM_KEYPOINTS).
    """
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    # Avoid degenerate rois: use at least 1 for each side.
    widths = np.maximum(widths, 1)
    heights = np.maximum(heights, 1)
    widths_ceil = np.ceil(widths)
    heights_ceil = np.ceil(heights)

    # NCHW to NHWC for use with OpenCV
    maps = np.transpose(maps, [0, 2, 3, 1])
    min_size = cfg.KRCNN.INFERENCE_MIN_SIZE
    xy_preds = np.zeros((len(rois), 4, cfg.KRCNN.NUM_KEYPOINTS), dtype=np.float32)
    for i in range(len(rois)):
        # cv2.resize needs an integer target size, so cast in both branches.
        if min_size > 0:
            roi_map_width = int(np.maximum(widths_ceil[i], min_size))
            roi_map_height = int(np.maximum(heights_ceil[i], min_size))
        else:
            roi_map_width = int(widths_ceil[i])
            roi_map_height = int(heights_ceil[i])
        # Scale factors mapping resized-map coordinates back to roi coordinates.
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = cv2.resize(maps[i], (roi_map_width, roi_map_height),
            interpolation=cv2.INTER_CUBIC)
        # Bring back to CHW
        roi_map = np.transpose(roi_map, [2, 0, 1])
        # scores_to_probs works in place, so hand it a copy to keep the logits.
        roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        for k in range(cfg.KRCNN.NUM_KEYPOINTS):
            # argmax returns a flat index; split it into (y, x).
            pos = roi_map[k, :, :].argmax()
            x_int = pos % w
            y_int = (pos - x_int) // w
            assert (roi_map_probs[k, y_int, x_int] == roi_map_probs[k, :, :].max())
            x = (x_int + 0.5) * width_correction
            y = (y_int + 0.5) * height_correction
            xy_preds[i, 0, k] = x + offset_x[i]
            xy_preds[i, 1, k] = y + offset_y[i]
            xy_preds[i, 2, k] = roi_map[k, y_int, x_int]
            xy_preds[i, 3, k] = roi_map_probs[k, y_int, x_int]

    return xy_preds


def keypoints_to_heatmap_labels(keypoints, rois):
    """Encode keypoint locations as heatmap targets for SoftmaxWithLoss.

    Keypoints are mapped from the half-open continuous interval [x1, x2) to
    the closed discrete interval [0, HEATMAP_SIZE - 1], using the Heckbert
    1990 ("What is the coordinate of a pixel?") convention: d = floor(c) and
    c = d + 0.5, where d is a discrete and c a continuous coordinate.

    Args:
        keypoints: array indexed as [roi, row, keypoint]; rows 0, 1, 2 are
            read as x, y and visibility.
        rois: array whose columns 0..3 are read as (x1, y1, x2, y2).

    Returns:
        A tuple `(heatmaps, weights)`, both of shape
        (len(rois), NUM_KEYPOINTS). `heatmaps` holds the linear index
        y * HEATMAP_SIZE + x (0 where invalid); `weights` holds 1 for valid
        keypoints and 0 otherwise.
    """
    num_keypoints = cfg.KRCNN.NUM_KEYPOINTS
    assert keypoints.shape[2] == num_keypoints

    out_shape = (len(rois), num_keypoints)
    heatmaps = blob_utils.zeros(out_shape)
    weights = blob_utils.zeros(out_shape)

    heatmap_size = cfg.KRCNN.HEATMAP_SIZE
    x1, y1 = rois[:, 0], rois[:, 1]
    x2, y2 = rois[:, 2], rois[:, 3]
    scale_x = heatmap_size / (x2 - x1)
    scale_y = heatmap_size / (y2 - y1)

    for kp in range(keypoints.shape[2]):
        vis = keypoints[:, 2, kp] > 0
        x = keypoints[:, 0, kp].astype(np.float32)
        y = keypoints[:, 1, kp].astype(np.float32)
        # floor() would push a keypoint lying exactly on the right or bottom
        # roi edge outside the heatmap, so remember those and pin them to the
        # last bin below.
        on_right_edge = np.where(x == x2)[0]
        on_bottom_edge = np.where(y == y2)[0]

        x = np.floor((x - x1) * scale_x)
        x[on_right_edge] = heatmap_size - 1

        y = np.floor((y - y1) * scale_y)
        y[on_bottom_edge] = heatmap_size - 1

        in_range = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
        valid = np.logical_and(in_range, vis).astype(np.int32)

        heatmaps[:, kp] = (y * heatmap_size + x) * valid
        weights[:, kp] = valid

    return heatmaps, weights


def scores_to_probs(scores):
    """Turn CxHxW scores into spatial probabilities, one softmax per channel.

    The input array is modified in place and also returned.
    """
    for c in range(scores.shape[0]):
        channel = scores[c, :, :]
        # Subtract the max before exponentiating for numerical stability.
        exp_scores = np.exp(channel - channel.max())
        scores[c, :, :] = exp_scores / np.sum(exp_scores)
    return scores


def nms_oks(kp_predictions, rois, thresh):
    """Greedy non-maximum suppression based on keypoint predictions.

    Instances are ranked by the mean of row 2 of their predictions. Each kept
    instance suppresses the remaining ones whose OKS with it is above
    `thresh`.

    Returns:
        The list of kept indices, in the order they were selected.
    """
    mean_scores = np.mean(kp_predictions[:, 2, :], axis=1)
    remaining = mean_scores.argsort()[::-1]

    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)
        oks = compute_oks(kp_predictions[best], rois[best], kp_predictions[rest], rois[rest])
        remaining = rest[np.where(oks <= thresh)[0]]

    return kept


def compute_oks(src_keypoints, src_roi, dst_keypoints, dst_roi):
    """Compute the OKS between one set of keypoints and N other sets.

    Args:
        src_keypoints: 4xK
        src_roi: 4x1
        dst_keypoints: Nx4xK
        dst_roi: Nx4 (not used by the computation)

    Returns:
        An array of N OKS values, one per entry of `dst_keypoints`.
    """
    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
    variances = (sigmas * 2)**2

    # Area of the source roi
    roi_width = src_roi[2] - src_roi[0] + 1
    roi_height = src_roi[3] - src_roi[1] + 1
    src_area = roi_width * roi_height

    # Per-keypoint squared distance between each destination and the source
    dx = dst_keypoints[:, 0, :] - src_keypoints[0, :]
    dy = dst_keypoints[:, 1, :] - src_keypoints[1, :]
    sq_dist = dx**2 + dy**2

    # np.spacing(1) guards against a division by zero for an empty area.
    e = sq_dist / variances / (src_area + np.spacing(1)) / 2
    return np.sum(np.exp(-e), axis=1) / e.shape[1]

5. segm.py

"""
处理 COCO 格式的 segmentation masks 的函数.
所使用到的项有:
    - mask: 2D numpy array 格式的二值 mask. a binary mask encoded as a 2D numpy array
    - segm: segmentation mask 格式,(COCO 有两种 segmentation mask 格式:polygon or RLE)
        - polygon: COCO 的多边形格式(polygon format)
        - RLE: COCO's run length encoding format
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

import pycocotools.mask as mask_util


def flip_segms(segms, height, width):
    """Flip each mask in a list of masks left/right.

    Args:
        segms: list of segmentations; each is a list of polygons or an RLE
            dict.
        height: image height, used when converting RLE.
        width: image width, used to mirror the x coordinates.

    Returns:
        A new list with the flipped segmentations.

    Raises:
        AssertionError: if an entry is neither a list nor a dict.
    """
    def _mirror_polygon(poly, width):
        # Even positions of the flat polygon hold the x coordinates.
        mirrored = np.array(poly)
        mirrored[0::2] = width - np.array(poly[0::2]) - 1
        return mirrored.tolist()

    def _mirror_rle(rle, height, width):
        if 'counts' in rle and type(rle['counts']) == list:
            # Magic RLE format, handled the way the COCO API showAnns
            # function does it.
            rle = mask_util.frPyObjects([rle], height, width)
        # NOTE(review): the 3-axis slice below presumes decode() returns a 3-D
        # array; confirm this also holds when `rle` is still a plain dict.
        decoded = mask_util.decode(rle)
        decoded = decoded[:, ::-1, :]
        return mask_util.encode(np.array(decoded, order='F', dtype=np.uint8))

    flipped_segms = []
    for segm in segms:
        if type(segm) == list:
            # Polygon format
            flipped = [_mirror_polygon(poly, width) for poly in segm]
        else:
            # RLE format
            assert type(segm) == dict
            flipped = _mirror_rle(segm, height, width)
        flipped_segms.append(flipped)
    return flipped_segms


def polys_to_mask(polygons, height, width):
    """Convert a COCO polygon segmentation into a binary float32 mask.

    The polygons are taken as closed regions in a height x width image; the
    resulting mask has shape (height, width).
    """
    rles = mask_util.frPyObjects(polygons, height, width)
    per_polygon = np.array(mask_util.decode(rles), dtype=np.float32)
    # Flatten in case polygons was a list: merge the masks along axis 2
    merged = np.sum(per_polygon, axis=2)
    return np.array(merged > 0, dtype=np.float32)


def mask_to_bbox(mask):
    """Compute the bounding box of a binary mask.

    Returns:
        A float32 array (x0, y0, x1, y1) with inclusive corner indices, or
        None if no column or row of the mask has a positive sum.
    """
    col_hits = np.where(np.sum(mask, axis=0) > 0)[0]
    row_hits = np.where(np.sum(mask, axis=1) > 0)[0]

    if len(col_hits) == 0 or len(row_hits) == 0:
        return None

    corners = (col_hits[0], row_hits[0], col_hits[-1], row_hits[-1])
    return np.array(corners, dtype=np.float32)


def polys_to_mask_wrt_box(polygons, box, M):
    """Convert a COCO polygon segmentation into an M x M binary float32 mask.

    The polygons are taken as closed regions relative to the given box, which
    is mapped onto an M x M grid; the resulting mask has shape (M, M).

    Args:
        polygons: list of flat polygons with x at even and y at odd positions.
        box: sequence read as (x1, y1, x2, y2).
        M: side length of the output mask.
    """
    # Guard against degenerate boxes: use at least 1 for each side.
    box_w = np.maximum(box[2] - box[0], 1)
    box_h = np.maximum(box[3] - box[1], 1)

    scaled_polygons = []
    for poly in polygons:
        pts = np.array(poly, dtype=np.float32)
        # Move to the box origin and rescale so the box spans M units.
        pts[0::2] = (pts[0::2] - box[0]) * M / box_w
        pts[1::2] = (pts[1::2] - box[1]) * M / box_h
        scaled_polygons.append(pts)

    rles = mask_util.frPyObjects(scaled_polygons, M, M)
    per_polygon = np.array(mask_util.decode(rles), dtype=np.float32)
    # Flatten in case polygons was a list: merge the masks along axis 2
    merged = np.sum(per_polygon, axis=2)
    return np.array(merged > 0, dtype=np.float32)


def polys_to_boxes(polys):
    """Convert a list of polygon groups into an array of bounding boxes.

    Args:
        polys: list in which each entry is a list of flat polygons with x at
            even and y at odd positions.

    Returns:
        A float32 array of shape (len(polys), 4) with rows (x0, y0, x1, y1).
    """
    boxes = np.zeros((len(polys), 4), dtype=np.float32)
    for i, poly in enumerate(polys):
        left = min(min(p[::2]) for p in poly)
        right = max(max(p[::2]) for p in poly)
        top = min(min(p[1::2]) for p in poly)
        bottom = max(max(p[1::2]) for p in poly)
        boxes[i, :] = [left, top, right, bottom]

    return boxes


def rle_mask_voting(top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'):
    """Return new masks for `top_masks` by combining overlapping `all_masks`.

    Two combination methods are supported:
        - AVG: weighted average of the overlapping mask pixels
        - UNION: union of all mask pixels

    Args:
        top_masks: RLE masks to refine.
        all_masks: RLE masks that may vote.
        all_dets: array whose columns 0..3 are read as a box and column 4 as
            a score, one row per entry of `all_masks`.
        iou_thresh: minimum IoU with a top mask for a mask to vote.
        binarize_thresh: threshold applied to the averaged mask (AVG only).
        method: 'AVG' or 'UNION'.

    Returns:
        A list of RLE masks, one per top mask, or None if `top_masks` is
        empty.

    Raises:
        NotImplementedError: if `method` is unknown.
    """
    if len(top_masks) == 0:
        return

    num_all = len(all_masks)
    not_crowd_flags = [False] * num_all
    top_to_all_overlaps = mask_util.iou(top_masks, all_masks, not_crowd_flags)
    decoded_all = [np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks]
    decoded_top = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks
    ]
    all_boxes = all_dets[:, :4].astype(np.int32)
    all_scores = all_dets[:, 4]

    # Fill each box's support with the detection score as the voting weight
    mask_h, mask_w = decoded_all[0].shape[0], decoded_all[0].shape[1]
    mask_weights = np.zeros((num_all, mask_h, mask_w))
    for k in range(num_all):
        box = all_boxes[k]
        # Clip the box to the mask extent.
        col_start = max(box[0], 0)
        col_stop = min(box[2] + 1, mask_w)
        row_start = max(box[1], 0)
        row_stop = min(box[3] + 1, mask_h)
        mask_weights[k, row_start:row_stop, col_start:col_stop] = all_scores[k]
    # Keep every weight strictly positive.
    mask_weights = np.maximum(mask_weights, 1e-5)

    voted_segms = []
    for k, top_rle in enumerate(top_masks):
        # Corner case of empty mask
        if decoded_top[k].sum() == 0:
            voted_segms.append(top_rle)
            continue

        voters = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
        # Only matches itself
        if len(voters) == 1:
            voted_segms.append(top_rle)
            continue

        voting_masks = [decoded_all[i] for i in voters]
        if method == 'AVG':
            soft_mask = np.average(voting_masks, axis=0, weights=mask_weights[voters])
            mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
        elif method == 'UNION':
            # Any pixel that's on joins the mask
            soft_mask = np.sum(voting_masks, axis=0)
            mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
        else:
            raise NotImplementedError('Method {} is unknown'.format(method))
        encoded = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))
        voted_segms.append(encoded[0])

    return voted_segms


def rle_mask_nms(masks, dets, thresh, mode='IOU'):
    """Greedy non-maximum suppression based on an overlap measure of masks.

    The measure is selected by `mode`:
        - 'IOU': standard intersection over union
        - 'IOMA': intersection over minimum area
        - 'CONTAINMENT': area(intersect(m1, m2)) / area(m2), which measures
          how much of m2 is contained inside m1

    Args:
        masks: list of RLE masks.
        dets: array whose column 4 is read as the detection score.
        thresh: masks overlapping a kept mask by more than this are dropped.
        mode: one of 'IOU', 'IOMA', 'CONTAINMENT'.

    Returns:
        The list of kept indices, in the order they were selected.

    Raises:
        NotImplementedError: if `mode` is unknown (checked only when there
            are at least two masks).
    """
    num_masks = len(masks)
    if num_masks == 0:
        return []
    if num_masks == 1:
        return [0]

    if mode not in ('IOU', 'IOMA', 'CONTAINMENT'):
        raise NotImplementedError('Mode {} is unknown'.format(mode))

    # With every mask flagged as crowd, iou() yields
    # area(intersect(m1, m2)) / area(m2) instead of the standard
    # area(intersect(m1, m2)) / area(union(m1, m2)).
    treat_as_crowd = mode != 'IOU'
    ious = mask_util.iou(masks, masks, [treat_as_crowd] * num_masks)
    if mode == 'IOMA':
        # max(inter / area(m2), inter / area(m1)) == inter / min(area)
        ious = np.maximum(ious, ious.transpose())

    scores = dets[:, 4]
    remaining = np.argsort(-scores)

    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)
        overlaps = ious[best, rest]
        remaining = rest[np.where(overlaps <= thresh)[0]]

    return kept


def rle_masks_to_boxes(masks):
    """Compute the bounding box of each mask in a list of RLE encoded masks.

    Returns:
        A tuple `(boxes, keep)`: `boxes` has shape (len(masks), 4) with rows
        (x0, y0, x1, y1), left at zero for empty masks, and `keep` holds the
        indices of the non-empty masks.
        NOTE(review): for an empty `masks` a bare empty list is returned
        instead of a tuple; confirm that callers handle both shapes.
    """
    if len(masks) == 0:
        return []

    decoded_masks = [np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks]

    def _extent(profile):
        # First and last positions at which the projected mask is non-zero.
        hits = np.where(profile > 0)[0]
        return hits.min(), hits.max()

    num_masks = len(decoded_masks)
    boxes = np.zeros((num_masks, 4))
    keep = [True] * num_masks
    for i, mask in enumerate(decoded_masks):
        if mask.sum() == 0:
            keep[i] = False
            continue
        x0, x1 = _extent(mask.sum(axis=0))
        y0, y1 = _extent(mask.sum(axis=1))
        boxes[i, :] = (x0, y0, x1, y1)

    return boxes, np.where(keep)[0]

你可能感兴趣的:(Caffe2,Caffe2)