Caffe2 - (二十四) Detectron 之 utils 函数(2)
1. env.py
"""Environment helper functions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import imp
import os
import sys
def get_runtime_dir():
    """Return the runtime directory.

    The runtime directory is taken to be the first entry of ``sys.path``.
    """
    runtime_dir = sys.path[0]
    return runtime_dir
def get_py_bin_ext():
    """Return the file extension used for Python binaries (``'.py'``)."""
    py_bin_ext = '.py'
    return py_bin_ext
def set_up_matplotlib():
    """Configure matplotlib to use the 'Agg' backend."""
    # Imported lazily so that matplotlib is only required when this is called.
    import matplotlib as mpl
    mpl.use('Agg')
def exit_on_error():
    """Exit the running Detectron tool with status code 1.

    Raises:
        SystemExit: always, via ``sys.exit``.
    """
    failure_code = 1
    sys.exit(failure_code)
def import_nccl_ops():
    """Import NCCL ops (a no-op).

    The NCCL dependency is already compiled into the Caffe2 GPU library, so
    nothing has to be loaded here.
    """
    return None
def get_caffe2_dir():
    """Return the directory that contains the ``caffe2`` package.

    Returns:
        The parent directory of the path reported by ``imp.find_module``.

    Raises:
        ImportError: if ``imp.find_module`` cannot locate ``caffe2``.
        AssertionError: if the located path does not exist on disk.
    """
    # find_module returns (file, pathname, description); only the path is used.
    module_info = imp.find_module('caffe2')
    c2_path = module_info[1]
    assert os.path.exists(c2_path), \
        'Caffe2 not found at \'{}\''.format(c2_path)
    return os.path.dirname(os.path.abspath(c2_path))
def get_detectron_ops_lib():
    """Return the path of the Detectron ops shared library.

    The library is looked up relative to the directory returned by
    ``get_caffe2_dir()``.

    Raises:
        AssertionError: if the library file does not exist.
    """
    lib_rel_path = 'lib/libcaffe2_detectron_ops_gpu.so'
    detectron_ops_lib = os.path.join(get_caffe2_dir(), lib_rel_path)
    assert os.path.exists(detectron_ops_lib), \
        ('Detectron ops lib not found at \'{}\'; make sure that your Caffe2 '
         'version includes Detectron module').format(detectron_ops_lib)
    return detectron_ops_lib
def get_custom_ops_lib():
    """Return the path of the custom ops shared library.

    The library is expected under ``build/`` in the parent of the directory
    that holds this module.

    Raises:
        AssertionError: if the library file does not exist.
    """
    this_dir = os.path.dirname(__file__)
    # Drop the last path component to get the directory one level up.
    lib_dir = os.path.split(this_dir)[0]
    custom_ops_lib = os.path.join(
        lib_dir, 'build/libcaffe2_detectron_custom_ops_gpu.so')
    assert os.path.exists(custom_ops_lib), \
        'Custom ops lib not found at \'{}\''.format(custom_ops_lib)
    return custom_ops_lib
2. c2.py
"""Helpful utilities for working with Caffe2."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from six import string_types
import contextlib
from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python import dyndep
from caffe2.python import scope
import utils.env as envu
def import_contrib_ops():
    """Import the contrib ops that Detectron needs.

    Currently this only delegates to ``envu.import_nccl_ops()``.
    """
    envu.import_nccl_ops()
def import_detectron_ops():
    """Locate the Detectron ops library and register it with Caffe2."""
    dyndep.InitOpsLibrary(envu.get_detectron_ops_lib())
def import_custom_ops():
    """Locate the custom ops library and register it with Caffe2."""
    dyndep.InitOpsLibrary(envu.get_custom_ops_lib())
def SuffixNet(name, net, prefix_len, outputs):
    """Return a new Net holding only the ops of `net` after the first
    `prefix_len` ops, i.e. a suffix of `net`.

    Args:
        name: name given to the cloned net.
        net: the source Net; it is cloned, not modified in place.
        prefix_len: number of leading ops to drop.
        outputs: a BlobReference or list/tuple of BlobReferences (normalized
            with BlobReferenceList); each must be defined in `net`. They are
            registered as the external outputs of the new net.

    Returns:
        A tuple `(new_net, output_refs)` where `output_refs` are the new
        net's blob references for `outputs`.

    Raises:
        AssertionError: if any output blob is not defined in `net`.
    """
    outputs = BlobReferenceList(outputs)
    for output in outputs:
        assert net.BlobIsDefined(output)
    new_net = net.Clone(name)

    # Start from an empty clone: drop all ops and external inputs/outputs.
    del new_net.Proto().op[:]
    del new_net.Proto().external_input[:]
    del new_net.Proto().external_output[:]

    # Add back only the ops that follow the removed prefix.
    new_net.Proto().op.extend(net.Proto().op[prefix_len:])
    # Any op input the new net does not define itself becomes an external
    # input (e.g. blobs that were produced by the removed prefix ops).
    input_names = [
        i for op in new_net.Proto().op for i in op.input
        if not new_net.BlobIsDefined(i)]
    new_net.Proto().external_input.extend(input_names)
    # Register the requested blobs as external outputs.
    output_names = [str(o) for o in outputs]
    new_net.Proto().external_output.extend(output_names)
    return new_net, [new_net.GetBlobRef(o) for o in output_names]
def BlobReferenceList(blob_ref_or_list):
    """Return the argument as a list of BlobReferences.

    A single BlobReference is wrapped in a one-element list; a list or tuple
    is returned unchanged after checking each element.

    Raises:
        TypeError: if the argument is neither a BlobReference nor exactly a
            list/tuple.
        AssertionError: if a list/tuple element is not a BlobReference.
    """
    if isinstance(blob_ref_or_list, core.BlobReference):
        return [blob_ref_or_list]
    if type(blob_ref_or_list) not in (list, tuple):
        raise TypeError('blob_ref_or_list must be a BlobReference or a list/tuple of BlobReferences')
    for blob in blob_ref_or_list:
        assert isinstance(blob, core.BlobReference)
    return blob_ref_or_list
def UnscopeName(possibly_scoped_name):
    """Remove any name scoping from a (possibly) scoped name.

    For example, 'gpu_0/foo' becomes 'foo'.

    Raises:
        AssertionError: if the argument is not a string.
    """
    assert isinstance(possibly_scoped_name, string_types)
    # rfind returns -1 when there is no separator, so the slice starts at 0.
    last_sep = possibly_scoped_name.rfind(scope._NAMESCOPE_SEPARATOR)
    return possibly_scoped_name[last_sep + 1:]
@contextlib.contextmanager
def NamedCudaScope(gpu_id):
    """Enter both the GPU name scope and the CUDA device scope for `gpu_id`.

    Exists to reduce the nesting depth of `with ...` blocks at call sites.
    """
    with GpuNameScope(gpu_id), CudaScope(gpu_id):
        yield
@contextlib.contextmanager
def GpuNameScope(gpu_id):
    """Enter the name scope 'gpu_<gpu_id>' for GPU device `gpu_id`."""
    scope_name = 'gpu_{:d}'.format(gpu_id)
    with core.NameScope(scope_name):
        yield
@contextlib.contextmanager
def CudaScope(gpu_id):
    """Enter a CUDA device scope for GPU device `gpu_id`."""
    with core.DeviceScope(CudaDevice(gpu_id)):
        yield
@contextlib.contextmanager
def CpuScope():
    """Enter a CPU device scope."""
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
        yield
def CudaDevice(gpu_id):
    """Build a CUDA device option that selects device `gpu_id`."""
    device_option = core.DeviceOption(caffe2_pb2.CUDA, gpu_id)
    return device_option
def gauss_fill(std):
    """Gaussian fill helper that cuts down on repeated boilerplate.

    Returns:
        The tuple ``('GaussianFill', {'std': std})``.
    """
    fill_args = {'std': std}
    return ('GaussianFill', fill_args)
def const_fill(value):
    """Constant fill helper that cuts down on repeated boilerplate.

    Returns:
        The tuple ``('ConstantFill', {'value': value})``.
    """
    fill_args = {'value': value}
    return ('ConstantFill', fill_args)
3. io.py
"""IO utilities."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import cPickle as pickle
import hashlib
import logging
import os
import re
import sys
import urllib2
logger = logging.getLogger(__name__)
_DETECTRON_S3_BASE_URL = 'https://s3-us-west-2.amazonaws.com/detectron'
def save_object(obj, file_name):
    """Pickle a Python object to disk.

    Args:
        obj: the object to serialize.
        file_name: destination path; it is made absolute before opening.
    """
    target_path = os.path.abspath(file_name)
    with open(target_path, 'wb') as out_file:
        pickle.dump(obj, out_file, pickle.HIGHEST_PROTOCOL)
def cache_url(url_or_file, cache_dir):
    """Download the file at a URL into `cache_dir` and return the local path.

    If the argument is not an http(s) URL it is returned unchanged. A file
    that is already cached is not downloaded again; in both cases the cached
    file's md5 checksum is verified before the path is returned.

    Raises:
        AssertionError: if the URL is outside the Detectron S3 bucket, or if
            the cached file fails checksum verification.
    """
    looks_like_url = re.match(r'^(?:http)s?://', url_or_file, re.IGNORECASE)
    if looks_like_url is None:
        return url_or_file

    url = url_or_file
    assert url.startswith(_DETECTRON_S3_BASE_URL), \
        ('Detectron only automatically caches URLs in the Detectron S3 '
         'bucket: {}').format(_DETECTRON_S3_BASE_URL)

    # Mirror the bucket's layout underneath the cache directory.
    cache_file_path = url.replace(_DETECTRON_S3_BASE_URL, cache_dir)
    if not os.path.exists(cache_file_path):
        cache_file_dir = os.path.dirname(cache_file_path)
        if not os.path.exists(cache_file_dir):
            os.makedirs(cache_file_dir)
        logger.info('Downloading remote file {} to {}'.format(url, cache_file_path))
        download_url(url, cache_file_path)

    assert_cache_file_is_ok(url, cache_file_path)
    return cache_file_path
def assert_cache_file_is_ok(url, file_path):
    """Check that a cached file's md5 hash matches the reference for `url`.

    Args:
        url: the remote URL the file was downloaded from.
        file_path: local path of the cached copy.

    Raises:
        AssertionError: if the local and reference md5 sums differ.
    """
    actual_md5 = _get_file_md5sum(file_path)
    expected_md5 = _get_reference_md5sum(url)
    assert actual_md5 == expected_md5, \
        ('Target URL {} appears to be downloaded to the local cache file '
         '{}, but the md5 hash of the local file does not match the '
         'reference (actual: {} vs. expected: {}). You may wish to delete '
         'the cached file and try again to trigger automatic '
         'download.').format(url, file_path, actual_md5, expected_md5)
def _progress_bar(count, total):
"""
显示下载进度.
参考:
https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113
"""
bar_len = 60
filled_len = int(round(bar_len * count / float(total)))
percents = round(100.0 * count / float(total), 1)
bar = '=' * filled_len + '-' * (bar_len - filled_len)
sys.stdout.write(' [{}] {}% of {:.1f}MB file \r'.format(bar, percents, total / 1024 / 1024) )
sys.stdout.flush()
if count >= total:
sys.stdout.write('\n')
def download_url(url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar):
    """Download `url` and write its contents to `dst_file_path`.

    Args:
        url: the URL to fetch.
        dst_file_path: destination file, opened in binary write mode.
        chunk_size: number of bytes requested per read.
        progress_hook: optional callable invoked as
            `progress_hook(bytes_so_far, total_size)` for every chunk. It is
            skipped when the server sends no Content-Length header, because
            the total size is then unknown.

    Returns:
        The number of bytes downloaded.

    Reference:
    https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    """
    response = urllib2.urlopen(url)
    try:
        # Content-Length may be absent (e.g. chunked transfer encoding).
        content_length = response.info().getheader('Content-Length')
        total_size = int(content_length.strip()) if content_length else None
        bytes_so_far = 0

        with open(dst_file_path, 'wb') as f:
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                bytes_so_far += len(chunk)
                if progress_hook and total_size is not None:
                    progress_hook(bytes_so_far, total_size)
                f.write(chunk)
    finally:
        # Always release the connection, even if reading or writing fails.
        response.close()

    return bytes_so_far
def _get_file_md5sum(file_name):
"""
计算文件的 md5 hash 值.
"""
hash_obj = hashlib.md5()
with open(file_name, 'r') as f:
hash_obj.update(f.read())
return hash_obj.hexdigest()
def _get_reference_md5sum(url):
    """Fetch the reference md5 hash for `url`.

    By convention the md5 hash for `url` is stored at `url + '.md5sum'`.

    Returns:
        The body of that resource with surrounding whitespace stripped.
    """
    url_md5sum = url + '.md5sum'
    response = urllib2.urlopen(url_md5sum)
    return response.read().strip()
4. keypoints.py
"""Keypoint utilities (somewhat specific to COCO keypoints)."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import cv2
import numpy as np
from core.config import cfg
import utils.blob as blob_utils
def get_keypoints():
    """Return the COCO keypoint names and their left/right flip mapping.

    Keypoints are not available in the COCO json for the test split, so they
    are provided here.

    Returns:
        A tuple `(keypoints, keypoint_flip_map)`: the list of keypoint names
        and a dict mapping each 'left_*' name to its 'right_*' counterpart.
    """
    keypoints = [
        'nose',
        'left_eye', 'right_eye',
        'left_ear', 'right_ear',
        'left_shoulder', 'right_shoulder',
        'left_elbow', 'right_elbow',
        'left_wrist', 'right_wrist',
        'left_hip', 'right_hip',
        'left_knee', 'right_knee',
        'left_ankle', 'right_ankle',
    ]
    left_prefix = 'left_'
    # Every left_<part> keypoint pairs with the right_<part> keypoint.
    keypoint_flip_map = {
        name: 'right_' + name[len(left_prefix):]
        for name in keypoints
        if name.startswith(left_prefix)
    }
    return keypoints, keypoint_flip_map
def get_person_class_index():
    """Return the index of the COCO person class, which is 1."""
    person_class_index = 1
    return person_class_index
def flip_keypoints(keypoints, keypoint_flip_map, keypoint_coords, width):
    """Left/right flip keypoint coordinates.

    `keypoints` and `keypoint_flip_map` are the values returned by
    get_keypoints(). `keypoint_coords` is indexed as
    [instance, row, keypoint], where row 0 is x and row 2 is compared with 0
    to detect missing keypoints. The input array is not modified.

    Returns:
        A flipped copy of `keypoint_coords`.
    """
    flipped_kps = keypoint_coords.copy()
    # Swap each left keypoint with its right counterpart.
    for left_name, right_name in keypoint_flip_map.items():
        left_idx = keypoints.index(left_name)
        right_idx = keypoints.index(right_name)
        flipped_kps[:, :, left_idx] = keypoint_coords[:, :, right_idx]
        flipped_kps[:, :, right_idx] = keypoint_coords[:, :, left_idx]

    # Mirror the x coordinates inside an image of the given width.
    flipped_kps[:, 0, :] = width - flipped_kps[:, 0, :] - 1
    # Keypoints whose row 2 is 0 keep x == 0 after the flip.
    instance_idx, keypoint_idx = np.where(flipped_kps[:, 2, :] == 0)
    flipped_kps[instance_idx, 0, keypoint_idx] = 0
    return flipped_kps
def flip_heatmaps(heatmaps):
    """Flip heatmaps horizontally.

    `heatmaps` is indexed with four axes; axis 1 selects the keypoint
    channel and the last axis is reversed. Left/right keypoint channels are
    swapped according to get_keypoints(). The input is not modified.
    """
    names, flip_map = get_keypoints()
    swapped = heatmaps.copy()
    for left_name, right_name in flip_map.items():
        left_idx = names.index(left_name)
        right_idx = names.index(right_name)
        swapped[:, right_idx, :, :] = heatmaps[:, left_idx, :, :]
        swapped[:, left_idx, :, :] = heatmaps[:, right_idx, :, :]
    # Reverse the last axis to mirror each map.
    return swapped[:, :, :, ::-1]
def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps.

    Discrete heatmap coordinates are converted to continuous keypoint
    coordinates with the Heckbert 1990 convention c = d + 0.5 (d discrete,
    c continuous), consistent with keypoints_to_heatmap_labels.

    Args:
        maps: heatmaps, one per roi and keypoint; axes are transposed with
            [0, 2, 3, 1] before resizing (presumably NCHW -> NHWC; confirm
            against the caller).
        rois: array whose columns 0..3 are used as (x1, y1, x2, y2).

    Returns:
        A float32 array of shape (#rois, 4, #keypoints); the 4 rows hold
        (x, y, logit, prob) for each keypoint.
    """
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    # Clamp degenerate rois to at least one pixel in each dimension.
    widths = np.maximum(rois[:, 2] - rois[:, 0], 1)
    heights = np.maximum(rois[:, 3] - rois[:, 1], 1)
    widths_ceil = np.ceil(widths)
    heights_ceil = np.ceil(heights)

    # Move the keypoint axis last so each maps[i] can be passed to cv2.resize.
    maps = np.transpose(maps, [0, 2, 3, 1])
    min_size = cfg.KRCNN.INFERENCE_MIN_SIZE
    xy_preds = np.zeros(
        (len(rois), 4, cfg.KRCNN.NUM_KEYPOINTS), dtype=np.float32)
    for i in range(len(rois)):
        # cv2.resize needs an integer (width, height); the ceil'd sizes are
        # whole numbers, so int() is exact in both branches.
        if min_size > 0:
            roi_map_width = int(np.maximum(widths_ceil[i], min_size))
            roi_map_height = int(np.maximum(heights_ceil[i], min_size))
        else:
            roi_map_width = int(widths_ceil[i])
            roi_map_height = int(heights_ceil[i])
        # Ratio between the true roi size and the resized map size.
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = cv2.resize(
            maps[i], (roi_map_width, roi_map_height),
            interpolation=cv2.INTER_CUBIC)
        # Bring the keypoint axis back to the front.
        roi_map = np.transpose(roi_map, [2, 0, 1])
        # scores_to_probs works in place, so hand it a copy.
        roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        for k in range(cfg.KRCNN.NUM_KEYPOINTS):
            # Decompose the flat argmax index into (row, column).
            pos = roi_map[k, :, :].argmax()
            x_int = pos % w
            y_int = (pos - x_int) // w
            assert (roi_map_probs[k, y_int, x_int] ==
                    roi_map_probs[k, :, :].max())
            x = (x_int + 0.5) * width_correction
            y = (y_int + 0.5) * height_correction
            xy_preds[i, 0, k] = x + offset_x[i]
            xy_preds[i, 1, k] = y + offset_y[i]
            xy_preds[i, 2, k] = roi_map[k, y_int, x_int]
            xy_preds[i, 3, k] = roi_map_probs[k, y_int, x_int]

    return xy_preds
def keypoints_to_heatmap_labels(keypoints, rois):
    """Encode keypoint locations in the target heatmap for SoftmaxWithLoss.

    Keypoints are mapped from the half-open interval [x1, x2) on continuous
    image coordinates to the closed interval [0, HEATMAP_SIZE - 1] on
    discrete coordinates, using the Heckbert 1990 ("What is the coordinate
    of a pixel?") convention: d = floor(c) and c = d + 0.5, where d is a
    discrete coordinate and c a continuous one.

    Args:
        keypoints: indexed as [roi, row, keypoint] with rows (x, y, vis).
        rois: array whose columns 0..3 are used as (x1, y1, x2, y2).

    Returns:
        A tuple `(heatmaps, weights)`, both of shape (#rois, #keypoints):
        the linear heatmap index of every keypoint and a 0/1 validity weight.
    """
    num_keypoints = keypoints.shape[2]
    assert num_keypoints == cfg.KRCNN.NUM_KEYPOINTS
    heatmap_size = cfg.KRCNN.HEATMAP_SIZE

    shape = (len(rois), cfg.KRCNN.NUM_KEYPOINTS)
    heatmaps = blob_utils.zeros(shape)
    weights = blob_utils.zeros(shape)

    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
    scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])

    for kp in range(num_keypoints):
        vis = keypoints[:, 2, kp] > 0
        x = keypoints[:, 0, kp].astype(np.float32)
        y = keypoints[:, 1, kp].astype(np.float32)

        # Keypoints lying exactly on the right/bottom roi edge would map to
        # HEATMAP_SIZE; remember them so they can be pulled into the last bin.
        on_right_edge = np.where(x == rois[:, 2])[0]
        on_bottom_edge = np.where(y == rois[:, 3])[0]

        x = np.floor((x - offset_x) * scale_x)
        if len(on_right_edge) > 0:
            x[on_right_edge] = heatmap_size - 1

        y = np.floor((y - offset_y) * scale_y)
        if len(on_bottom_edge) > 0:
            y[on_bottom_edge] = heatmap_size - 1

        inside = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
        valid = np.logical_and(inside, vis).astype(np.int32)

        # Invalid keypoints get index 0 and weight 0.
        lin_ind = y * heatmap_size + x
        heatmaps[:, kp] = lin_ind * valid
        weights[:, kp] = valid

    return heatmaps, weights
def scores_to_probs(scores):
    """Transform CxHxW scores into spatial probabilities.

    Each channel is passed through a softmax over all of its spatial
    positions. The conversion happens in place: `scores` itself is
    overwritten and returned.
    """
    for c in range(scores.shape[0]):
        channel = scores[c, :, :]
        # Subtract the channel maximum before exponentiating.
        exp_scores = np.exp(channel - channel.max())
        scores[c, :, :] = exp_scores / np.sum(exp_scores)
    return scores
def nms_oks(kp_predictions, rois, thresh):
    """Greedy non-maximum suppression based on keypoint predictions.

    Instances are ranked by the mean of row 2 of their predictions over all
    keypoints; any instance whose OKS with an already kept, higher-ranked
    instance exceeds `thresh` is suppressed.

    Returns:
        The list of kept instance indices, highest score first.
    """
    scores = kp_predictions[:, 2, :].mean(axis=1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)
        ovr = compute_oks(
            kp_predictions[best], rois[best], kp_predictions[rest], rois[rest])
        # Carry forward only the candidates not suppressed by `best`.
        order = rest[np.where(ovr <= thresh)[0]]

    return keep
def compute_oks(src_keypoints, src_roi, dst_keypoints, dst_roi):
    """Compute the OKS of `dst_keypoints` with respect to `src_keypoints`.

    Args:
        src_keypoints: 4xK
        src_roi: 4x1
        dst_keypoints: Nx4xK
        dst_roi: Nx4 (not used by the computation)

    Returns:
        An array of length N with one OKS value per dst instance.
    """
    sigmas = np.array([
        .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
        .87, .87, .89, .89]) / 10.0
    variances = (sigmas * 2)**2

    # Area of the src box, measured with inclusive pixel coordinates.
    roi_width = src_roi[2] - src_roi[0] + 1
    roi_height = src_roi[3] - src_roi[1] + 1
    src_area = roi_width * roi_height

    # Per-keypoint squared distance between every dst instance and src.
    dx = dst_keypoints[:, 0, :] - src_keypoints[0, :]
    dy = dst_keypoints[:, 1, :] - src_keypoints[1, :]
    sq_dist = dx**2 + dy**2

    # np.spacing(1) guards against division by a zero area.
    e = sq_dist / variances / (src_area + np.spacing(1)) / 2
    return np.sum(np.exp(-e), axis=1) / e.shape[1]
5. segm.py
"""
处理 COCO 格式的 segmentation masks 的函数.
所使用到的项有:
- mask: 2D numpy array 格式的二值 mask. a binary mask encoded as a 2D numpy array
- segm: segmentation mask 格式,(COCO 有两种 segmentation mask 格式:polygon or RLE)
- polygon: COCO 的多边形格式(polygon format)
- RLE: COCO's run length encoding format
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
import pycocotools.mask as mask_util
def flip_segms(segms, height, width):
    """Left/right flip each mask in a list of masks.

    Args:
        segms: list of segmentations; a list entry is treated as a list of
            polygons and a dict entry as an RLE.
        height: image height, used when converting uncompressed RLEs.
        width: image width.

    Returns:
        A new list with the flipped segmentations, in the same order.

    Raises:
        AssertionError: if an entry is neither a list nor a dict.
    """

    def _flip_poly(poly, width):
        # Polygons are flat [x0, y0, x1, y1, ...]; mirror the x entries only.
        coords = np.array(poly)
        coords[0::2] = width - coords[0::2] - 1
        return coords.tolist()

    def _flip_rle(rle, height, width):
        if 'counts' in rle and type(rle['counts']) is list:
            # A list of counts marks an uncompressed RLE; convert it first.
            rle = mask_util.frPyObjects([rle], height, width)
        decoded = mask_util.decode(rle)
        mirrored = decoded[:, ::-1, :]
        return mask_util.encode(np.array(mirrored, order='F', dtype=np.uint8))

    flipped_segms = []
    for segm in segms:
        if type(segm) is list:
            flipped = [_flip_poly(poly, width) for poly in segm]
        else:
            assert type(segm) == dict
            flipped = _flip_rle(segm, height, width)
        flipped_segms.append(flipped)
    return flipped_segms
def polys_to_mask(polygons, height, width):
    """Convert a COCO polygon segmentation to a binary float32 mask.

    The polygons are interpreted as enclosed regions in a height x width
    image. The resulting mask has shape (height, width).
    """
    rles = mask_util.frPyObjects(polygons, height, width)
    per_poly_masks = np.array(mask_util.decode(rles), dtype=np.float32)
    # Merge the per-polygon masks along the last axis, then binarize.
    covered = np.sum(per_poly_masks, axis=2) > 0
    return np.array(covered, dtype=np.float32)
def mask_to_bbox(mask):
    """Compute the tight bounding box of a binary mask.

    Returns:
        A float32 array (x0, y0, x1, y1) with inclusive coordinates, or None
        when the mask has no positive column or row sum.
    """
    col_hits = np.sum(mask, axis=0) > 0
    row_hits = np.sum(mask, axis=1) > 0
    xs = np.nonzero(col_hits)[0]
    ys = np.nonzero(row_hits)[0]
    if len(xs) == 0 or len(ys) == 0:
        return None
    # First and last occupied column/row give the box extent.
    box = (xs[0], ys[0], xs[-1], ys[-1])
    return np.array(box, dtype=np.float32)
def polys_to_mask_wrt_box(polygons, box, M):
    """Convert a COCO polygon segmentation to an M x M binary float32 mask.

    The polygons are interpreted as enclosed regions relative to `box`
    (x1, y1, x2, y2), rescaled so that the box spans the M x M mask. The
    resulting mask has shape (M, M).
    """
    # Guard against degenerate boxes: use at least 1 for width and height.
    box_w = np.maximum(box[2] - box[0], 1)
    box_h = np.maximum(box[3] - box[1], 1)

    def _to_box_frame(poly):
        # Shift into the box's frame and scale x/y to the mask resolution.
        pts = np.array(poly, dtype=np.float32)
        pts[0::2] = (pts[0::2] - box[0]) * M / box_w
        pts[1::2] = (pts[1::2] - box[1]) * M / box_h
        return pts

    polygons_norm = [_to_box_frame(poly) for poly in polygons]

    rles = mask_util.frPyObjects(polygons_norm, M, M)
    per_poly_masks = np.array(mask_util.decode(rles), dtype=np.float32)
    # Merge the per-polygon masks along the last axis, then binarize.
    covered = np.sum(per_poly_masks, axis=2) > 0
    return np.array(covered, dtype=np.float32)
def polys_to_boxes(polys):
    """Convert a list of polygon groups into an array of bounding boxes.

    Each entry of `polys` is a list of flat polygons
    [x0, y0, x1, y1, ...]; its box is the extent over all of them.

    Returns:
        A float32 array of shape (len(polys), 4) with rows (x0, y0, x1, y1).
    """
    boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32)
    for row, poly in enumerate(polys):
        # Even positions hold x coordinates, odd positions hold y.
        x_min = min(min(p[::2]) for p in poly)
        x_max = max(max(p[::2]) for p in poly)
        y_min = min(min(p[1::2]) for p in poly)
        y_max = max(max(p[1::2]) for p in poly)
        boxes_from_polys[row, :] = [x_min, y_min, x_max, y_max]
    return boxes_from_polys
def rle_mask_voting(top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'):
    """Return new masks (one per entry of `top_masks`) built by combining
    the overlapping masks found in `all_masks`.

    Two combination methods are supported:
      - AVG: weighted average of the overlapping mask pixels, binarized with
        `binarize_thresh`
      - UNION: union of all the overlapping mask pixels

    Args:
        top_masks: RLE masks to refine.
        all_masks: RLE masks that may vote.
        all_dets: array with one row per entry of `all_masks`; columns 0..3
            are the box and column 4 the score.
        iou_thresh: minimum IoU with a top mask for a mask to vote.
        binarize_thresh: threshold applied to the AVG soft mask.
        method: 'AVG' or 'UNION'.

    Returns:
        A list of RLE masks, or None when `top_masks` is empty.

    Raises:
        NotImplementedError: if `method` is unknown.
    """
    if len(top_masks) == 0:
        return

    def _decode(rle):
        return np.array(mask_util.decode(rle), dtype=np.float32)

    num_all = len(all_masks)
    all_not_crowd = [False] * num_all
    top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)
    decoded_all_masks = [_decode(rle) for rle in all_masks]
    decoded_top_masks = [_decode(rle) for rle in top_masks]
    all_boxes = all_dets[:, :4].astype(np.int32)
    all_scores = all_dets[:, 4]

    # Per-pixel voting weight: the detection score inside the detection's
    # box (clipped to the mask extent) and a small floor everywhere else.
    mask_h, mask_w = decoded_all_masks[0].shape[0], decoded_all_masks[0].shape[1]
    mask_weights = np.zeros((num_all, mask_h, mask_w))
    for k in range(num_all):
        ref_box = all_boxes[k]
        x_0 = max(ref_box[0], 0)
        x_1 = min(ref_box[2] + 1, mask_w)
        y_0 = max(ref_box[1], 0)
        y_1 = min(ref_box[3] + 1, mask_h)
        mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]
    mask_weights = np.maximum(mask_weights, 1e-5)

    top_segms_out = []
    for k, (top_rle, top_decoded) in enumerate(zip(top_masks, decoded_top_masks)):
        # An empty top mask is passed through unchanged.
        if top_decoded.sum() == 0:
            top_segms_out.append(top_rle)
            continue

        inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
        # A single voter leaves the top mask unchanged.
        if len(inds_to_vote) == 1:
            top_segms_out.append(top_rle)
            continue

        masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]
        if method == 'AVG':
            ws = mask_weights[inds_to_vote]
            soft_mask = np.average(masks_to_vote, axis=0, weights=ws)
            mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
        elif method == 'UNION':
            # Any pixel set in at least one voter is part of the union.
            soft_mask = np.sum(masks_to_vote, axis=0)
            mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
        else:
            raise NotImplementedError('Method {} is unknown'.format(method))

        encoded = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))
        top_segms_out.append(encoded[0])

    return top_segms_out
def rle_mask_nms(masks, dets, thresh, mode='IOU'):
    """Greedy non-maximum suppression based on an overlap measure between
    masks.

    The overlap measure is selected by `mode`:
      - 'IOU': standard intersection over union
      - 'IOMA': intersection over minimum area (crowd-mode overlap made
        symmetric by taking the elementwise maximum with its transpose)
      - 'CONTAINMENT': crowd-mode overlap used as is

    Args:
        masks: list of RLE masks.
        dets: array with one row per mask; column 4 is the score.
        thresh: masks overlapping a kept mask by more than this are dropped.
        mode: one of 'IOU', 'IOMA', 'CONTAINMENT'.

    Returns:
        The list of kept mask indices, highest score first.

    Raises:
        NotImplementedError: if `mode` is unknown.
    """
    num_masks = len(masks)
    if num_masks == 0:
        return []
    if num_masks == 1:
        return [0]

    if mode == 'IOU':
        # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2))
        ious = mask_util.iou(masks, masks, [False] * num_masks)
    elif mode == 'IOMA':
        ious = mask_util.iou(masks, masks, [True] * num_masks)
        # Make the crowd-mode overlap symmetric.
        ious = np.maximum(ious, ious.transpose())
    elif mode == 'CONTAINMENT':
        ious = mask_util.iou(masks, masks, [True] * num_masks)
    else:
        raise NotImplementedError('Mode {} is unknown'.format(mode))

    scores = dets[:, 4]
    order = np.argsort(-scores)

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)
        ovr = ious[best, rest]
        # Carry forward only the candidates not suppressed by `best`.
        order = rest[np.where(ovr <= thresh)[0]]

    return keep
def rle_masks_to_boxes(masks):
    """Compute the bounding box of each mask in a list of RLE encoded masks.

    Returns:
        An empty list when `masks` is empty. Otherwise a tuple
        `(boxes, keep)`: `boxes` has one (x0, y0, x1, y1) row per mask (rows
        of empty masks stay all zero) and `keep` holds the indices of the
        non-empty masks.
    """
    if len(masks) == 0:
        return []

    decoded_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks
    ]

    num_masks = len(decoded_masks)
    boxes = np.zeros((num_masks, 4))
    keep = np.ones(num_masks, dtype=bool)
    for i, mask in enumerate(decoded_masks):
        if mask.sum() == 0:
            # Nothing to bound; mark the mask as dropped.
            keep[i] = False
            continue
        # Columns / rows that contain at least one mask pixel.
        cols = np.where(mask.sum(axis=0) > 0)[0]
        rows = np.where(mask.sum(axis=1) > 0)[0]
        boxes[i, :] = (cols.min(), rows.min(), cols.max(), rows.max())

    return boxes, np.where(keep)[0]