首先,导入包。
import os
import sys
import itertools
import math
import logging
import json
import re
import random
from collections import OrderedDict
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.lines as lines
from matplotlib.patches import Polygon
#设置项目根目录
ROOT_DIR = os.path.abspath("../../")
#导入Mask RCNN
sys.path.append(ROOT_DIR)
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log
%matplotlib inline
配置信息。
# Run exactly ONE of the two configuration options below.

# Option 1: Shapes toy dataset
# import shapes
# config = shapes.ShapesConfig()

# Option 2: MS COCO dataset
import coco
config = coco.CocoConfig()
# Path to the COCO dataset (placeholder; point it at the real location).
COCO_DIR = "path/to/COCO"

# Load the dataset that matches the selected configuration.
if config.NAME == 'shapes':
    # Shapes dataset (requires the `shapes` import from option 1).
    dataset = shapes.ShapesDataset()
    dataset.load_shapes(500, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
elif config.NAME == "coco":
    # COCO dataset, "train" subset.
    dataset = coco.CocoDataset()
    dataset.load_coco(COCO_DIR, "train")
else:
    # Fail here with a clear message instead of a NameError on `dataset` below.
    raise ValueError("Unsupported config.NAME: {!r}".format(config.NAME))

# prepare() must be called before the dataset is used.
dataset.prepare()

# Print a summary of the dataset: image count, class count, and class names.
print("Image Count: {}".format(len(dataset.image_ids)))
print("Class Count: {}".format(dataset.num_classes))
for i, info in enumerate(dataset.class_info):
    print("{:3}. {:50}".format(i, info['name']))
可视化数据集信息和mask。
# Pick 4 random image ids (np.random.choice samples with replacement by
# default, so an id may repeat) and display each image with its masks.
image_ids = np.random.choice(dataset.image_ids, 4)
for image_id in image_ids:
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset.class_names)
这里代码没有使用数据集提供的bounding box坐标,而是通过mask计算得到。这样可以不管是什么数据集都可以用同一方法处理bounding boxes,并且可以很容易进行缩放,旋转和裁剪图像,因为我们是通过更新mask来产生bounding boxes,而不是通过每一种图像变换来计算bounding box的变换。
# Load one randomly chosen image together with its instance masks.
image_id = random.choice(dataset.image_ids)
image = dataset.load_image(image_id)
mask, class_ids = dataset.load_mask(image_id)

# Bounding boxes are computed from the masks, not read from the dataset.
bbox = utils.extract_bboxes(mask)

# Report which image was picked, then log each array.
print("image_id ", image_id, dataset.image_reference(image_id))
for label, array in (
        ("image", image),
        ("mask", mask),
        ("class_ids", class_ids),
        ("bbox", bbox)):
    log(label, array)

# Show the image with its instances.
visualize.display_instances(
    image, bbox, mask, class_ids, dataset.class_names)
image_id 74886 http://cocodataset.org/#explore?id=118535
image shape: (375, 500, 3) min: 0.00000 max: 255.00000
mask shape: (375, 500, 5) min: 0.00000 max: 1.00000
class_ids shape: (5,) min: 1.00000 max: 35.00000
bbox shape: (5, 4) min: 1.00000 max: 329.00000
因为在一个batch中要处理多幅图像,所以所有图像统一缩放到同一个尺寸(1024x1024)。缩放时会保持图像的长宽比;当图像不是正方形时,会在图像上/下或者左/右填充0。
# Load one randomly chosen image and its masks.
image_id = np.random.choice(dataset.image_ids, 1)[0]
image = dataset.load_image(image_id)
mask, class_ids = dataset.load_mask(image_id)
# Keep the pre-resize shape so it can be reported below.
original_shape = image.shape

# Resize the image according to the config, then apply the returned
# scale and padding to the masks as well.
resize_options = dict(
    min_dim=config.IMAGE_MIN_DIM,
    max_dim=config.IMAGE_MAX_DIM,
    mode=config.IMAGE_RESIZE_MODE,
)
image, window, scale, padding, _ = utils.resize_image(image, **resize_options)
mask = utils.resize_mask(mask, scale, padding)

# Bounding boxes are computed from the resized masks.
bbox = utils.extract_bboxes(mask)

# Report the picked image and its original shape, then log each array.
print("image_id: ", image_id, dataset.image_reference(image_id))
print("Original shape: ", original_shape)
for label, array in (
        ("image", image),
        ("mask", mask),
        ("class_ids", class_ids),
        ("bbox", bbox)):
    log(label, array)

# Show the resized image with its instances.
visualize.display_instances(
    image, bbox, mask, class_ids, dataset.class_names)
image_id: 6480 http://cocodataset.org/#explore?id=402563
Original shape: (476, 640, 3)
image shape: (1024, 1024, 3) min: 0.00000 max: 255.00000
mask shape: (1024, 1024, 32) min: 0.00000 max: 1.00000
class_ids shape: (32,) min: 1.00000 max: 77.00000
bbox shape: (32, 4) min: 1.00000 max: 991.00000
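当训练高分辨率图像时,产生的instance二值masks会比较大。比如,当训练1024x1024分辨率的图像时,单个instance的mask需要1MB的内存空间。如果一幅图像有100个instances,就需要100MB的空间来存储masks。
为加快训练速度,对masks做了以下优化:只保存bounding box范围内的mask像素,而不是整幅图像大小的mask;并把mask缩放到较小的固定尺寸(即mini mask,例如下面输出中的56x56),需要时再用utils.expand_mask还原到图像尺寸。下面先加载不使用mini mask的样本作为对比: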
# Load the ground truth of a random image with mini masks disabled.
image_id = np.random.choice(dataset.image_ids, 1)[0]
image, image_meta, class_ids, bbox, mask = modellib.load_image_gt(
    dataset, config, image_id, use_mini_mask=False)

# Log every returned array.
for label, array in (
        ("image", image),
        ("image_meta", image_meta),
        ("class_ids", class_ids),
        ("bbox", bbox),
        ("mask", mask)):
    log(label, array)

# Show the image followed by at most the first 7 mask channels.
shown = min(mask.shape[-1], 7)
mask_panels = [mask[:, :, channel] for channel in range(shown)]
display_images([image] + mask_panels)

visualize.display_instances(
    image, bbox, mask, class_ids, dataset.class_names)
image shape: (1024, 1024, 3) min: 0.00000 max: 255.00000
image_meta shape: (89,) min: 0.00000 max: 23221.00000
bbox shape: (1, 5) min: 62.00000 max: 578.00000
mask shape: (1024, 1024, 1) min: 0.00000 max: 1.00000
# Load the ground truth for image_id again, this time with mini masks
# enabled (and augment=True).
image, image_meta, class_ids, bbox, mask = modellib.load_image_gt(
    dataset, config, image_id, augment=True, use_mini_mask=True)
log("mask", mask)

# Show the image followed by at most the first 7 mini-mask channels.
shown = min(mask.shape[-1], 7)
mask_panels = [mask[:, :, channel] for channel in range(shown)]
display_images([image] + mask_panels)

# Expand the mini masks back to the image shape before drawing instances.
mask = utils.expand_mask(bbox, mask, image.shape)
visualize.display_instances(
    image, bbox, mask, class_ids, dataset.class_names)
mask shape: (56, 56, 1) min: 0.00000 max: 1.00000
Anchors的顺序是很重要的。在训练和预测阶段使用的顺序应该是相同的,并且必须匹配卷积执行的顺序。
在FPN网络,anchors的顺序必须易于匹配卷积层的输出,用于预测anchor的分数和偏移。
Anchor Stride:在FPN结构中,feature maps只在前几层的分辨率较高。例如,输入图像的尺寸是1024x1024,则第一层的feature map的大小是256x256,产生大约200k个anchors(256×256×3)。这些anchors的大小是32x32像素,它们的stride相对于图像来说是4个像素,所以这里有很大的重叠。如果我们在feature map上每隔一个cell生成anchors的话就可以显著地减少负载。例如,将stride设置为2可以将anchors的数量减少到1/4。
所以和paper不同的是,这里设置anchor的stride等于2。
# Generate the anchors for every level of the feature pyramid.
backbone_shapes = modellib.compute_backbone_shapes(config, config.IMAGE_SHAPE)
anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                         config.RPN_ANCHOR_RATIOS,
                                         backbone_shapes,
                                         config.BACKBONE_STRIDES,
                                         config.RPN_ANCHOR_STRIDE)

# Print a summary of the anchors.
num_levels = len(backbone_shapes)
anchors_per_cell = len(config.RPN_ANCHOR_RATIOS)
print("Count: ", anchors.shape[0])
print("Scales: ", config.RPN_ANCHOR_SCALES)
print("ratios: ", config.RPN_ANCHOR_RATIOS)
print("Anchors per Cell: ", anchors_per_cell)
print("Levels: ", num_levels)

# Number of anchors on each pyramid level; used later to slice `anchors`
# level by level.
anchors_per_level = []
for level in range(num_levels):
    num_cells = backbone_shapes[level][0] * backbone_shapes[level][1]
    # The cell count is divided by RPN_ANCHOR_STRIDE**2 to account for the
    # anchor stride along both axes.
    anchors_per_level.append(anchors_per_cell * num_cells // config.RPN_ANCHOR_STRIDE**2)
    print("Anchors in Level {}: {}".format(level, anchors_per_level[level]))
Count: 65472
Scales: (32, 64, 128, 256, 512)
ratios: [0.5, 1, 2]
Anchors per Cell: 3
Levels: 5
Anchors in Level 0: 49152
Anchors in Level 1: 12288
Anchors in Level 2: 3072
Anchors in Level 3: 768
Anchors in Level 4: 192
# Visualize, for every pyramid level, the anchors of the cell at the center
# of that level's feature map.

# Load a random image and show it.
image_id = np.random.choice(dataset.image_ids, 1)[0]
image, image_meta, _, _, _ = modellib.load_image_gt(dataset, config, image_id)
fig, ax = plt.subplots(1, figsize=(10, 10))
ax.imshow(image)
levels = len(backbone_shapes)
# Build the palette once so every level is colored from the same set of
# colors (it does not depend on the loop variable).
colors = visualize.random_colors(levels)

for level in range(levels):
    # Slice this level's anchors out of the flat array: they start after the
    # anchors of all previous levels.
    level_start = sum(anchors_per_level[:level])
    level_anchors = anchors[level_start:level_start+anchors_per_level[level]]
    print("Level {}. Anchors: {:6} Feature map Shape: {}".format(level, level_anchors.shape[0],
                                                                backbone_shapes[level]))
    # Offset (within this level) of the first anchor of the center cell,
    # taking the anchor stride into account.
    center_cell = backbone_shapes[level] // 2
    center_anchor = anchors_per_cell * (
        (center_cell[0] * backbone_shapes[level][1] / config.RPN_ANCHOR_STRIDE**2)
        + center_cell[1] / config.RPN_ANCHOR_STRIDE)
    level_center = int(center_anchor)

    # Draw the anchors of that cell; brightness increases with the anchor index.
    for i, rect in enumerate(level_anchors[level_center:level_center+anchors_per_cell]):
        y1, x1, y2, x2 = rect
        p = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=2, facecolor='none',
                              edgecolor=(i+1)*np.array(colors[level]) / anchors_per_cell)
        ax.add_patch(p)
# Create a data generator.
random_rois = 2000
g = modellib.data_generator(
    dataset, config, shuffle=True, random_rois=random_rois,
    batch_size=4,
    detection_targets=True)

# Fetch the next batch.
if random_rois:
    # With random ROIs the inputs also carry rpn_rois and rois, and the
    # outputs carry the detection targets.
    [normalized_images, image_meta, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks, rpn_rois, rois], \
        [mrcnn_class_ids, mrcnn_bbox, mrcnn_mask] = next(g)

    log("rois", rois)
    log("mrcnn_class_ids", mrcnn_class_ids)
    log("mrcnn_bbox", mrcnn_bbox)
    log("mrcnn_mask", mrcnn_mask)
else:
    # gt_class_ids must be unpacked here as well: it is logged below
    # regardless of which branch ran.
    [normalized_images, image_meta, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks], _ = next(g)

log("gt_class_ids", gt_class_ids)
log("gt_boxes", gt_boxes)
log("gt_masks", gt_masks)
log("rpn_match", rpn_match)
log("rpn_bbox", rpn_bbox)

# Report the id and reference of the first image in the batch.
image_id = modellib.parse_image_meta(image_meta)["image_id"][0]
print("image_id: ", image_id, dataset.image_reference(image_id))
# mrcnn_class_ids is only bound by the random_rois branch of the generator
# unpacking, so only touch it in that case.
if random_rois:
    # Drop the last dim of mrcnn_class_ids. It only exists to satisfy the
    # Keras restriction on target shapes.
    mrcnn_class_ids = mrcnn_class_ids[:,:,0]

# Index of the batch item to inspect.
b = 0

# Restore the original image (undo the normalization).
sample_image = modellib.unmold_image(normalized_images[b], config)

# Positive anchors are those marked 1 in rpn_match.
positive_anchor_ids = np.where(rpn_match[b] == 1)[0]

# Apply the RPN deltas to the positive anchors. The deltas for this batch item
# are taken from the first len(positive_anchor_ids) rows of rpn_bbox.
refined_anchors = utils.apply_box_deltas(
    anchors[positive_anchor_ids],
    rpn_bbox[b, :len(positive_anchor_ids)] * config.RPN_BBOX_STD_DEV)
log("anchors", anchors)
log("refined_anchors", refined_anchors)

# Count the anchors by match type: 1 = positive, -1 = negative, 0 = neutral.
print("Positive anchors: {}".format(len(positive_anchor_ids)))
negative_anchor_ids = np.where(rpn_match[b] == -1)[0]
print("Negative anchors: {}".format(len(negative_anchor_ids)))
neutral_anchor_ids = np.where(rpn_match[b] == 0)[0]
print("Neutral anchors: {}".format(len(neutral_anchor_ids)))

if random_rois:
    # ROI breakdown by class; classes with a zero count are skipped.
    for c, n in zip(dataset.class_names, np.bincount(mrcnn_class_ids[b].flatten())):
        if n:
            print("{:23}: {}".format(c[:20], n))

# Show the positive anchors together with their refined boxes.
visualize.draw_boxes(sample_image, boxes=anchors[positive_anchor_ids],
                     refined_boxes=refined_anchors)
# Show the negative anchors.
visualize.draw_boxes(sample_image, boxes=anchors[negative_anchor_ids])
# Show a random sample of 100 neutral anchors. They are not used for training.
visualize.draw_boxes(sample_image, boxes=anchors[np.random.choice(neutral_anchor_ids, 100)])
if random_rois:
    # Class-specific bbox deltas: for each ROI, select the deltas of its class.
    bbox_specific = mrcnn_bbox[b, np.arange(mrcnn_bbox.shape[1]), mrcnn_class_ids[b], :]

    # Refined ROIs: apply the (de-normalized) deltas to the ROIs.
    refined_rois = utils.apply_box_deltas(rois[b].astype(np.float32), bbox_specific[:,:4] * config.BBOX_STD_DEV)

    # Class-specific masks: for each ROI, select the mask of its class.
    mask_specific = mrcnn_mask[b, np.arange(mrcnn_mask.shape[1]), :, :, mrcnn_class_ids[b]]

    visualize.draw_rois(sample_image, rois[b], refined_rois, mask_specific, mrcnn_class_ids[b], dataset.class_names)

    # Any repeated ROIs? View each ROI row as a single opaque (void) item so
    # np.unique compares whole rows instead of individual coordinates.
    rows = np.ascontiguousarray(rois[b]).view(np.dtype((np.void, rois.dtype.itemsize * rois.shape[-1])))
    _, idx = np.unique(rows, return_index=True)
    print("Unique ROIs: {} out of {}".format(len(idx), rois.shape[1]))
if random_rois:
    # Display a random sample of ROIs with their masks and bounding boxes.
    # Cap the sample size: random.sample raises ValueError when asked for
    # more items than the population holds.
    ids = random.sample(range(rois.shape[1]), min(8, rois.shape[1]))

    images = []
    titles = []
    for i in ids:
        # Draw on a copy so sample_image stays untouched: the ROI with
        # color [255, 0, 0], the refined ROI with color [0, 255, 0].
        roi_image = visualize.draw_box(sample_image.copy(), rois[b,i,:4].astype(np.int32), [255, 0, 0])
        roi_image = visualize.draw_box(roi_image, refined_rois[i].astype(np.int64), [0, 255, 0])
        images.append(roi_image)
        titles.append("ROI {}".format(i))
        # Follow each ROI image with its class-specific mask, titled with
        # the class name (truncated to 20 characters).
        images.append(mask_specific[i] * 255)
        titles.append(dataset.class_names[mrcnn_class_ids[b,i]][:20])

    display_images(images, titles, cols=4, cmap="Blues", interpolation="none")
# Check the ratio of positive ROIs over a set of images.
if random_rois:
    # Number of batches (one image each, since batch_size=1) to sample.
    limit = 10
    temp_g = modellib.data_generator(
        dataset, config, shuffle=True, random_rois=10000,
        batch_size=1, detection_targets=True)
    total = 0
    for i in range(limit):
        # Only the class-id targets are needed; ROIs with id > 0 are positive.
        _, [ids, _, _] = next(temp_g)
        positive_rois = np.sum(ids[0] > 0)
        total += positive_rois
        # Per-image count and fraction of positive ROIs.
        print("{:5} {:5.2f}".format(positive_rois, positive_rois/ids.shape[1]))
    # Overall fraction of positive ROIs across all sampled images.
    print("Average percent: {:.2f}".format(total/(limit*ids.shape[1])))