mask-rcnn 在windows10 和 linux环境下均可以运行,笔者配置了两台机器,如下配置:
windows10:
显卡:GTX1070 TI 配置的cuda 10.0 和 cudnn8.0,tensorflow-gpu 版本13.0rc1
ubuntu 18.04:
显卡:GTX1060配置的cuda 9.0 和 cudnn7.0,tensorflow-gpu 版本12.0
实际测试结果:ubuntu 训练速度比 windows10 的速度快很多,大概每运行一步,ubuntu 的时间在0.9s,windows 10 在2s
如果没有显卡,这个项目训练速度极其慢,训练使用内存大约16GB上下
从github上面clone项目到本地,仓库地址如下
https://github.com/matterport/Mask_RCNN.git
下载完成之后,安装requirements.txt 里面的依赖项目,如果本机已经安装,则跳过这些项目
建议手动一项一项安装,而不是
pip3 install -r requirements.txt
前置条件安装好之后,安装mask-rcnn,执行
python setup.py install
如果遇到错误,则编辑这个脚本,跳过install_reqs这个检查项目
预训练模型下载,从github上面下载coco.h5这预先训练好的模型,预先训练好的模型下载页面地址如下:
https://github.com/matterport/Mask_RCNN/releases
预训练的权重的下载地址如下:
https://github.com/matterport/Mask_RCNN/releases/download/v2.1/mask_rcnn_balloon.h5
这个权重是气球项目训练的一个结果集合
标记软件,我使用的是labelme,使用如下命令安装,如果是linux系统,那么使用pip3
pip install labelme
在控制台输入labelme即可打开这个软件,windows版本打开之后如下图1所示
本文采用生成随机的验证码进行数据测试,生成验证码的代码如下pre_data.py,实际上图像在进行训练时在图像预处理的代码可能更复杂一些,这个代码生成100张图片验证码。
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import random
import time
def rnd_char():
    """Return one random ASCII letter or digit as a one-character string.

    A category (lowercase letter, uppercase letter or digit) is drawn
    first, then a code point inside that category's inclusive ASCII range.

    :return: a single character
    """
    # category number -> inclusive ASCII code range
    ascii_ranges = {
        1: (97, 122),  # 'a'..'z'
        2: (65, 90),   # 'A'..'Z'
        3: (48, 57),   # '0'..'9'
    }
    category = random.randint(1, 3)
    low, high = ascii_ranges[category]
    # Convert the drawn ASCII code to its character.
    return chr(random.randint(low, high))
def rnd_color2():
    """Return a random RGB colour with every channel in [32, 127].

    :return: a 3-tuple ``(r, g, b)`` of ints
    """
    return tuple(random.randint(32, 127) for _ in range(3))
def rnd_color():
    """Return a random RGB colour with every channel in [64, 255].

    :return: a 3-tuple ``(r, g, b)`` of ints
    """
    return tuple(random.randint(64, 255) for _ in range(3))
def create_code(font_path=r'E:\pycharm\code_of_auth\font\simfang.ttf',
                out_dir='train_data'):
    """Generate one 240x60 CAPTCHA image with 4 random characters and save it.

    The image is written as ``<out_dir>/auth_code_<millisecond timestamp>.jpg``.
    The output directory is created when missing, and the timestamp is
    bumped until the file name is unused, so images generated within the
    same millisecond do not overwrite each other.

    :param font_path: TrueType font file used to draw the characters
    :param out_dir: directory the JPEG is written to
    :return: None
    """
    import os  # local import: this script's header does not import os

    # 240 x 60 canvas with a light grey background.
    width = 60 * 4
    height = 60
    image = Image.new('RGB', (width, height), (192, 192, 192))
    font = ImageFont.truetype(font_path, 36)
    draw = ImageDraw.Draw(image)

    # Sparse noise: one randomly coloured pixel on a 20 x 10 grid.
    for x in range(0, width, 20):
        for y in range(0, height, 10):
            draw.point((x, y), fill=rnd_color())

    # Draw 4 random letters/digits, one per quarter of the image width.
    chars = []
    for t in range(4):
        c = rnd_char()
        chars.append(c)
        # Random top offset between 1 and height - 30 pixels.
        h = random.randint(1, height - 30)
        # Start of this character's quarter plus a 10-pixel gap.
        w = width // 4 * t + 10
        draw.text((w, h), c, font=font, fill=rnd_color2())
    _str = "".join(chars)

    # A real project would store the code in a database with a timestamp.
    print("验证码生成完毕{}".format(_str))

    os.makedirs(out_dir, exist_ok=True)
    current_time = int(round(time.time() * 1000))
    save_path = os.path.join(out_dir, 'auth_code_{}.jpg'.format(current_time))
    # Never overwrite an image that already exists under the same timestamp.
    while os.path.exists(save_path):
        current_time += 1
        save_path = os.path.join(out_dir, 'auth_code_{}.jpg'.format(current_time))
    image.save(save_path, 'jpeg')
# Generate the 100 sample CAPTCHA images.
for _ in range(100):
    create_code()
生成的图片如图2所示,下图
在终端输入labelme,再打开文件夹,即可定位到标记文件夹,如下图所示,图3
1、在图像进入这一步之前,最好预先处理一次图像
2、图像标记成矩形是在图像中的物体比较容易区分的情况下,最好的情况是能用多边形去标记物体
标记完成图像之后,将所有json文件拷贝至一个文件夹中,我本地命名为:json_file,批量生成数据文件的代码如下ImagePath.py 和labelme_gen.py 。
ImagePath.py:
import glob
import os
# Collect the paths of all files with a given extension inside a directory.
def read_image(paths, type):
    """Return the paths matching ``<paths>/*.<type>``.

    :param paths: directory to search (not recursive)
    :param type: file extension without the leading dot, e.g. ``'json'``
    :return: list of matching paths as produced by ``glob.glob``
    """
    pattern = paths + '/*.' + type
    return list(glob.glob(pattern))
labelme_gen.py:
import ImagePath as ImagePath
import os
import subprocess  # imported here because this script's header only imports os

# Directory holding the labelme annotation (*.json) files.
data_path = 'json_file'
flle_list = ImagePath.read_image(data_path, 'json')
for file in flle_list:
    # Pass the path as its own argument instead of splicing it into a shell
    # string, so paths containing spaces or shell metacharacters still work.
    try:
        completed = subprocess.run(['labelme_json_to_dataset', file])
    except FileNotFoundError:
        print('labelme_json_to_dataset was not found on PATH; is labelme installed?')
        break
    if completed.returncode != 0:
        # Keep converting the remaining files, but do not fail silently.
        print('labelme_json_to_dataset failed for {} (exit code {})'.format(
            file, completed.returncode))
labelme生成的数据文件夹拷贝出来,我命名为labelme_json,准备一个pic文件夹,里面放标记的图片,图像格式png和jpg都可以,还有之前下载的气球模型预训练权重,就可以开始训练了,训练代码文件,train_model.py
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
from mrcnn.config import Config
#import utils
from mrcnn import model as modellib,utils
from mrcnn import visualize
import yaml
from mrcnn.model import log
from PIL import Image
# Working directory the script is launched from.
ROOT_DIR = os.getcwd()
# Directory passed to the model as model_dir (checkpoints/logs).
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
iter_num = 0
# COCO pre-trained weights; downloaded on first run when the file is absent.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
weights_missing = not os.path.exists(COCO_MODEL_PATH)
if weights_missing:
    utils.download_trained_weights(COCO_MODEL_PATH)
# Basic dataset layout (Windows paths; adjust for your own machine).
dataset_root_path = "E:\\pycharm\\code_of_auth\\"
labelme_json_path = "{}{}".format(dataset_root_path, 'labelme_json')
img_floder = "{}{}".format(dataset_root_path, "pic")
# yaml_floder = dataset_root_path
# Every entry of the picture folder is treated as one training image.
imglist = os.listdir(img_floder)
count = len(imglist)
class ShapesConfig(Config):
    """Training configuration for the 36-character CAPTCHA dataset."""

    NAME = "shapes"

    # One GPU, one image per GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # background + 10 digits + 26 letters
    NUM_CLASSES = 1 + 10 + 26

    # Anchor side lengths in pixels. Images and objects are small, so use
    # small anchors; measure a few target objects and put their sizes here.
    RPN_ANCHOR_SCALES = (40, 50, 60)

    TRAIN_ROIS_PER_IMAGE = 100
    STEPS_PER_EPOCH = 100
    VALIDATION_STEPS = 50
# Instantiate the training configuration and call its display() method.
config = ShapesConfig()
config.display()
class TrainDataset(utils.Dataset):
    """Dataset of labelme-annotated CAPTCHA images.

    For every picture ``<name>.<ext>`` the labelme output folder
    ``<name>_json`` is expected to contain ``img.png``, ``label.png`` (pixel
    value ``k`` marks instance ``k``, 0 is background) and ``info.yaml``
    (``label_names`` list whose first entry is the background).
    """

    # Class names in class-id order: id 1 is "0", ..., id 36 is "z".
    CLASS_CHARS = "0123456789abcdefghijklmnopqrstuvwxyz"

    def get_obj_index(self, image):
        """Return the maximum pixel value of the label image.

        ``load_mask`` uses this value as the number of instances.
        """
        n = np.max(image)
        return n

    def from_yaml_get_class(self, image_id):
        """Return the instance label names stored in the image's info.yaml.

        The first entry of ``label_names`` (the background) is removed, so
        the returned list has one entry per annotated label.
        """
        info = self.image_info[image_id]
        with open(info['yaml_path']) as f:
            temp = yaml.load(f.read(), Loader=yaml.FullLoader)
        labels = temp['label_names']
        del labels[0]
        return labels

    def draw_mask(self, num_obj, mask, image, image_id):
        """Fill ``mask[:, :, k]`` with 1 where the label image equals ``k + 1``.

        num_obj: number of instance channels to fill.
        mask: [height, width, num_obj] array, modified in place and returned.
        image: PIL label image whose pixel values are instance indices.
        image_id: index into ``self.image_info`` (provides width/height).
        """
        info = self.image_info[image_id]
        height, width = info['height'], info['width']
        pixels = np.asarray(image)
        if pixels.ndim != 2:
            # A multi-channel pixel never equals an integer instance index,
            # so there is nothing to mark.
            return mask
        region = pixels[:height, :width]
        # One vectorised comparison per instance instead of a Python loop
        # over every pixel.
        for index in range(num_obj):
            mask[:height, :width, index][region == index + 1] = 1
        return mask

    def load_shapes(self, count, img_floder, imglist, dataset_root_path):
        """Register the 36 character classes and the first ``count`` images.

        count: number of entries of ``imglist`` to register.
        img_floder: directory containing the original pictures.
        imglist: picture file names (any extension, e.g. .jpg or .png).
        dataset_root_path: directory containing the ``<name>_json`` folders.

        Raises FileNotFoundError when a picture's ``img.png`` cannot be read.
        """
        # Add classes: ids 1..36 map to "0".."9", "a".."z".
        for class_id, class_name in enumerate(self.CLASS_CHARS, start=1):
            self.add_class("shapes", class_id, class_name)

        for i in range(count):
            print(i)
            # Drop the extension whatever it is, so .png works like .jpg.
            filestr = os.path.splitext(imglist[i])[0]
            json_dir = os.path.join(dataset_root_path, filestr + "_json")
            mask_path = os.path.join(json_dir, "label.png")
            yaml_path = os.path.join(json_dir, "info.yaml")
            img_path = os.path.join(json_dir, "img.png")
            print(img_path)
            # Read the image only to obtain its width and height.
            cv_img = cv2.imread(img_path)
            if cv_img is None:
                raise FileNotFoundError(
                    "cannot read image {!r}".format(img_path))
            self.add_image("shapes", image_id=i,
                           path=os.path.join(img_floder, imglist[i]),
                           width=cv_img.shape[1], height=cv_img.shape[0],
                           mask_path=mask_path, yaml_path=yaml_path)

    def load_mask(self, image_id):
        """Return ``(mask, class_ids)`` for the given image.

        mask: uint8 array [height, width, num_instances] built from label.png.
        class_ids: int32 array with one class id per matched label.
        """
        print("image_id", image_id)
        info = self.image_info[image_id]
        with Image.open(info['mask_path']) as img:
            # Plain int so later arithmetic cannot wrap in a small numpy dtype.
            num_obj = int(self.get_obj_index(img))
            mask = np.zeros([info['height'], info['width'], num_obj],
                            dtype=np.uint8)
            mask = self.draw_mask(num_obj, mask, img, image_id)
        # No occlusion pass: every pixel of label.png carries exactly one
        # instance index, so the per-instance masks cannot overlap.

        labels = self.from_yaml_get_class(image_id)
        labels_form = []
        for label in labels:
            # Use the first class character (in class-id order) that occurs
            # in the label; labels containing none of them are skipped.
            # NOTE(review): a skipped label leaves class_ids shorter than the
            # number of mask channels; confirm labels always contain 0-9/a-z.
            for class_char in self.CLASS_CHARS:
                if class_char in label:
                    labels_form.append(class_char)
                    break
        class_ids = np.array([self.class_names.index(s) for s in labels_form])
        return mask, class_ids.astype(np.int32)
def get_ax(rows=1, cols=1, size=8):
    """Create a ``rows`` x ``cols`` grid of Matplotlib axes and return it.

    Each cell is ``size`` x ``size`` inches, so changing ``size`` is the
    single place to control how large rendered images appear.
    """
    figure_size = (size * cols, size * rows)
    _figure, axes = plt.subplots(rows, cols, figsize=figure_size)
    return axes
# Prepare the train and validation datasets.
# NOTE: both are built from the same image list, so the validation numbers
# are measured on the training images.
dataset_train = TrainDataset()
dataset_train.load_shapes(count, img_floder, imglist, labelme_json_path)
dataset_train.prepare()

dataset_val = TrainDataset()
dataset_val.load_shapes(count, img_floder, imglist, labelme_json_path)
dataset_val.prepare()

# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes.
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training.
    # Older Mask_RCNN versions return (log_dir, checkpoint_path) from
    # find_last(); newer ones return the checkpoint path directly.
    last_weights = model.find_last()
    if isinstance(last_weights, (tuple, list)):
        last_weights = last_weights[1]
    model.load_weights(last_weights, by_name=True)

# `epochs` in model.train() is the cumulative epoch number to reach, not the
# number of additional epochs, so the second stage must target
# HEAD_EPOCHS + FINE_TUNE_EPOCHS or it would train nothing.
HEAD_EPOCHS = 100
FINE_TUNE_EPOCHS = 100

# Stage 1: train the head branches only (layers="heads" freezes the rest).
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=HEAD_EPOCHS, layers='heads')

# Stage 2: fine tune all layers at a 10x lower learning rate.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10,
            epochs=HEAD_EPOCHS + FINE_TUNE_EPOCHS, layers="all")
一般情况下,需要修改的是,种类,目标物体的尺寸,load_shapes和load_mask函数里面的标签名称,load_shapes里面有个图像格式的隐藏代码,如果图片格式是png则
filestr = imglist[i].replace(".png", "")
如果是打算重新训练数据,那么 model.load_weights 修改成自己的模型文件位置,一般是训练好之后保存的。
9、预测模型,预测模型使用forcast.py 和 get_result.py 两个文件,get_result.py 是基于原来的代码复制出来的,为的就是改变里面文字的显示颜色。
get_result.py:
"""
Mask R-CNN
Display and Visualization Functions.
Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""
import os
import sys
import random
import itertools
import colorsys
import numpy as np
from skimage.measure import find_contours
import matplotlib.pyplot as plt
from matplotlib import patches, lines
from matplotlib.patches import Polygon
import IPython.display
# Root directory of the project: the parent of the current working
# directory, resolved to an absolute path.
ROOT_DIR = os.path.abspath("../")
# Make the local copy of the Mask RCNN package importable.
sys.path.append(ROOT_DIR) # appended, so it is searched after existing sys.path entries
from mrcnn import utils
############################################################
# Visualization
############################################################
def display_images(images, titles=None, cols=4, cmap=None, norm=None,
                   interpolation=None):
    """Show a list of images in a grid, optionally with a title each.

    images: list or array of image tensors in HWC format.
    titles: optional. One title per image; blank titles when omitted.
    cols: number of images per row.
    cmap: Optional. Color map to use. For example, "Blues".
    norm: Optional. A Normalize instance to map values to colors.
    interpolation: Optional. Image interpolation to use for display.
    """
    if titles is None:
        titles = [""] * len(images)
    rows = len(images) // cols + 1
    plt.figure(figsize=(14, 14 * rows // cols))
    # Subplot positions are 1-based.
    for position, (picture, caption) in enumerate(zip(images, titles), start=1):
        plt.subplot(rows, cols, position)
        plt.title(caption, fontsize=9)
        plt.axis('off')
        plt.imshow(picture.astype(np.uint8), cmap=cmap,
                   norm=norm, interpolation=interpolation)
    plt.show()
def random_colors(N, bright=True):
    """Return ``N`` RGB colours in shuffled order.

    Hues are spaced evenly in HSV space (full saturation, value 1.0 when
    ``bright`` else 0.7) and converted to RGB, which keeps the colours
    visually distinct.
    """
    value = 1.0 if bright else 0.7
    palette = [colorsys.hsv_to_rgb(step / N, 1, value) for step in range(N)]
    random.shuffle(palette)
    return palette
def apply_mask(image, mask, color, alpha=0.5):
    """Blend ``color`` into ``image`` wherever ``mask`` equals 1.

    image: [height, width, 3] array, modified in place and returned.
    mask: [height, width] array; pixels equal to 1 are tinted.
    color: three channel values that are scaled by 255 before blending.
    alpha: weight of the colour in the blend.
    """
    selected = mask == 1
    for channel in range(3):
        plane = image[:, :, channel]
        tinted = plane * (1 - alpha) + alpha * color[channel] * 255
        image[:, :, channel] = np.where(selected, tinted, plane)
    return image
def display_instances(image, boxes, masks, class_ids, class_names,
                      scores=None, title="",
                      figsize=(16, 16), ax=None,
                      show_mask=True, show_bbox=True,
                      colors=None, captions=None):
    """Draw boxes, captions and masks of detected instances over an image.

    boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
    masks: [height, width, num_instances]
    class_ids: [num_instances]
    class_names: list of class names of the dataset
    scores: (optional) confidence scores for each box
    title: (optional) Figure title
    show_mask, show_bbox: To show masks and bounding boxes or not
    figsize: (optional) the size of the image
    colors: (optional) An array or colors to use with each object
    captions: (optional) A list of strings to use as captions for each object

    Returns the ``plt`` module; the figure is not shown here, so the caller
    decides whether to show or save it.
    """
    instance_count = boxes.shape[0]
    if instance_count:
        assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
    else:
        print("\n*** No instances to display *** \n")

    # Create an axis when the caller did not pass one.
    if not ax:
        _, ax = plt.subplots(1, figsize=figsize)

    # Fall back to random colours.
    colors = colors or random_colors(instance_count)

    # Leave a 10-pixel margin around the image.
    height, width = image.shape[:2]
    ax.set_ylim(height + 10, -10)
    ax.set_xlim(-10, width + 10)
    ax.axis('off')
    ax.set_title(title)

    masked_image = image.astype(np.uint32).copy()
    for idx in range(instance_count):
        color = colors[idx]

        # An all-zero box means the instance has no bbox (likely lost in
        # image cropping); skip it.
        if not np.any(boxes[idx]):
            continue
        y1, x1, y2, x2 = boxes[idx]
        if show_bbox:
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                     alpha=0.7, linestyle="dashed",
                                     edgecolor=color, facecolor='none')
            ax.add_patch(rect)

        # Caption: explicit caption if given, else "<class> <score>".
        if captions:
            caption = captions[idx]
        else:
            class_id = class_ids[idx]
            score = None if scores is None else scores[idx]
            label = class_names[class_id]
            caption = "{} {:.3f}".format(label, score) if score else label
        ax.text(x1, y1 + 8, caption,
                color='w', size=11, backgroundcolor="none")

        # Mask tint
        instance_mask = masks[:, :, idx]
        if show_mask:
            masked_image = apply_mask(masked_image, instance_mask, color)

        # Mask outline. Pad by one pixel on every side so masks touching
        # the image edge still produce closed polygons.
        padded = np.zeros(
            (instance_mask.shape[0] + 2, instance_mask.shape[1] + 2),
            dtype=np.uint8)
        padded[1:-1, 1:-1] = instance_mask
        for verts in find_contours(padded, 0.5):
            # Subtract the padding and flip (y, x) to (x, y)
            verts = np.fliplr(verts) - 1
            ax.add_patch(Polygon(verts, facecolor="none", edgecolor=color))

    ax.imshow(masked_image.astype(np.uint8))
    return plt
def display_differences(image,
                        gt_box, gt_class_id, gt_mask,
                        pred_box, pred_class_id, pred_score, pred_mask,
                        class_names, title="", ax=None,
                        show_mask=True, show_box=True,
                        iou_threshold=0.5, score_threshold=0.5):
    """Draw ground truth (green) and predictions (red) on the same image.

    Prediction captions read "score / IoU"; ground-truth captions are empty.
    """
    # Match predictions to ground truth
    gt_match, pred_match, overlaps = utils.compute_matches(
        gt_box, gt_class_id, gt_mask,
        pred_box, pred_class_id, pred_score, pred_mask,
        iou_threshold=iou_threshold, score_threshold=score_threshold)

    # Ground truth = green. Predictions = red
    gt_color = (0, 1, 0, .8)
    pred_color = (1, 0, 0, 1)
    colors = [gt_color] * len(gt_match) + [pred_color] * len(pred_match)

    # Ground truth first, predictions after, for every per-instance array.
    class_ids = np.concatenate([gt_class_id, pred_class_id])
    scores = np.concatenate([np.zeros([len(gt_match)]), pred_score])
    boxes = np.concatenate([gt_box, pred_box])
    masks = np.concatenate([gt_mask, pred_mask], axis=-1)

    # Prediction captions: IoU with the matched GT box, or the best IoU of
    # that prediction when it matched nothing.
    pred_captions = []
    for i in range(len(pred_match)):
        if pred_match[i] > -1:
            iou = overlaps[i, int(pred_match[i])]
        else:
            iou = overlaps[i].max()
        pred_captions.append("{:.2f} / {:.2f}".format(pred_score[i], iou))
    captions = [""] * len(gt_match) + pred_captions

    # Set title if not provided
    if not title:
        title = "Ground Truth and Detections\n GT=green, pred=red, captions: score/IoU"

    display_instances(
        image,
        boxes, masks, class_ids,
        class_names, scores, ax=ax,
        show_bbox=show_box, show_mask=show_mask,
        colors=colors, captions=captions,
        title=title)
def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10):
    """Draw up to ``limit`` ROIs, their refined boxes and their masks.

    rois: [n, (y1, x1, y2, x2)] boxes in image coordinates.
    refined_rois: [n, 4] the same boxes refined to fit objects better.
    ROIs with class id 0 are drawn as gray dashed boxes only.
    """
    masked_image = image.copy()
    roi_total = rois.shape[0]

    # Pick random ROIs in case there are too many.
    ids = np.arange(roi_total, dtype=np.int32)
    if ids.shape[0] > limit:
        ids = np.random.choice(ids, limit, replace=False)

    fig, ax = plt.subplots(1, figsize=(12, 12))
    if roi_total > limit:
        plt.title("Showing {} random ROIs out of {}".format(
            len(ids), roi_total))
    else:
        plt.title("{} ROIs".format(len(ids)))

    # Show area outside image boundaries.
    ax.set_ylim(image.shape[0] + 20, -20)
    ax.set_xlim(-50, image.shape[1] + 20)
    ax.axis('off')

    for roi_id in ids:
        color = np.random.rand(3)
        class_id = class_ids[roi_id]

        # Original ROI: dashed outline.
        y1, x1, y2, x2 = rois[roi_id]
        ax.add_patch(patches.Rectangle(
            (x1, y1), x2 - x1, y2 - y1, linewidth=2,
            edgecolor=color if class_id else "gray",
            facecolor='none', linestyle="dashed"))

        if not class_id:
            continue

        # Refined ROI: solid outline.
        ry1, rx1, ry2, rx2 = refined_rois[roi_id]
        ax.add_patch(patches.Rectangle(
            (rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
            edgecolor=color, facecolor='none'))
        # Connect the top-left corners of both boxes for easy visualization.
        ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))

        # Label
        label = class_names[class_id]
        ax.text(rx1, ry1 + 8, "{}".format(label),
                color='w', size=11, backgroundcolor="none")

        # Mask
        roi_box = rois[roi_id][:4].astype(np.int32)
        full_mask = utils.unmold_mask(mask[roi_id], roi_box, image.shape)
        masked_image = apply_mask(masked_image, full_mask, color)

    ax.imshow(masked_image)

    # Print stats
    positive_count = class_ids[class_ids > 0].shape[0]
    negative_count = class_ids[class_ids == 0].shape[0]
    print("Positive ROIs: ", positive_count)
    print("Negative ROIs: ", negative_count)
    print("Positive Ratio: {:.2f}".format(
        positive_count / class_ids.shape[0]))
# TODO: Replace with matplotlib equivalent?
def draw_box(image, box, color):
    """Paint the outline of ``box`` onto ``image`` in place and return it.

    box: (y1, x1, y2, x2) in pixel coordinates.
    color: list of 3 int values for RGB.
    Every edge is painted as a 2-pixel-wide band starting at its coordinate.
    """
    y1, x1, y2, x2 = box
    # Horizontal edges (top, bottom).
    for row in (y1, y2):
        image[row:row + 2, x1:x2] = color
    # Vertical edges (left, right).
    for col in (x1, x2):
        image[y1:y2, col:col + 2] = color
    return image
def display_top_masks(image, mask, class_ids, class_names, limit=4):
    """Show the image next to the masks of its ``limit`` largest classes.

    Classes are ranked by total mask area; classes with zero area are
    dropped and unused slots are titled "-".
    """
    to_display = [image]
    titles = ["H x W={}x{}".format(image.shape[0], image.shape[1])]

    # Rank the classes present in this image by mask area, largest first.
    unique_class_ids = np.unique(class_ids)
    mask_area = []
    for cid in unique_class_ids:
        channels = np.where(class_ids == cid)[0]
        mask_area.append(np.sum(mask[:, :, channels]))
    ranked = sorted(zip(unique_class_ids, mask_area),
                    key=lambda pair: pair[1], reverse=True)
    top_ids = [cid for cid, area in ranked if area > 0]

    # Generate images and titles
    for slot in range(limit):
        class_id = top_ids[slot] if slot < len(top_ids) else -1
        # Merge all instances of the class into one image, giving each
        # instance its own value (1, 2, ...).
        class_masks = mask[:, :, np.where(class_ids == class_id)[0]]
        merged = np.sum(
            class_masks * np.arange(1, class_masks.shape[-1] + 1), -1)
        to_display.append(merged)
        titles.append(class_names[class_id] if class_id != -1 else "-")

    display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r")
def plot_precision_recall(AP, precisions, recalls):
    """Plot precision (y) against recall (x) on a new figure.

    AP: Average precision at IoU >= 0.5, shown in the title.
    precisions: list of precision values
    recalls: list of recall values
    """
    _figure, axes = plt.subplots(1)
    axes.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP))
    # Both axes run slightly past 1.0 so the curve is not clipped.
    axes.set_ylim(0, 1.1)
    axes.set_xlim(0, 1.1)
    axes.plot(recalls, precisions)
def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
                  overlaps, class_names, threshold=0.5):
    """Draw a grid showing how ground truth objects are classified.

    gt_class_ids: [N] int. Ground truth class IDs
    pred_class_id: [N] int. Predicted class IDs
    pred_scores: [N] float. The probability scores of predicted classes
    overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
    class_names: list of all class names in the dataset
    threshold: Float. IoU above which a cell is annotated match/wrong
    """
    # Drop entries with class id 0.
    gt_class_ids = gt_class_ids[gt_class_ids != 0]
    pred_class_ids = pred_class_ids[pred_class_ids != 0]

    plt.figure(figsize=(12, 10))
    plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues)

    # Rows are predictions ("name (score)"), columns are ground truth.
    pred_labels = []
    for row, pred_id in enumerate(pred_class_ids):
        pred_labels.append("{} ({:.2f})".format(
            class_names[int(pred_id)], pred_scores[row]))
    plt.yticks(np.arange(len(pred_class_ids)), pred_labels)
    gt_labels = [class_names[int(gt_id)] for gt_id in gt_class_ids]
    plt.xticks(np.arange(len(gt_class_ids)), gt_labels, rotation=90)

    # Cells darker than half the maximum get white text for contrast.
    thresh = overlaps.max() / 2.
    n_rows, n_cols = overlaps.shape[0], overlaps.shape[1]
    for i, j in itertools.product(range(n_rows), range(n_cols)):
        value = overlaps[i, j]
        text = ""
        if value > threshold:
            text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
        if value > thresh:
            color = "white"
        elif value > 0:
            color = "black"
        else:
            color = "grey"
        plt.text(j, i, "{:.3f}\n{}".format(value, text),
                 horizontalalignment="center", verticalalignment="center",
                 fontsize=9, color=color)

    plt.tight_layout()
    plt.xlabel("Ground Truth")
    plt.ylabel("Predictions")
def draw_boxes(image, boxes=None, refined_boxes=None,
               masks=None, captions=None, visibilities=None,
               title="", ax=None):
    """Draw bounding boxes and segmentation masks with different
    customizations.

    boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
    refined_boxes: Like boxes, but draw with solid lines to show
        that they're the result of refining 'boxes'.
    masks: [N, height, width]
    captions: List of N titles to display on each box
    visibilities: (optional) List of values of 0, 1, or 2. Determine how
        prominent each bounding box should be.
    title: An optional title to show over the image
    ax: (optional) Matplotlib axis to draw on.
    """
    # At least one kind of box is required; it fixes the instance count.
    assert boxes is not None or refined_boxes is not None
    box_count = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]

    # Matplotlib Axis
    if not ax:
        _, ax = plt.subplots(1, figsize=(12, 12))

    colors = random_colors(box_count)

    # Show a margin of a tenth of the image height around the image.
    margin = image.shape[0] // 10
    ax.set_ylim(image.shape[0] + margin, -margin)
    ax.set_xlim(-margin, image.shape[1] + margin)
    ax.axis('off')
    ax.set_title(title)

    masked_image = image.astype(np.uint32).copy()
    for i in range(box_count):
        # Visibility selects colour, line style and opacity of the box.
        visibility = visibilities[i] if visibilities is not None else 1
        if visibility == 0:
            color, style, alpha = "gray", "dotted", 0.5
        elif visibility == 1:
            color, style, alpha = colors[i], "dotted", 1
        elif visibility == 2:
            color, style, alpha = colors[i], "solid", 1

        # Boxes
        if boxes is not None:
            if not np.any(boxes[i]):
                # Skip this instance. Has no bbox. Likely lost in cropping.
                continue
            y1, x1, y2, x2 = boxes[i]
            ax.add_patch(patches.Rectangle(
                (x1, y1), x2 - x1, y2 - y1, linewidth=2,
                alpha=alpha, linestyle=style,
                edgecolor=color, facecolor='none'))

        # Refined boxes
        if refined_boxes is not None and visibility > 0:
            ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
            ax.add_patch(patches.Rectangle(
                (rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
                edgecolor=color, facecolor='none'))
            # Connect the top-left corners of the anchor and proposal
            if boxes is not None:
                ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))

        # Captions
        if captions is not None:
            caption = captions[i]
            # If there are refined boxes, display captions on them
            if refined_boxes is not None:
                y1, x1, y2, x2 = ry1, rx1, ry2, rx2
            ax.text(x1, y1, caption, size=11, verticalalignment='top',
                    color='w', backgroundcolor="none",
                    bbox={'facecolor': color, 'alpha': 0.5,
                          'pad': 2, 'edgecolor': 'none'})

        # Masks
        if masks is not None:
            instance_mask = masks[:, :, i]
            masked_image = apply_mask(masked_image, instance_mask, color)
            # Pad by one pixel per side so masks touching the image edge
            # still yield closed polygons.
            padded = np.zeros(
                (instance_mask.shape[0] + 2, instance_mask.shape[1] + 2),
                dtype=np.uint8)
            padded[1:-1, 1:-1] = instance_mask
            for verts in find_contours(padded, 0.5):
                # Subtract the padding and flip (y, x) to (x, y)
                verts = np.fliplr(verts) - 1
                ax.add_patch(
                    Polygon(verts, facecolor="none", edgecolor=color))

    ax.imshow(masked_image.astype(np.uint8))
def display_table(table):
    """Render ``table`` as an HTML table through ``IPython.display``.

    table: an iterable of rows, and each row is an iterable of values.
    Every value is converted with ``str`` and padded to 40 characters.
    """
    rows_html = []
    for row in table:
        cells = "".join("<td>{:40}</td>".format(str(col)) for col in row)
        rows_html.append("<tr>" + cells + "</tr>")
    html = "<table>" + "".join(rows_html) + "</table>"
    IPython.display.display(IPython.display.HTML(html))
def display_weight_stats(model):
    """Display a table of min/max/std for every trainable weight of ``model``.

    Weight names are suffixed with "*** dead?" when all values are equal
    (the weight at index 1 of Conv2D layers is exempt) and with
    "*** Overflow?" when a magnitude exceeds 1000. Nothing is returned;
    the table is passed to ``display_table``.
    """
    header = ["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]
    table = [header]
    for layer in model.get_trainable_layers():
        values = layer.get_weights()  # list of Numpy arrays
        tensors = layer.weights  # list of TF tensors
        is_conv = layer.__class__.__name__ == "Conv2D"
        for index, weight in enumerate(values):
            low, high = weight.min(), weight.max()
            # Detect problematic layers. Exclude biases of conv layers.
            alert = ""
            if low == high and not (is_conv and index == 1):
                alert += "*** dead?"
            if np.abs(low) > 1000 or np.abs(high) > 1000:
                alert += "*** Overflow?"
            table.append([
                tensors[index].name + alert,
                str(weight.shape),
                "{:+9.4f}".format(low),
                "{:+10.4f}".format(high),
                "{:+9.4f}".format(weight.std()),
            ])
    display_table(table)
forcast.py:
import os
import sys
import random
import math
import numpy as np
import skimage.io
import matplotlib
import matplotlib.pyplot as plt
import cv2
import time
from mrcnn.config import Config
from datetime import datetime
import get_result as get_result
import ImagePath as ImagePath
# Working directory the script is launched from.
ROOT_DIR = os.getcwd()
# Make the local copy of the Mask RCNN package importable.
sys.path.append(ROOT_DIR) # appended, so it is searched after existing sys.path entries
from mrcnn import utils
import mrcnn.model as modellib
# Directory handed to the model as model_dir.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
class ShapesConfig(Config):
    """Configuration for the 36-character CAPTCHA dataset.

    Derives from the base Config class and overrides the values below.
    """

    # Give the configuration a recognizable name
    NAME = "shapes"

    # One GPU with one image per GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # background + 10 digits + 26 letters
    NUM_CLASSES = 1 + 10 + 26

    # IMAGE_MIN_DIM = 160
    # IMAGE_MAX_DIM = 256

    # Anchor side lengths in pixels. Images and objects are small, so use
    # small anchors; measure a few target objects and put their sizes here.
    RPN_ANCHOR_SCALES = (40, 50, 60)

    TRAIN_ROIS_PER_IMAGE = 100
    STEPS_PER_EPOCH = 100
    VALIDATION_STEPS = 50
# import train_tongue
# class InferenceConfig(coco.CocoConfig):
class InferenceConfig(ShapesConfig):
    """ShapesConfig variant used for detection, one image at a time."""

    # Batch size = GPU_COUNT * IMAGES_PER_GPU; both are 1 because inference
    # runs on a single image per call.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
config = InferenceConfig()
config.display()

model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)
# Load the weights produced by the training run.
model.load_weights('logs/shapes20190521T0957/mask_rcnn_shapes_0100.h5', by_name=True)

# Class names. The index of a name in this list is its class ID, so the
# order must match the classes registered for training.
class_names = ['BG', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
               'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

# Run detection on every jpg of the test folder and save one annotated
# figure per image into the result folder.
data_path = 'test_jpg'
result_path = 'test_result'
os.makedirs(result_path, exist_ok=True)
flle_list = ImagePath.read_image(data_path, 'jpg')
for file in flle_list:
    image = skimage.io.imread(file)
    a = datetime.now()
    # Run detection
    results = model.detect([image], verbose=1)
    b = datetime.now()
    # total_seconds() keeps the fractional part; .seconds would print 0
    # for any detection faster than one second.
    print("time cost", (b - a).total_seconds())
    r = results[0]
    # display_instances returns the pyplot module with the figure drawn.
    masked_image = get_result.display_instances(image, r['rois'], r['masks'], r['class_ids'], class_names, r['scores'])
    file = file.replace(data_path, result_path)
    masked_image.savefig(file)
    # Close the figure so figures do not pile up in memory across images.
    plt.close()
print('end predict!')
准备好训练好的模型文件、数据、保存结果的文件夹
(1)、logs/shapes20190521T0957/mask_rcnn_shapes_0100.h5
(2)、test_jpg
(3)、test_result
运行forcast.py即可结果预测,预测的结果如下图所示:
最后,我把代码上传至我的github上面,有兴趣的可以自己下载下来玩:
https://github.com/xiaoyangmoa/code_of_auth.git