使用 Mask R-CNN 训练 COCO 格式的数据集
标注工具使用 coco-annotator。
将 coco.py 修改为如下内容:
"""
Mask R-CNN
Configurations and data loading code for MS COCO.
Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
------------------------------------------------------------
Usage: import the module (see Jupyter notebooks for examples), or run from
the command line as such:
# Train a new model starting from pre-trained COCO weights
python3 coco.py train --dataset=/path/to/coco/ --model=coco
# Train a new model starting from ImageNet weights. Also auto download COCO dataset
python3 coco.py train --dataset=/path/to/coco/ --model=imagenet --download=True
# Continue training a model that you had trained earlier
python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5
# Continue training the last model you trained
python3 coco.py train --dataset=/path/to/coco/ --model=last
# Run COCO evaluation on the last model you trained
python3 coco.py evaluate --dataset=/path/to/coco/ --model=last
"""
import os
import sys
import time
import numpy as np
import imgaug # https://github.com/aleju/imgaug (pip3 install imgaug)
# 下载和安装Python COCO工具 https://github.com/waleedka/coco
# 这个fork的原始地址 https://github.com/pdollar/coco 修复在Python 3下的bug.
# I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50
# If the PR is merged then use the original repo.
# Note: Edit PythonAPI/Makefile and replace "python" with "python3".
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils
import zipfile
import urllib.request
import shutil
# Root directory of the project.
# NOTE: "../../" is resolved against the current working directory, not
# against the location of this file, so run the script from its own folder.
ROOT_DIR = os.path.abspath("../../")
# Import Mask RCNN (the path must be extended before the mrcnn imports below)
sys.path.append(ROOT_DIR) # To find the local version of the library
from mrcnn.config import Config
from mrcnn import model as modellib, utils
# Path to the pre-trained COCO weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Directory for logs and model checkpoints; can be overridden on the
# command line with --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
# Dataset year used when --year is not given (kept as a string)
DEFAULT_DATASET_YEAR = "2014"
############################################################
# 配置
############################################################
class CocoConfig(Config):
    """Training configuration for a COCO-format dataset.

    Subclasses the base Config class and overrides only the values
    listed below.
    """

    # Name of this configuration.
    NAME = "coco"

    # Number of classes, background included: 1 background + 2 object
    # classes for this custom dataset.
    NUM_CLASSES = 1 + 2

    # Images per GPU. Set to 1 here; adjust it to the GPU memory available.
    IMAGES_PER_GPU = 1

    # GPU_COUNT is not overridden (per the original note the default is 1).
    # Uncomment to train on 8 GPUs:
    # GPU_COUNT = 8

    # Image dimension settings passed on to the base configuration.
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 1152

    STEPS_PER_EPOCH = 100
############################################################
# 数据集
############################################################
class CocoDataset(utils.Dataset):
    """Dataset that loads images, classes and instance masks from
    COCO-format annotation files (e.g. exported by coco-annotator).
    """

    def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
                  class_map=None, return_coco=False, auto_download=False):
        """Load a subset of a COCO-format dataset.

        dataset_dir: Root directory of the dataset.
        subset: Which subset to load (train, val, minival, valminusminival).
        year: Dataset year as a string ("2014", "2017"), not an integer.
        class_ids: If provided, only load images that contain these class IDs.
        class_map: TODO: Not implemented yet. Intended to map classes from
            different datasets to the same class ID.
        return_coco: If True, return the COCO object.
        auto_download: If True, download and unzip the MS-COCO images and
            annotations first.

        Returns the COCO object when return_coco is true, otherwise None.
        """
        if auto_download is True:
            self.auto_download(dataset_dir, subset, year)

        # Build paths with os.path.join so they work on every platform
        # instead of hard-coding Windows back-slashes.
        annotation_path = os.path.join(
            dataset_dir, "annotations",
            "instances_{}{}.json".format(subset, year))
        coco = COCO(annotation_path)
        # minival / valminusminival only have their own annotation files;
        # their images live in the "val" image folder.
        if subset in ("minival", "valminusminival"):
            subset = "val"
        image_dir = os.path.join(dataset_dir, "{}{}".format(subset, year))

        # Load all classes or a subset?
        if not class_ids:
            # All classes
            class_ids = sorted(coco.getCatIds())

        # All images or a subset?
        if class_ids:
            image_ids = []
            for class_id in class_ids:
                image_ids.extend(list(coco.getImgIds(catIds=[class_id])))
            # Remove duplicates
            image_ids = list(set(image_ids))
        else:
            # All images (only reached when the annotations define no categories)
            image_ids = list(coco.imgs.keys())

        # Add classes
        for class_id in class_ids:
            self.add_class("coco", class_id, coco.loadCats(class_id)[0]["name"])

        # Add images
        for image_id in image_ids:
            image = coco.imgs[image_id]
            self.add_image(
                "coco", image_id=image_id,
                path=os.path.join(image_dir, image['file_name']),
                width=image["width"],
                height=image["height"],
                annotations=coco.loadAnns(coco.getAnnIds(
                    imgIds=[image_id], catIds=class_ids, iscrowd=None)))
        if return_coco:
            return coco

    def auto_download(self, dataDir, dataType, dataYear):
        """Download the COCO images and annotations if they are not
        available locally.

        dataDir: Root directory of the dataset.
        dataType: Which subset to fetch (train, val, minival, valminusminival).
        dataYear: Dataset year as a string ("2014", "2017"), not an integer.

        Note:
            For 2014, use "train", "val", "minival", or "valminusminival".
            For 2017, only "train" and "val" annotations are available.
        """
        # minival / valminusminival share the images of the "val" subset.
        if dataType == "minival" or dataType == "valminusminival":
            image_subset = "val"
        else:
            image_subset = dataType
        imgDir = os.path.join(dataDir, "{}{}".format(image_subset, dataYear))
        imgZipFile = os.path.join(dataDir, "{}{}.zip".format(image_subset, dataYear))
        imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(image_subset, dataYear)

        # Create the main folder if it does not exist yet
        os.makedirs(dataDir, exist_ok=True)

        # Download the images if they are not available locally
        if not os.path.exists(imgDir):
            os.makedirs(imgDir)
            print("Downloading images to " + imgZipFile + " ...")
            with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
                shutil.copyfileobj(resp, out)
            print("... done downloading.")
            print("Unzipping " + imgZipFile)
            with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
                zip_ref.extractall(dataDir)
            print("... done unzipping")
        print("Will use images in " + imgDir)

        # Set up the annotation paths
        annDir = os.path.join(dataDir, "annotations")
        if dataType == "minival":
            annZipFile = os.path.join(dataDir, "instances_minival2014.json.zip")
            annFile = os.path.join(annDir, "instances_minival2014.json")
            annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
            unZipDir = annDir
        elif dataType == "valminusminival":
            annZipFile = os.path.join(dataDir, "instances_valminusminival2014.json.zip")
            annFile = os.path.join(annDir, "instances_valminusminival2014.json")
            annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
            unZipDir = annDir
        else:
            annZipFile = os.path.join(dataDir, "annotations_trainval{}.zip".format(dataYear))
            annFile = os.path.join(annDir, "instances_{}{}.json".format(dataType, dataYear))
            annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
            unZipDir = dataDir

        # Download the annotations if they are not available locally
        os.makedirs(annDir, exist_ok=True)
        if not os.path.exists(annFile):
            if not os.path.exists(annZipFile):
                print("Downloading zipped annotations to " + annZipFile + " ...")
                with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
                    shutil.copyfileobj(resp, out)
                print("... done downloading.")
            print("Unzipping " + annZipFile)
            with zipfile.ZipFile(annZipFile, "r") as zip_ref:
                zip_ref.extractall(unZipDir)
            print("... done unzipping")
        print("Will use annotations in " + annFile)

    def load_mask(self, image_id):
        """Load the instance masks for the given image.

        Converts the stored annotations (polygons or RLE) into bitmap masks.

        Returns:
            masks: A bool array of shape [height, width, instance count]
                with one mask per instance.
            class_ids: A 1D int32 array with the class ID of each mask
                (negative for crowd annotations).
        """
        # If this is not a COCO image, delegate to the parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "coco":
            return super(CocoDataset, self).load_mask(image_id)

        instance_masks = []
        class_ids = []
        annotations = image_info["annotations"]
        # Build one [height, width] mask per instance and the matching
        # list of class IDs.
        for annotation in annotations:
            class_id = self.map_source_class_id(
                "coco.{}".format(annotation['category_id']))
            if not class_id:
                continue
            m = self.annToMask(annotation, image_info["height"],
                               image_info["width"])
            # Some objects are so small that they cover less than one
            # pixel and end up rounded away. Skip those objects.
            if m.max() < 1:
                continue
            # Is it a crowd? If so, use a negative class ID.
            # .get() tolerates annotation files that omit the key.
            if annotation.get('iscrowd', 0):
                class_id *= -1
                # For crowd masks, annToMask() sometimes returns a mask
                # smaller than the given dimensions. If so, replace it
                # with a full-image mask.
                if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
                    m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
            instance_masks.append(m)
            class_ids.append(class_id)

        # Pack the instance masks into a single array
        if class_ids:
            # Use the builtin bool: the np.bool alias was removed in NumPy 1.24.
            mask = np.stack(instance_masks, axis=2).astype(bool)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids
        # No usable instance: let the parent class build its default
        # (per the original note, an empty mask).
        return super(CocoDataset, self).load_mask(image_id)

    def image_reference(self, image_id):
        """Return a link to the image on the COCO website."""
        info = self.image_info[image_id]
        if info["source"] == "coco":
            return "http://cocodataset.org/#explore?id={}".format(info["id"])
        # Return the parent's value; the original dropped it and gave None.
        return super(CocoDataset, self).image_reference(image_id)

    # The following two functions are from pycocotools with a few changes.
    def annToRLE(self, ann, height, width):
        """Convert an annotation's segmentation (polygons, uncompressed
        RLE, or RLE) to RLE.

        :return: the segmentation in RLE form
        """
        segm = ann['segmentation']
        if isinstance(segm, list):
            # Polygons -- a single object might consist of multiple parts.
            # Merge all parts into one RLE mask code.
            rles = maskUtils.frPyObjects(segm, height, width)
            rle = maskUtils.merge(rles)
        elif isinstance(segm['counts'], list):
            # Uncompressed RLE
            rle = maskUtils.frPyObjects(segm, height, width)
        else:
            # Already RLE
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann, height, width):
        """Convert an annotation's segmentation (polygons, uncompressed
        RLE, or RLE) to a binary mask.

        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, height, width)
        m = maskUtils.decode(rle)
        return m
############################################################
# COCO 评估
############################################################
def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
    """Arrange detections in the result format described at
    http://cocodataset.org/#format

    Every detection is emitted once for each entry of image_ids.
    Returns a list of result dicts; the list is empty when rois is None.
    """
    # Nothing detected: return an empty list
    if rois is None:
        return []

    detection_count = rois.shape[0]

    def make_entry(coco_image_id, idx):
        # Build the result dict for detection `idx` on one image.
        label = class_ids[idx]
        confidence = scores[idx]
        box = np.around(rois[idx], 1)
        instance_mask = masks[:, :, idx]
        return {
            "image_id": coco_image_id,
            "category_id": dataset.get_source_class_id(label, "coco"),
            # rois are indexed as (0, 1, 2, 3); the output is
            # [box[1], box[0], width, height].
            "bbox": [box[1], box[0], box[3] - box[1], box[2] - box[0]],
            "score": confidence,
            "segmentation": maskUtils.encode(np.asfortranarray(instance_mask))
        }

    return [make_entry(coco_image_id, idx)
            for coco_image_id in image_ids
            for idx in range(detection_count)]
def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Run the official COCO evaluation.

    model: Model object used for inference (its detect() method is called).
    dataset: A dataset object with validation data.
    coco: COCO object holding the ground-truth annotations.
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation.
    limit: If not 0, the number of images to use for evaluation.
    image_ids: Optional dataset image IDs to evaluate; defaults to all
        images of the dataset when None or empty.
    """
    # Pick the images from the dataset. Test emptiness explicitly:
    # `image_ids or ...` raises ValueError for multi-element numpy arrays.
    if image_ids is None or len(image_ids) == 0:
        image_ids = dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get the corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[i]["id"] for i in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load the image
        image = dataset.load_image(image_id)

        # Run detection
        t = time.time()
        r = model.detect([image], verbose=0)[0]
        t_prediction += (time.time() - t)

        # Convert the results to COCO format.
        # Cast masks to uint8 because the COCO tools error out on bool.
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"],
                                           r["masks"].astype(np.uint8))
        results.extend(image_results)

    if results:
        # Load the results. This modifies the results with additional attributes.
        coco_results = coco.loadRes(results)

        # Evaluate
        cocoEval = COCOeval(coco, coco_results, eval_type)
        cocoEval.params.imgIds = coco_image_ids
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
    else:
        # pycocotools' loadRes() inspects the first result and fails on an
        # empty list, so skip the evaluation when nothing was detected.
        print("No detections were produced; skipping COCO evaluation.")

    # Guard against division by zero when there were no images.
    image_count = len(image_ids)
    average = t_prediction / image_count if image_count else 0
    print("Prediction time: {}. Average {}/image".format(
        t_prediction, average))
    print("Total time: ", time.time() - t_start)
############################################################
# 训练
############################################################
def str2bool(value):
    """Convert a command-line value such as "True", "false" or "0" to a bool.

    argparse's ``type=bool`` calls bool() on the raw string, so every
    non-empty value (including "False") becomes True. This converter
    parses the text instead.

    Raises ValueError for unrecognised text; argparse reports that as a
    usage error.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise ValueError("expected a boolean value, got {!r}".format(value))


if __name__ == '__main__':
    import argparse

    # Parse the command line arguments
    parser = argparse.ArgumentParser(
        description='Train Mask R-CNN on MS COCO.')
    parser.add_argument("command",
                        metavar="",
                        help="'train' or 'evaluate' on MS COCO")
    parser.add_argument('--dataset', required=True,
                        metavar="\\path\\to\\coco\\",
                        help='Directory of the MS-COCO dataset')
    parser.add_argument('--year', required=False,
                        default=DEFAULT_DATASET_YEAR,
                        metavar="",
                        help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)')
    parser.add_argument('--model', required=True,
                        metavar="\\path\\to\\weights.h5",
                        help="Path to weights .h5 file or 'coco'")
    parser.add_argument('--logs', required=False,
                        default=DEFAULT_LOGS_DIR,
                        metavar="\\path\\to\\logs\\",
                        help='Logs and checkpoints directory (default=logs/)')
    parser.add_argument('--limit', required=False,
                        default=500,
                        metavar="",
                        help='Images to use for evaluation (default=500)',
                        type=int)
    # str2bool instead of type=bool, so that --download=False really
    # disables the download.
    parser.add_argument('--download', required=False,
                        default=False,
                        metavar="",
                        help='Automatically download and unzip MS-COCO files (default=False)',
                        type=str2bool)
    args = parser.parse_args()
    print("Command: ", args.command)
    print("Model: ", args.model)
    print("Dataset: ", args.dataset)
    print("Year: ", args.year)
    print("Logs: ", args.logs)
    print("Auto Download: ", args.download)

    # Configuration
    if args.command == "train":
        config = CocoConfig()
    else:
        class InferenceConfig(CocoConfig):
            # Set the batch size to 1 since inference runs on one image at
            # a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
            GPU_COUNT = 1
            IMAGES_PER_GPU = 1
            DETECTION_MIN_CONFIDENCE = 0
        config = InferenceConfig()
    config.display()

    # Create the model
    if args.command == "train":
        model = modellib.MaskRCNN(mode="training", config=config,
                                  model_dir=args.logs)
    else:
        model = modellib.MaskRCNN(mode="inference", config=config,
                                  model_dir=args.logs)

    # Select the weights file to load
    if args.model.lower() == "coco":
        model_path = COCO_MODEL_PATH
    elif args.model.lower() == "last":
        # Find the last trained weights
        model_path = model.find_last()
    elif args.model.lower() == "imagenet":
        # Start from ImageNet trained weights
        model_path = model.get_imagenet_weights()
    else:
        model_path = args.model

    # Load the weights
    print("Loading weights ", model_path)
    model.load_weights(model_path, by_name=True)

    # Train or evaluate
    if args.command == "train":
        # Training dataset. Only the "train" subset is loaded here.
        dataset_train = CocoDataset()
        dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
        dataset_train.prepare()

        # Validation dataset
        dataset_val = CocoDataset()
        dataset_val.load_coco(args.dataset, "val", year=args.year, auto_download=args.download)
        dataset_val.prepare()

        # Image augmentation: right/left flip 50% of the time
        augmentation = imgaug.augmenters.Fliplr(0.5)

        # *** This training schedule is only an example. Update it to your needs. ***
        # Stages 1 and 2 are disabled; uncomment them to re-enable.
        #
        # Training - Stage 1
        # print("Training network heads")
        # model.train(dataset_train, dataset_val,
        #             learning_rate=config.LEARNING_RATE,
        #             epochs=40,
        #             layers='heads',
        #             augmentation=augmentation)
        #
        # Training - Stage 2
        # Finetune layers from ResNet stage 4 and up
        # print("Fine tune Resnet stage 4 and up")
        # model.train(dataset_train, dataset_val,
        #             learning_rate=config.LEARNING_RATE,
        #             epochs=120,
        #             layers='4+',
        #             augmentation=augmentation)

        # Training - Stage 3
        # Fine tune all layers
        print("Fine tune all layers")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE / 10,
                    epochs=160,
                    layers='all',
                    augmentation=augmentation)

    elif args.command == "evaluate":
        # Validation dataset
        # NOTE(review): training validates on "val" but evaluation loads
        # "minival"; confirm that instances_minival<year>.json exists for a
        # custom dataset, or switch this to "val".
        dataset_val = CocoDataset()
        coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
        dataset_val.prepare()
        print("Running COCO evaluation on {} images.".format(args.limit))
        evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
    else:
        print("'{}' is not recognized. "
              "Use 'train' or 'evaluate'".format(args.command))
(我不太会排版,上面的代码粘贴后丢失了缩进,请自行核对并恢复缩进。)
在 model.py 中(约第 2088 行),将 load_weights 的函数签名
改为如下形式(类别数与预训练的 COCO 权重不同,因此加载时需要排除这些与类别数相关的输出层):
def load_weights(self, filepath, by_name=False, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc","mrcnn_bbox", "mrcnn_mask"]):
显存(内存)不足的问题:
我的解决办法是增大系统虚拟内存,增加了 40G。