这篇文章主要介绍记录使用Maskrcnn-Benchmark(连接官网)的训练自己的数据的心得,还算比较顺利。有问题,希望大佬指出,共同进步
$ conda create --name maskrcnn_benchmark
$ source activate maskrcnn_benchmark
# this installs the right pip and dependencies for the fresh python
$ conda install ipython
# maskrnn_benchmark and coco api dependencies
$ pip install ninja yacs cython matplotlib
# follow PyTorch installation in https://pytorch.org/get-started/locally/
# we give the instructions for CUDA 9.0
$ conda install pytorch-nightly -c pytorch
# install torchvision
$ cd ~/github
$ git clone https://github.com/pytorch/vision.git
$ cd vision
$ python setup.py install
# install pycocotools
$ cd ~/github
$ git clone https://github.com/cocodataset/cocoapi.git
$ cd cocoapi/PythonAPI
$ python setup.py build_ext install
# install PyTorch Detection
$ cd ~/github
$ git clone https://github.com/facebookresearch/maskrcnn-benchmark.git
$ cd maskrcnn-benchmark
$ python setup.py build develop
cd demo
# by default, it runs on the GPU
# for best results, use min-image-size 800
python webcam.py --min-image-size 800
# can also run it on the CPU
python webcam.py --min-image-size 300 MODEL.DEVICE cpu
# or change the model that you want to use
python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu
# in order to see the probability heatmaps, pass --show-mask-heatmaps
python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu
import xml.etree.ElementTree as ET
import os
import json
coco = dict()
coco['images'] = []
coco['type'] = 'instances'
coco['annotations'] = []
coco['categories'] = []
category_set = dict()
image_set = set()
category_item_id = 0
image_id = 20190000000
annotation_id = 0
def addCatItem(name):
global category_item_id
category_item = dict()
category_item['supercategory'] = 'none'
category_item_id += 1
category_item['id'] = category_item_id
category_item['name'] = name
coco['categories'].append(category_item)
category_set[name] = category_item_id
return category_item_id
def addImgItem(file_name, size):
global image_id
if file_name is None:
raise Exception('Could not find filename tag in xml file.')
if size['width'] is None:
raise Exception('Could not find width tag in xml file.')
if size['height'] is None:
raise Exception('Could not find height tag in xml file.')
image_id += 1
image_item = dict()
image_item['id'] = image_id
image_item['file_name'] = file_name
image_item['width'] = size['width']
image_item['height'] = size['height']
coco['images'].append(image_item)
image_set.add(file_name)
return image_id
def addAnnoItem(object_name, image_id, category_id, bbox):
global annotation_id
annotation_item = dict()
annotation_item['segmentation'] = []
seg = []
# bbox[] is x,y,w,h
# left_top
seg.append(bbox[0])
seg.append(bbox[1])
# left_bottom
seg.append(bbox[0])
seg.append(bbox[1] + bbox[3])
# right_bottom
seg.append(bbox[0] + bbox[2])
seg.append(bbox[1] + bbox[3])
# right_top
seg.append(bbox[0] + bbox[2])
seg.append(bbox[1])
annotation_item['segmentation'].append(seg)
annotation_item['area'] = bbox[2] * bbox[3]
annotation_item['iscrowd'] = 0
annotation_item['ignore'] = 0
annotation_item['image_id'] = image_id
annotation_item['bbox'] = bbox
annotation_item['category_id'] = category_id
annotation_id += 1
annotation_item['id'] = annotation_id
coco['annotations'].append(annotation_item)
def parseXmlFiles(xml_path):
for f in os.listdir(xml_path):
if not f.endswith('.xml'):
continue
bndbox = dict()
size = dict()
current_image_id = None
current_category_id = None
file_name = None
size['width'] = None
size['height'] = None
size['depth'] = None
xml_file = os.path.join(xml_path, f)
print(xml_file)
tree = ET.parse(xml_file)
root = tree.getroot() #抓根结点元素
if root.tag != 'annotation': #根节点标签
raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
# elem is , , ,
通过上面的代码可以生成自己的instances_train2104.json和instances_val2014.json后,建立datasets文件,在建立coco文件(annotations(存放生成的json文件)与 train2014和val2014文件(存放训练测试数据集)
建立experiment文件夹 存放cfg 与result
模型配置文件:提取configs/e2e_faster_rcnn_R_50_FPN_1x.yaml
e2e_faster_rcnn_R_50_FPN_1x.yaml文件:
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True #是否使用FPN,也就是特征金字塔结构,选择True将在不同的特征图提取候选区域
ANCHOR_STRIDE: (4, 8, 16, 32, 64)# anchor的步长
PRE_NMS_TOP_N_TRAIN: 2000 #训练中nms之前的候选区的数量
PRE_NMS_TOP_N_TEST: 1000 #测试时,nms之后的候选框数量
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TRAIN: ("coco_2014_train", )#训练的数据文件
TEST: ("coco_2014_val",)#测试的数据文件,和paths_catalog.py中DATASETS相对应
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.02 #起始学习率,学习率的调整有多种策略,訪框架自定义了一种策略
WEIGHT_DECAY: 0.0001
STEPS: (5000, 8000)
MAX_ITER: 10000
OUTPUT_DIR : /home/yb/maskrcnn-benchmark/experiments/result #设置的输出目录
如果训练e2e_mask_rcnn_R_50_FPN_1x.yaml文件:
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "maskrcnn-benchmark-master/weights/R-50.pkl" # 预训练模型路径
BACKBONE:
CONV_BODY: "R-50-FPN" # 网络结构
OUT_CHANNELS: 256
RPN:
USE_FPN: True # 是否使用FPN,也就是特征金字塔结构,选择True将在不同的特征图提取候选区域
ANCHOR_STRIDE: (4, 8, 16, 32, 64) # ANCHOR的步长
PRE_NMS_TOP_N_TRAIN: 2000 # 训练时,NMS之前的候选区数量
PRE_NMS_TOP_N_TEST: 1000 # 测试时,NMS之后的候选区数量
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True # 是否使用语义分割功能
DATASETS: # 使用的数据,对应前面的paths_catalog.py
TRAIN: ("coco_2014_train", "coco_2014_val")
TEST: ("coco_2014_test",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
IMS_PER_BATCH: 2 # GPU每次训练的图片数
BASE_LR: 0.0025 # 初始学习速率
WEIGHT_DECAY: 0.0001 # 学习速率衰减大小
STEPS: (30000, 40000) # 学习速率衰减策略(这个博主不太明白,就使用的默认的)
MAX_ITER: 60000 # 最大迭代次数
INPUT:
MIN_SIZE_TRAIN: 300 # 最大图片尺寸
MAX_SIZE_TRAIN: 300 # 最小图片尺寸
TEST:
IMS_PER_BATCH: 1
OUTPUT_DIR: "output" # 输出文件夹
MaskRCNN-Benchmark框架配置文件:maskrcnn_benchmark/config/defaults.py
import os
from yacs.config import CfgNode as CN
_C = CN()
_C.MODEL = CN()
_C.MODEL.RPN_ONLY = False
_C.MODEL.MASK_ON = False
_C.MODEL.DEVICE = "cuda"
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
_C.MODEL.WEIGHT = ""
_C.INPUT = CN()
_C.INPUT.MIN_SIZE_TRAIN = 800 #训练集图片最小尺寸
_C.INPUT.MAX_SIZE_TRAIN = 1333 #训练集图片最大尺寸
_C.INPUT.MIN_SIZE_TEST = 800
_C.INPUT.MAX_SIZE_TEST = 1333
_C.INPUT.PIXEL_MEAN = [102.9801, 115.9465, 122.7717]
_C.INPUT.PIXEL_STD = [1., 1., 1.]
_C.INPUT.TO_BGR255 = True
_C.DATASETS = CN()
_C.DATASETS.TRAIN = () #在模型配置文件中已给出
_C.DATASETS.TEST = ()
_C.DATALOADER = CN()
_C.DATALOADER.NUM_WORKERS = 4 #数据生成启线程数
_C.DATALOADER.SIZE_DIVISIBILITY = 0
_C.DATALOADER.ASPECT_RATIO_GROUPING = True
_C.MODEL.BACKBONE = CN()
_C.MODEL.BACKBONE.CONV_BODY = "R-50-C4"
_C.MODEL.BACKBONE.FREEZE_CONV_BODY_AT = 2
_C.MODEL.BACKBONE.OUT_CHANNELS = 256 * 4
_C.MODEL.RPN = CN()
_C.MODEL.RPN.USE_FPN = False
_C.MODEL.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512)
_C.MODEL.RPN.ANCHOR_STRIDE = (16,)
_C.MODEL.RPN.ASPECT_RATIOS = (0.5, 1.0, 2.0)
_C.MODEL.RPN.STRADDLE_THRESH = 0
_C.MODEL.RPN.FG_IOU_THRESHOLD = 0.7
_C.MODEL.RPN.BG_IOU_THRESHOLD = 0.3
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
_C.MODEL.RPN.PRE_NMS_TOP_N_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOP_N_TEST = 6000
_C.MODEL.RPN.POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOP_N_TEST = 1000
_C.MODEL.RPN.NMS_THRESH = 0.7
_C.MODEL.RPN.MIN_SIZE = 0
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST = 2000
_C.MODEL.ROI_HEADS = CN()
_C.MODEL.ROI_HEADS.USE_FPN = False
_C.MODEL.ROI_HEADS.FG_IOU_THRESHOLD = 0.5
_C.MODEL.ROI_HEADS.BG_IOU_THRESHOLD = 0.5
_C.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
_C.MODEL.ROI_HEADS.SCORE_THRESH = 0.05
_C.MODEL.ROI_HEADS.NMS = 0.5
_C.MODEL.ROI_HEADS.DETECTIONS_PER_IMG = 100
_C.MODEL.ROI_BOX_HEAD = CN()
_C.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_BOX_HEAD.PREDICTOR = "FastRCNNPredictor"
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_BOX_HEAD.POOLER_SCALES = (1.0 / 16,)
#数据集类别数,默认是81,因为coco数据集为80+1(背景),我的数据集只有1个类别,加上背景共2个类别
_C.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 2
_C.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM = 1024
_C.MODEL.ROI_MASK_HEAD = CN()
_C.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_MASK_HEAD.PREDICTOR = "MaskRCNNC4Predictor"
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_MASK_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_MASK_HEAD.MLP_HEAD_DIM = 1024
_C.MODEL.ROI_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256)
_C.MODEL.ROI_MASK_HEAD.RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR = True
_C.MODEL.RESNETS = CN()
_C.MODEL.RESNETS.NUM_GROUPS = 1
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True
_C.MODEL.RESNETS.TRANS_FUNC = "BottleneckWithFixedBatchNorm"
_C.MODEL.RESNETS.STEM_FUNC = "StemWithFixedBatchNorm"
_C.MODEL.RESNETS.RES5_DILATION = 1
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
_C.SOLVER = CN()
_C.SOLVER.MAX_ITER = 40000 #最大迭代次数
_C.SOLVER.BASE_LR = 0.02 #初始学习率,这个通常在模型配置文件中有设置
_C.SOLVER.BIAS_LR_FACTOR = 2
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.WEIGHT_DECAY_BIAS = 0
_C.SOLVER.GAMMA = 0.1
_C.SOLVER.STEPS = (30000,)
_C.SOLVER.WARMUP_FACTOR = 1.0 / 3
_C.SOLVER.WARMUP_ITERS = 500 #预热迭代次数,预热迭代次数内(小于訪值)的学习率比较低
_C.SOLVER.WARMUP_METHOD = "constant" #预热策略,有'constant'和'linear'两种
_C.SOLVER.CHECKPOINT_PERIOD = 2000 #生成检查点(checkpoint)的步长
_C.SOLVER.IMS_PER_BATCH = 1 #一个batch包含的图片数量
_C.TEST = CN()
_C.TEST.EXPECTED_RESULTS = []
_C.TEST.EXPECTED_RESULTS_SIGMA_TOL = 4
_C.TEST.IMS_PER_BATCH = 1
_C.OUTPUT_DIR = "output" #主要作为checkpoint和inference的输出目录
_C.PATHS_CATALOG = os.path.join(os.path.dirname(__file__), "paths_catalog.py")
class DatasetCatalog(object):
DATA_DIR = "datasets"
DATASETS = {
"coco_2014_train": (
"coco/train2014",
"coco/annotations/instances_train2014.json", # 标注文件路径
),
"coco_2014_val": (
"coco/val2014", #同上
"coco/annotations/instances_val2014.json" #同上
),
}
cd maskrcnn-benchmark
python tools/train_net.py --config-file experiments/cfgs/e2e_faster_rcnn_R_50_FPN_1x.yaml
指定模型配置文件,执行测试启动脚本
python tools/test_net.py --config-file experiments/cfgs/e2e_faster_rcnn_R_50_FPN_1x.yaml
#!--*-- coding:utf-8 --*--
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import requests
from io import BytesIO
from PIL import Image
import numpy as np
pylab.rcParams['figure.figsize'] = 20, 12
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo
# 参数配置文件
config_file = "/home/maskrcnn-benchmark/experiments/cfgs/e2e_faster_rcnn_R_50_FPN_1X.yaml"#自己配置的cfg文件
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.DEVICE", "cpu"])
cfg.MODEL.WEIGHT = '../pretrained/e2e_mask_rcnn_R_50_FPN_1x.pth'
coco_demo = COCODemo(cfg, min_image_size=800, confidence_threshold=0.7, )
if False:
# imgurl = "http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg"
# response = requests.get(imgurl)
# pil_image = Image.open(BytesIO(response.content)).convert("RGB")
else:
imgfile = './test/1.jpg'
pil_image = Image.open(imgfile).convert("RGB")
image = np.array(pil_image)[:, :, [2, 1, 0]]
# forward predict
predictions = coco_demo.run_on_opencv_image(image)
# vis
plt.subplot(1, 2, 1)
plt.imshow(image[:, :, ::-1])
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(predictions[:, :, ::-1])
plt.axis('off')
plt.show()
参考链接:https://blog.csdn.net/ChuiGeDaQiQiu/article/details/83868512
https://blog.csdn.net/x572722344/article/details/84934188