Mask rcnn 训练自己的数据集

win 10  anaconda  pycharm

项目地址:https://github.com/matterport/Mask_RCNN

1.环境配置

参考    https://blog.csdn.net/hesongzefairy/article/details/104702119

2.数据集准备

 创建文件夹 dataset

Mask rcnn 训练自己的数据集_第1张图片

其中pic文件夹放原始图片

Mask rcnn 训练自己的数据集_第2张图片

然后使用labelme 对图片进行标注 标注后的json文件保存在 json文件夹里

Mask rcnn 训练自己的数据集_第3张图片

 然后利用 labelme 自带的 labelme_json_to_dataset 为每个 json 文件生成一个 json 文件夹,每一个 json 文件夹包含 5 个子文件;这些 json 文件夹存放在 labelme_json 文件夹中

Mask rcnn 训练自己的数据集_第4张图片

Mask rcnn 训练自己的数据集_第5张图片

批量生成 json文件夹的语句

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Run ``labelme_json_to_dataset`` on every labelme annotation in a folder."""
import os
import subprocess

if __name__ == '__main__':
    # Folder holding the .json files written by labelme. A raw string keeps
    # the backslashes of the Windows path from being read as escapes.
    json_dir = r"D:\Desktop\summer\Mask_RCNN-master\dataset\json"
    for name in os.listdir(json_dir):
        # Only annotation files are converted; any other entry in the folder
        # (for example sub-folders) is skipped.
        if not name.lower().endswith(".json"):
            continue
        json_path = os.path.join(json_dir, name)
        # An argument list (no shell string) passes paths containing spaces
        # to the tool unchanged.
        result = subprocess.run(["labelme_json_to_dataset", json_path])
        # Report success only when the tool exited cleanly.
        if result.returncode == 0:
            print("success json to dataset: ", json_path)
        else:
            print("failed json to dataset: ", json_path)

然后将 labelme_json 中各个文件夹里的 label.png 移动到 cv2_mask 文件夹中。(网上很多教程中有对 label.png 做 16 位转 8 位处理的步骤,这主要是之前 labelme 版本的问题;我用的是 4.5.7,labelme_json_to_dataset 生成的 label.png 本身就是 8 位,可以直接用于训练。)

将label.png统一复制到 cv2_mask文件夹 使用了如下程序

from PIL import Image
import numpy as np
import os

# Number of annotated images. The "<k>_json" folders are expected to be
# numbered 1..n.
n = 5
# Raw strings keep the backslashes of the Windows paths literal.
labelme_json_dir = r"D:\Desktop\summer\Mask_RCNN-master\dataset\labelme_json"
# The training script reads its masks from "dataset/cv2_mask", so the copies
# have to be written to a folder with exactly that name.
save_path = r"D:\Desktop\summer\Mask_RCNN-master\dataset\cv2_mask"
os.makedirs(save_path, exist_ok=True)

for i in range(1, n + 1):
    open_path = os.path.join(labelme_json_dir, str(i) + '_json', 'label.png')
    # The label image is re-saved as it is (no bit-depth conversion); the
    # context manager releases the file handle after each copy.
    with Image.open(open_path) as img:
        print(img)
        img.save(os.path.join(save_path, str(i) + '.png'))

至此数据集的准备工作就完成了

3.训练

训练使用的程序是在 samples/shapes/train_shapes.py 文件上修改得到的,我直接参考了网上的过程

# -*- coding: utf-8 -*-

import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
from mrcnn.config import Config
#import utils
from mrcnn import model as modellib,utils
from mrcnn import visualize
import yaml
from mrcnn.model import log
from PIL import Image

import tensorflow as tf
from keras import backend as K
# Create a TensorFlow session whose GPU memory allocation grows on demand
# and register it with Keras.
# NOTE: the name `config` is rebound to the model configuration further down.
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
K.set_session(sess)

#os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Root directory of the project (taken to be the current working directory)
ROOT_DIR = os.getcwd()

#ROOT_DIR = os.path.abspath("../")
# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Module-level counter; nothing in this script changes its value.
iter_num=0

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)


class ShapesConfig(Config):
    """Training configuration for the custom single-class dataset.

    Derives from the base Config class and overrides only the values
    listed below.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"

    # Train on 1 GPU with 1 image per GPU.
    # Batch size is 1 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # background + 1 custom class

    # Use small images for faster training. Set the limits of the small side
    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128

    # Anchor side lengths in pixels: (48, 96, 192, 384, 768).
    # NOTE(review): most of these exceed the 128-pixel image limits set
    # above - confirm they match the image size actually used.
    RPN_ANCHOR_SCALES = (8 * 6, 16 * 6, 32 * 6, 64 * 6, 128 * 6)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 100

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 50


config = ShapesConfig()
config.display()

class DrugDataset(utils.Dataset):
    """Dataset built from labelme output (images, label masks, info.yaml).

    Besides the fields handled by ``add_image``, every image record stores
    ``width``, ``height``, ``mask_path`` and ``yaml_path``.
    """

    def get_obj_index(self, image):
        """Return the number of instances drawn in a label image.

        image: label image in which instance ``k`` (0-based) is drawn with
            pixel value ``k + 1``, so the largest pixel value is the
            instance count.
        """
        return int(np.max(np.asarray(image)))

    def from_yaml_get_class(self, image_id):
        """Return the label names listed in the image's info.yaml.

        The first entry of ``label_names`` (presumably the background
        entry) is dropped, leaving the names that belong to the mask
        channels.
        """
        info = self.image_info[image_id]
        with open(info['yaml_path']) as f:
            temp = yaml.load(f.read(), Loader=yaml.FullLoader)
        labels = temp['label_names']
        del labels[0]
        return labels

    def draw_mask(self, num_obj, mask, image, image_id):
        """Mark every instance of a label image in its own mask channel.

        num_obj: number of instances, i.e. channels of ``mask`` to fill.
        mask: array of shape [height, width, num_obj]; modified in place.
        image: label image in which instance ``k`` has pixel value ``k + 1``.
        image_id: index into ``self.image_info`` (supplies width/height).

        Returns the same ``mask`` array.
        Raises ValueError if the label image is not single-channel.
        """
        info = self.image_info[image_id]
        # Look only at the region described by the image record.
        label = np.asarray(image)[:info['height'], :info['width']]
        if label.ndim != 2:
            raise ValueError("label image must be single-channel, got shape "
                             + str(label.shape))
        # One boolean selection per instance replaces the per-pixel
        # getpixel() loop; pixels that do not match are left untouched.
        for index in range(num_obj):
            mask[:, :, index][label == index + 1] = 1
        return mask

    def load_shapes(self, count, img_floder, mask_floder, imglist, dataset_root_path):
        """Register the class list and the first ``count`` images.

        count: number of entries of ``imglist`` to register.
        img_floder: folder containing the original images.
        mask_floder: folder containing the "<name>.png" label masks.
        imglist: image file names inside ``img_floder``.
        dataset_root_path: dataset root containing "labelme_json/".

        Raises FileNotFoundError if the "img.png" of an image cannot be read.
        """
        # Add classes
        self.add_class("shapes", 1, "car1")
        # Further classes would be registered the same way, e.g.
        # self.add_class("shapes", 2, "b")
        for i in range(count):
            img_name = imglist[i]
            # File name up to the first dot, e.g. "1.jpg" -> "1".
            filestr = img_name.split(".")[0]
            json_dir = os.path.join(dataset_root_path, "labelme_json", filestr + "_json")
            mask_path = mask_floder + "/" + filestr + ".png"
            yaml_path = os.path.join(json_dir, "info.yaml")
            img_png_path = os.path.join(json_dir, "img.png")
            print(img_png_path)
            # img.png is read only to obtain the image width and height.
            cv_img = cv2.imread(img_png_path)
            if cv_img is None:
                # cv2.imread reports an unreadable file by returning None.
                raise FileNotFoundError("cannot read image: " + img_png_path)

            self.add_image("shapes", image_id=i, path=img_floder + "/" + img_name,
                           width=cv_img.shape[1], height=cv_img.shape[0],
                           mask_path=mask_path, yaml_path=yaml_path)

    def load_mask(self, image_id):
        """Return the instance masks and class IDs of one image.

        Returns:
            mask: uint8 array [height, width, instance count] holding 1
                inside an instance and 0 elsewhere.
            class_ids: int32 array with one class ID per recognised label.
        """
        print("image_id", image_id)
        info = self.image_info[image_id]
        # The context manager closes the mask file once it has been read.
        with Image.open(info['mask_path']) as img:
            num_obj = self.get_obj_index(img)
            mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8)
            mask = self.draw_mask(num_obj, mask, img, image_id)
        # Each label pixel carries one value, so the channels written by
        # draw_mask never overlap and no occlusion pass is required. An
        # image without instances simply yields a mask with zero channels.
        labels = self.from_yaml_get_class(image_id)
        labels_form = []
        for label in labels:
            if label.find("car1") != -1:
                labels_form.append("car1")
            # Further classes would be matched the same way, e.g.
            # elif label.find("b") != -1:
            #     labels_form.append("b")
        # NOTE(review): labels matching no class are skipped, which leaves
        # fewer class IDs than mask channels - confirm every label matches.
        class_ids = np.array([self.class_names.index(s) for s in labels_form])
        return mask, class_ids.astype(np.int32)

def get_ax(rows=1, cols=1, size=8):
    """Create the Matplotlib axes used by the visualizations.

    rows, cols: layout of the subplot grid.
    size: factor applied to ``cols`` and ``rows`` to obtain the figure
        width and height; raise it to get larger rendered images.

    Returns whatever ``plt.subplots`` yields as its axes value.
    """
    fig_width = size * cols
    fig_height = size * rows
    _fig, axes = plt.subplots(nrows=rows, ncols=cols,
                              figsize=(fig_width, fig_height))
    return axes

# Basic settings
dataset_root_path="dataset/"
img_floder = dataset_root_path + "pic"
mask_floder = dataset_root_path + "cv2_mask"
#yaml_floder = dataset_root_path
imglist = os.listdir(img_floder)
count = len(imglist)

# Prepare the train and val datasets
dataset_train = DrugDataset()
dataset_train.load_shapes(count, img_floder, mask_floder, imglist,dataset_root_path)
dataset_train.prepare()

#print("dataset_train-->",dataset_train._image_ids)

# The validation set is made of the first images of the same list (3 at
# most); capping the number at `count` keeps a folder with fewer than 3
# images from indexing past the end of imglist.
# NOTE(review): these images are also part of the training set.
val_count = min(3, count)
dataset_val = DrugDataset()
dataset_val.load_shapes(val_count, img_floder, mask_floder, imglist,dataset_root_path)
dataset_val.prepare()

#print("dataset_val-->",dataset_val._image_ids)

# Load and display random samples
#image_ids = np.random.choice(dataset_train.image_ids, 4)
#for image_id in image_ids:
#    image = dataset_train.load_image(image_id)
#    mask, class_ids = dataset_train.load_mask(image_id)
#    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training.
    # Current Mask_RCNN versions return the checkpoint path directly from
    # find_last(); older ones returned a (log_dir, checkpoint_path) pair.
    # Both forms are accepted here.
    last_checkpoint = model.find_last()
    if isinstance(last_checkpoint, (tuple, list)):
        last_checkpoint = last_checkpoint[1]
    model.load_weights(last_checkpoint, by_name=True)

# Train the head branches
# Passing layers="heads" freezes all layers except the head
# layers. You can also pass a regular expression to select
# which layers to train by name pattern.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=10,
            layers='heads')



# Fine tune all layers
# Passing layers="all" trains all layers. You can also
# pass a regular expression to select which layers to
# train by name pattern.
# The learning rate is divided by 10 for this stage.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10,
            epochs=30,
            layers="all")

如果出现 No module named 'mrcnn' 的错误,只需要在终端运行语句

python setup.py install

就ok了

这里需要几个部分的修改:

①需要添加字段 不然会有报错

import tensorflow as tf
from keras import backend as K
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
K.set_session(sess)

②修改 NUM_CLASSES = 1 + 自己定义的类别数。下面两个参数(IMAGE_MIN_DIM、IMAGE_MAX_DIM)好像是根据自己的输入图片设置的,我没搞太懂,一开始就设置了最小的 128;之后我把它改成了(960,544)居然没有报错,可能我之前搞错了长和宽。这个还是改成自己图片的尺寸比较好

NUM_CLASSES = 1 + 1  # background + 3 shapes

    # Use small images for faster training. Set the limits of the small side
    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128

③修改定义的类别

 # Add classes
        self.add_class("shapes", 1, "car1")
        # self.add_class("shapes", 2, "b")
        # self.add_class("shapes", 3, "c")
        # self.add_class("shapes", 4, "e")

load_mask中的类别也要改

        for i in range(len(labels)):
            if labels[i].find("car1") != -1:
                labels_form.append("car1")
            # elif labels[i].find("b") != -1:
            #     labels_form.append("b")
            # elif labels[i].find("c") != -1:
            #     labels_form.append("c")
            # elif labels[i].find("e") != -1:
            #     labels_form.append("e")

④在“基础设置”部分修改文件路径

#基础设置
dataset_root_path="dataset/"
img_floder = dataset_root_path + "pic"
mask_floder = dataset_root_path + "cv2_mask"
#yaml_floder = dataset_root_path
imglist = os.listdir(img_floder)
count = len(imglist)

然后运行就可以了

Mask rcnn 训练自己的数据集_第6张图片

关于测试 直接将 demo.ipynb 转为py文件 然后修改权重文件和图片路径代码就可以

此处修改权重文件路径

# Load weights trained on MS-COCO
model.load_weights(COCO_MODEL_PATH, by_name=True)

此处修改需要预测的图片的路径

# Load a random image from the images folder
# file_names = next(os.walk(IMAGE_DIR))[2]
# image = skimage.io.imread(os.path.join(IMAGE_DIR, random.choice(file_names)))
import cv2
# cv2.imread yields the channels in BGR order, while the skimage loader it
# replaces yields RGB; convert so the detector receives RGB as before.
image = cv2.cvtColor(cv2.imread(r"D:\Desktop\ririzhu.jpg"), cv2.COLOR_BGR2RGB)

测试自己的数据集

还是参考了一个代码

# -*- coding: utf-8 -*-


import os
import sys
import random
import math
import numpy as np
import skimage.io
import matplotlib
import matplotlib.pyplot as plt
import cv2
import time
from mrcnn.config import Config
from datetime import datetime

# Root directory of the project
ROOT_DIR = os.getcwd()

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize

# Import COCO config
sys.path.append(os.path.join(ROOT_DIR, "samples/coco/"))  # To find local version
from samples.coco import coco

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs/shapes20210703T0938/")

# Local path to the weights file produced by the training run
# COCO_MODEL_PATH = os.path.join(MODEL_DIR, "mask_rcnn_shapes_0030.h5")
COCO_MODEL_PATH = r"logs/shapes20210703T0938/mask_rcnn_shapes_0030.h5"
# The file must already exist. Downloading the COCO release weights to this
# path would store a different model under the trained checkpoint's name,
# so a missing file is reported instead.
if not os.path.exists(COCO_MODEL_PATH):
    raise FileNotFoundError(
        "trained weights not found: " + os.path.abspath(COCO_MODEL_PATH))

# Directory of images to run detection on
IMAGE_DIR = os.path.join(ROOT_DIR, "images")


class ShapesConfig(Config):
    """Configuration of the custom single-class model, used for inference.

    Derives from the base Config class and overrides only the values
    listed below.
    NOTE(review): presumably these must equal the values used for
    training - confirm against the training script.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"

    # Use 1 GPU with 1 image per GPU.
    # Batch size is 1 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # background + 1 custom class

    # Set the limits of the small side
    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128

    # Anchor side lengths in pixels: (48, 96, 192, 384, 768).
    # NOTE(review): most of these exceed the 128-pixel image limits set
    # above - confirm they match the image size actually used.
    RPN_ANCHOR_SCALES = (8 * 6, 16 * 6, 32 * 6, 64 * 6, 128 * 6)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 100

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 50


# import train_tongue
# class InferenceConfig(coco.CocoConfig):
class InferenceConfig(ShapesConfig):
    """Inference configuration; inherits every other value from ShapesConfig."""
    # Set batch size to 1 since we'll be running inference on
    # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
    # (ShapesConfig above already uses these two values.)
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1


config = InferenceConfig()

# Create model object in inference mode (built once).
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

# Load the weights file selected above
model.load_weights(COCO_MODEL_PATH, by_name=True)

# Class names
# Index of the class in the list is its ID, so the entry at index 0 is only
# a placeholder in front of the trained class 'car1'.
class_names = ['a','car1']
# Load a random image from the images folder
# file_names = next(os.walk(IMAGE_DIR))[2]
# image = skimage.io.imread(os.path.join(IMAGE_DIR, random.choice(file_names)))
import cv2
image = skimage.io.imread(r"D:\Desktop\car_9.jpg")
a = datetime.now()
# Run detection
results = model.detect([image], verbose=1)
b = datetime.now()
# Visualize results
# total_seconds() keeps the fractional part; `.seconds` would print 0 for
# any detection that takes less than one second.
print("shijian", (b - a).total_seconds())
r = results[0]
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                             class_names, r['scores'])

修改部分设置就可以了

需要注意的是:在 class_names 的设定中,如果只训练了一种类别(我就是),要在前面增加一种类别;其实最好是写 'BG' 来代表 background,不然会搞混

不然会报错: IndexError: list index out of range

class_names = ['bg','car1']

如图 我训练的类别是 car1 前面随便加一种类别就 ok 了

Mask rcnn 训练自己的数据集_第7张图片

批量测试自己的数据集(以原尺寸保存结果)

这一部分我也是弄了很久 参考了各种网上的经验 最后自己总结 改动了一下 但是我觉得 很适合我这种菜鸟 上代码 把前面单个图片测试的语句注释掉 然后在test.py后面加上这段语句就行


# Batch inference: run detection on every image of a folder
test_dir = 'dataset/pic'  # folder holding the test images
# Folder receiving the rendered results
save_dir = r"D:\Desktop\summer\Mask_RCNN-master\dataset\test"
os.makedirs(save_dir, exist_ok=True)
a = os.listdir(test_dir)
for name in a:
    image = skimage.io.imread(os.path.join(test_dir, name))
    results = model.detect([image], verbose=1)
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                class_names, r['scores'])
    # Each result is saved under the file name of its test image.
    plt.savefig(os.path.join(save_dir, name), pad_inches=0.0)
    # Close the figure just saved; otherwise one open figure per image
    # accumulates in memory over the whole folder.
    plt.close()

仅仅这样是不够的:一方面它每测试一张就会显示一次,有点烦;更主要的是,只修改 test 语句的话,保存的图片四周会有很宽的白边。所以还要改动 visualize.py 中的 display_instances 函数

首先把 ax.set_ylim(height + 10, -10) 和 ax.set_xlim(-10, width + 10) 这两句注释掉

height, width = image.shape[:2]
    # ax.set_ylim(height + 10, -10)
    # ax.set_xlim(-10, width + 10)
    ax.axis('off')
    ax.set_title(title)

然后修改if auto_show: 后面的内容

    if auto_show:
        ###############保存预测结果图像

        fig = plt.gcf()
        fig.set_size_inches(width / 100.0, height / 100.0)  # 输出原始图像width*height的像素
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())
        plt.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0, wspace=0)
        plt.margins(0, 0)
         

        # plt.show()       #在保存预测结果图像时,如果不想没保存一张显示一次,可以把他注释掉。

我看到网上大部分的做法是:在 display_instances 函数中增加一个 count 参数(类似我增加的 a),然后在 visualize.py 中保存图片。我刚开始也参考了这种做法,但是一直报错,也没太找到原因;后来尝试在 test.py 中直接使用保存语句,对我来说好理解多了,也没有报错

接下来就是结果了 

Mask rcnn 训练自己的数据集_第8张图片

参考:

https://blog.csdn.net/hesongzefairy/article/details/104702119

(13条消息) mask rcnn训练自己的数据集_Tom Hardy的博客-CSDN博客_maskrcnn训练自己的数据集

mask_rcnn训练自己的数据集 - 一颗蘋果 - 博客园 (cnblogs.com)

你可能感兴趣的:(python,深度学习,tensorflow)