将CVPPP中的彩色RGB分割标注图像数据集转换为COCO格式的json文件

  我所用的是anaconda+python3.6。

一、环境搭建

# windows+anaconda
pip install git+https://github.com/philferriere/cocoapi.git#egg=pycocotools^&subdirectory=PythonAPI 
pip install cython
pip install git+https://github.com/waspinator/pycococreator.git@0.2.0

二、将RGB标注图像转换为json文件的具体步骤

首先,我下载的数据集是CVPPP的数据集,文件结构如下图:

将CVPPP中的彩色RGB分割标注图像数据集转换为COCO格式的json文件_第1张图片

第一步,需要将彩色的RGB标注图像提取出来,单独放入一个名为labels的文件夹中,

代码如下:

import os
import shutil
# Move every label image out of the mixed CVPPP folder into its own folder.
path_img='F:/test_datas/cvppp_challenge/training/A4'
path_labels = "F:/test_datas/cvppp_challenge/training/A4_labels/"
# shutil.move raises if the destination folder is missing, so create it first.
os.makedirs(path_labels, exist_ok=True)
ls = os.listdir(path_img)
print(len(ls))
for i in ls:
    if 'label' in i:  # 'label' is the keyword that marks an annotation image
        shutil.move(path_img+'/'+i, path_labels+i)

第二步,我们需要将文件目录格式设置如下

annotations中放的是单个黑白的mask图片,另一个文件夹中放的是原图。

train
    │ 
    └───annotations
    │    │ <image_id>_<object_class_name>_<annotation_id>.png
    │    │ ...
    │   
    └───<subset><year>
         │   <image_id>.png
         │   ...
 
e.g

train
    │ 
    └───annotations
    │    │ 5_leaf_0.png        # 第5张图片的第0个叶片
    │    │ ...
    │   
    └───shapes_train2017
         │   5.png             # 第5张图片
         │   ...

根据文件目录格式,需要将RGB标注图像重命名,代码如下:

import os

# Batch-rename the image files in a folder.
class BatchRename():
    """Rename every ``.png`` file in ``self.path`` to ``1.png``, ``2.png``, ..."""

    def __init__(self):
        # Folder whose PNG files are renamed in place.
        self.path = 'F:/test_datas/cvppp_challenge/training/A1_labels'

    def rename(self):
        """Rename the PNG files in ``self.path`` to consecutive numbers from 1.

        Files are processed in sorted name order so the numbering is the same
        on every run and platform. Entries that do not end in ``.png`` are
        left alone. A file is skipped (with a message) when its target name
        is already taken by another file or when the rename fails; the number
        is not consumed in that case. A summary line is printed at the end.
        """
        folder = os.path.abspath(self.path)
        # os.listdir order is arbitrary; sort for reproducible numbering.
        filelist = sorted(os.listdir(folder))
        total_num = len(filelist)  # every entry in the folder, PNG or not
        renamed = 0  # number of files successfully renamed so far
        for item in filelist:
            if not item.endswith('.png'):
                continue
            src = os.path.join(folder, item)
            dst = os.path.join(folder, str(renamed + 1) + '.png')
            # os.rename silently replaces an existing file on POSIX; never
            # clobber a different file that already owns the target name.
            if src != dst and os.path.exists(dst):
                print('skipping %s: %s already exists' % (src, dst))
                continue
            try:
                os.rename(src, dst)
            except OSError as err:
                print('failed to rename %s: %s' % (src, err))
                continue
            print ('converting %s to %s ...' % (src, dst))
            renamed += 1
        print ('total %d to rename & converted %d pngs' % (total_num, renamed))

if __name__ == '__main__':
    # Run the rename on the folder configured in BatchRename.__init__.
    BatchRename().rename()

重命名后的结果如下:

将CVPPP中的彩色RGB分割标注图像数据集转换为COCO格式的json文件_第2张图片

将CVPPP中的彩色RGB分割标注图像数据集转换为COCO格式的json文件_第3张图片

第三步,将RGB标注彩图转换为单个物体的黑白mask图像,代码如下:


import cv2
import numpy as np
import os, glob
 
 
def rgb2masks(label_name, out_dir='F:/test_datas/cvppp_challenge/training/A1_train/annotations/'):
    """Split one colour-coded label image into one black/white mask per colour.

    The label image is read as a 3-channel colour image (``cv2.imread`` flag 1).
    Every distinct non-black colour is treated as one instance. For each
    instance a PNG is written that is white (255) where the label has that
    colour and black elsewhere, named ``<label stem>_leaf_<idx>.png``.
    Instances are numbered from 0 in the order their colour first appears
    when scanning the image row by row.

    Args:
        label_name: Path of the colour label image.
        out_dir: Folder that receives the masks; created if it is missing.

    Raises:
        IOError: If the label image cannot be read.
    """
    lbl_id = os.path.split(label_name)[-1].split('.')[0]
    lbl = cv2.imread(label_name, 1)
    if lbl is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('cannot read label image: %s' % label_name)
    # cv2.imwrite fails silently when the target folder does not exist.
    os.makedirs(out_dir, exist_ok=True)

    # Pack the three 8-bit channels into one integer per pixel so that a
    # colour comparison becomes a scalar comparison.
    packed = ((lbl[..., 0].astype(np.int32) << 16)
              | (lbl[..., 1].astype(np.int32) << 8)
              | lbl[..., 2].astype(np.int32))
    colours, first_seen = np.unique(packed.ravel(), return_index=True)
    # np.unique sorts by value; reorder by first occurrence in row-major
    # order to keep the numbering of the original per-pixel scan.
    colours = colours[np.argsort(first_seen)]

    idx = 0
    for code in colours:
        if code == 0:  # pure black (0, 0, 0) is background
            continue
        leaf = np.zeros(lbl.shape, dtype=np.uint8)
        leaf[packed == code] = 255
        mask_name = os.path.join(out_dir, lbl_id + '_leaf_' + str(idx) + '.png')
        cv2.imwrite(mask_name, leaf)
        idx += 1
 
 
# Convert every PNG label image found directly inside the label folder.
label_dir = 'F:/test_datas/cvppp_challenge/training/A1_labels'
label_pattern = os.path.join(label_dir, '*.png')
label_list = glob.glob(label_pattern)
for label_name in label_list:
    rgb2masks(label_name)

代码运行结果如下:

将CVPPP中的彩色RGB分割标注图像数据集转换为COCO格式的json文件_第4张图片

第四步,利用pycococreator和得到的单片黑白masks生成coco json格式的数据集,代码如下:

import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np
from pycococreatortools import pycococreatortools
 
 
# Dataset root; the images and the per-instance masks live in two sub-folders.
ROOT_DIR = 'F:/test_datas/cvppp_challenge/training/A1_train'
IMAGE_DIR, ANNOTATION_DIR = (
    os.path.join(ROOT_DIR, sub) for sub in ("shapes_train2017", "annotations")
)

# Content of the "info" section of the generated json.
INFO = dict(
    description="Leaf Dataset",
    url="https://github.com/waspinator/pycococreator",
    version="0.1.0",
    year=2017,
    contributor="Francis_Liu",
    date_created=datetime.datetime.utcnow().isoformat(' '),
)

# Content of the "licenses" section of the generated json.
LICENSES = [
    dict(
        id=1,
        name="Attribution-NonCommercial-ShareAlike License",
        url="http://creativecommons.org/licenses/by-nc-sa/2.0/",
    ),
]

# Add more categories here as needed.
CATEGORIES = [
    dict(id=1, name='leaf', supercategory='leaf'),
]
 
 
def filter_for_jpeg(root, files):
    """Return the joined paths of the image files among *files*.

    Each name in *files* is joined onto *root*; a path is kept when it ends
    in ``.jpeg``, ``.jpg`` or ``.png`` (case-sensitive). Input order is kept.
    """
    patterns = ('*.jpeg', '*.jpg', '*.png')
    candidates = (os.path.join(root, name) for name in files)
    return [
        path for path in candidates
        if any(fnmatch.fnmatchcase(path, pattern) for pattern in patterns)
    ]
 
 
def filter_for_annotations(root, files, image_filename):
    """Return the joined paths of the PNG masks that belong to one image.

    A mask belongs to the image when its file name without extension equals
    the image's name without extension, or starts with that name followed by
    an underscore (``5.png`` owns ``5_leaf_0.png``). The underscore boundary
    matters: a bare prefix match would also hand ``50_leaf_0.png`` to image 5.
    The image name is compared literally, not as a regular expression.

    Args:
        root: Folder the names in *files* are relative to.
        files: Candidate file names inside *root*.
        image_filename: Path of the image whose masks are wanted.

    Returns:
        Matching paths (``root`` joined with the name), in input order.
    """
    image_stem = os.path.splitext(os.path.basename(image_filename))[0]
    instance_prefix = image_stem + '_'
    matches = []
    for name in files:
        path = os.path.join(root, name)
        if not fnmatch.fnmatchcase(path, '*.png'):
            continue
        stem = os.path.splitext(os.path.basename(path))[0]
        if stem == image_stem or stem.startswith(instance_prefix):
            matches.append(path)
    return matches
 
 
def main():
    """Build the COCO json for all images under IMAGE_DIR and write it to ROOT_DIR.

    For every image an entry is added to "images". For every mask that
    filter_for_annotations associates with the image, an annotation is
    created with pycococreatortools and added to "annotations" unless the
    tool returns None. Image ids and segmentation ids both start at 1; the
    segmentation id advances for every mask, including those that yield no
    annotation. The result is written to
    ``<ROOT_DIR>/instances_leaf_train2017.json``.
    """
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    image_id = 1
    segmentation_id = 1

    # The annotation folder is not modified while this runs, so list it once
    # instead of walking it again for every image.
    annotation_tree = [
        (ann_root, ann_names)
        for ann_root, _, ann_names in os.walk(ANNOTATION_DIR)
    ]

    # filter for jpeg images
    for image_root, _, image_names in os.walk(IMAGE_DIR):
        image_files = filter_for_jpeg(image_root, image_names)

        # go through each image
        for image_filename in image_files:
            # Only the size is needed; close the file handle right away.
            with Image.open(image_filename) as image:
                image_size = image.size
            image_info = pycococreatortools.create_image_info(
                    image_id, os.path.basename(image_filename), image_size)
            coco_output["images"].append(image_info)

            # filter for associated png annotations
            for ann_root, ann_names in annotation_tree:
                annotation_files = filter_for_annotations(
                        ann_root, ann_names, image_filename)

                # go through each associated annotation
                for annotation_filename in annotation_files:

                    print(annotation_filename)
                    # First category whose name occurs in the mask path;
                    # raises IndexError when no category name matches.
                    class_id = [x['id'] for x in CATEGORIES if x['name'] in annotation_filename][0]

                    category_info = {'id': class_id, 'is_crowd': 'crowd' in image_filename}
                    with Image.open(annotation_filename) as mask_image:
                        binary_mask = np.asarray(
                                mask_image.convert('1')).astype(np.uint8)

                    annotation_info = pycococreatortools.create_annotation_info(
                            segmentation_id, image_id, category_info, binary_mask,
                            image_size, tolerance=2)

                    if annotation_info is not None:
                        coco_output["annotations"].append(annotation_info)

                    segmentation_id = segmentation_id + 1

            image_id = image_id + 1

    with open('{}/instances_leaf_train2017.json'.format(ROOT_DIR), 'w') as output_json_file:
        json.dump(coco_output, output_json_file)
 
 
# Build the COCO json only when this file is executed as a script.
if __name__ == "__main__":
    main()

最后,生成一个json文件

将CVPPP中的彩色RGB分割标注图像数据集转换为COCO格式的json文件_第5张图片

这样就结束了!

你可能感兴趣的:(python,图像处理,深度学习)