自建数据集系列:从labelme格式->VOC格式+从二值mask->VOC格式

文章目录

    • 前言
    • labelme2Xml
    • 训练、验证划分
    • mask2Xml
    • 汇总
      • 1.从labelImg格式->txt格式(YOLO格式、ICDAR2015格式)
      • 2.从二值mask->labelme格式->coco格式
      • 3.从labelme格式->VOC格式+从二值mask->VOC格式
      • 4.从RGB->二值mask->coco格式
      • 5.实例分割mask->语义分割mask->扩增mask
      • 6.COCO格式->YOLO格式
      • 双模图片数据与对应标注文件的命名对齐
      • xml标注文件的节点、属性、文本的修正
      • cocoJson数据集统计分析

前言

标注软件常用的也就LabelImg和Labelme,分别用于目标检测与分割的标注。但是吧,咱不能被工具限制死,公开的是是分割,咱可以转换为检测需要的格式啊。本文从两个角度达到目的,一是通过labelme的json格式,二是直接通过二值mask

自建数据集系列:从labelme格式->VOC格式+从二值mask->VOC格式_第1张图片
JPEGImages夹下就是原始图片,Annotions文件夹中是xml格式的标注文件,Main下就是数据集切分的训练验证测试的txt存储对应的文件名

先看下咱的目标labelImg的xml格式:

<annotation>
	<folder>WH_datafolder>
	<filename>1552.jpgfilename>
	<source>
		<database>WH Datadatabase>
		<annotation>WHannotation>
		<image>flickrimage>
		<flickrid>NULLflickrid>
	source>
	<owner>
		<flickrid>NULLflickrid>
		<name>WHname>
	owner>
	<size>
		<width>1920width>
		<height>1080height>
		<depth>3depth>
	size>
		<segmented>0segmented>
	<object>
		<name>tankname>
		<pose>Unspecifiedpose>
		<truncated>1truncated>
		<difficult>0difficult>
		<bndbox>
			<xmin>1213xmin>
			<ymin>921ymin>
			<xmax>1392xmax>
			<ymax>1004ymax>
		bndbox>
	object>
annotation>

下面我们看一下如何转换吧

labelme2Xml

数据源labelme的json格式:

{
  "version": "4.5.13",
  "flags": {},
  "shapes": [
    {
      "label": "tank",
      "points": [
        [
          1219.6581196581196,
          921.3675213675215
        ],
        [
          1298.2905982905984,
          931.1965811965813
        ],
        [
          1337.6068376068376,
          939.3162393162394
        ],
        [
          1342.3076923076924,
          947.0085470085471
        ],
        [
          1342.7350427350427,
          962.8205128205129
        ],
        [
          1389.3162393162395,
          971.3675213675215
        ],
        [
          1392.3076923076924,
          974.7863247863248
        ],
        [
          1392.3076923076924,
          977.7777777777778
        ],
        [
          1344.871794871795,
          970.5128205128206
        ],
        [
          1342.7350427350427,
          1003.8461538461539
        ],
        [
          1336.7521367521367,
          1004.2735042735044
        ],
        [
          1226.923076923077,
          985.8974358974359
        ],
        [
          1213.2478632478633,
          978.6324786324786
        ]
      ],
      "group_id": null,
      "shape_type": "polygon",
      "flags": {}
    }
  ],
  "imagePath": "1552.jpg",
  "imageData": null,
  "imageHeight": 1080,
  "imageWidth": 1920
}

转换json格式为矩形的xml,然后拷贝原始图片至JPEGImages

labelme2Xml.py # ps:json文件和原始图片在同一个文件夹下

# -*- coding: utf-8 -*-
import os
from typing import List, Any
import numpy as np
import codecs
import json
from glob import glob
import cv2
import shutil
from sklearn.model_selection import train_test_split
 
# 1.标签路径
labelme_path = r"C:\\Users\\fei\Downloads\\json2xml\\tank_zhu\\"
#原始labelme标注数据路径
saved_path = "VOC2007/"
# 保存路径
isUseTest=True#是否创建test集
# 2.创建要求文件夹
if not os.path.exists(saved_path + "Annotations"):
    os.makedirs(saved_path + "Annotations")
if not os.path.exists(saved_path + "JPEGImages/"):
    os.makedirs(saved_path + "JPEGImages/")
if not os.path.exists(saved_path + "ImageSets/Main/"):
    os.makedirs(saved_path + "ImageSets/Main/")
# 3.获取待处理文件
files = glob(labelme_path + "*.json")
files = [i.replace("\\","/").split("/")[-1].split(".json")[0] for i in files]
print(files)
# 4.读取标注信息并写入 xml
for json_file_ in files:
    json_filename = labelme_path + json_file_ + ".json"
    json_file = json.load(open(json_filename, "r", encoding="utf-8"))
    height, width, channels = cv2.imread(labelme_path + json_file_ + ".jpg").shape
    with codecs.open(saved_path + "Annotations/" + json_file_ + ".xml", "w", "utf-8") as xml:
 
        xml.write('\n')
        xml.write('\t' + 'WH_data' + '\n')
        xml.write('\t' + json_file_ + ".jpg" + '\n')
        xml.write('\t\n')
        xml.write('\t\tWH Data\n')
        xml.write('\t\tWH\n')
        xml.write('\t\tflickr\n')
        xml.write('\t\tNULL\n')
        xml.write('\t\n')
        xml.write('\t\n')
        xml.write('\t\tNULL\n')
        xml.write('\t\tWH\n')
        xml.write('\t\n')
        xml.write('\t\n')
        xml.write('\t\t' + str(width) + '\n')
        xml.write('\t\t' + str(height) + '\n')
        xml.write('\t\t' + str(channels) + '\n')
        xml.write('\t\n')
        xml.write('\t\t0\n')
        for multi in json_file["shapes"]:
            points = np.array(multi["points"])
            labelName=multi["label"]
            xmin = min(points[:, 0])
            xmax = max(points[:, 0])
            ymin = min(points[:, 1])
            ymax = max(points[:, 1])
            label = multi["label"]
            if xmax <= xmin:
                pass
            elif ymax <= ymin:
                pass
            else:
                xml.write('\t\n')
                xml.write('\t\t' + labelName+ '\n')
                xml.write('\t\tUnspecified\n')
                xml.write('\t\t1\n')
                xml.write('\t\t0\n')
                xml.write('\t\t\n')
                xml.write('\t\t\t' + str(int(xmin)) + '\n')
                xml.write('\t\t\t' + str(int(ymin)) + '\n')
                xml.write('\t\t\t' + str(int(xmax)) + '\n')
                xml.write('\t\t\t' + str(int(ymax)) + '\n')
                xml.write('\t\t\n')
                xml.write('\t\n')
                print(json_filename, xmin, ymin, xmax, ymax, label)
        xml.write('')
# 5.复制图片到 VOC2007/JPEGImages/下
image_files = glob(labelme_path + "*.jpg")
print("copy image files to VOC007/JPEGImages/")
for image in image_files:
    shutil.copy(image, saved_path + "JPEGImages/")

训练、验证划分

自建数据集系列:从labelme格式->VOC格式+从二值mask->VOC格式_第2张图片

splitData_labelImg.py

import os  
import random  
  
trainval_percent = 1.0 # 训练验证样本占所有样本的比重
train_percent = 0.8 # 训练样本占训练验证样本的比重
xmlfilepath = 'VOC2007/Annotations'  
txtsavepath = 'VOC2007/ImageSets/Main'  
total_xml = os.listdir(xmlfilepath)  
  
num=len(total_xml)  
list=range(num)  
tv=int(num*trainval_percent)  
tr=int(tv*train_percent)  
trainval= random.sample(list,tv)  
train=random.sample(trainval,tr)  
  
ftrainval = open('VOC2007/ImageSets/Main/trainval.txt', 'w')   # 'a'为append
ftest = open('VOC2007/ImageSets/Main/test.txt', 'w')  
ftrain = open('VOC2007/ImageSets/Main/train.txt', 'w')  
fval = open('VOC2007/ImageSets/Main/val.txt', 'w')

  
for i  in list:
    if total_xml[i].endswith(".xml"):
        print(total_xml[i])
        name=total_xml[i][:-4]+'\n'  
        if i in trainval:  
            ftrainval.write(name)  
            if i in train:  
                ftrain.write(name)  
            else:  
                fval.write(name)  
        else:  
            ftest.write(name)  
  
ftrainval.close()  
ftrain.close()  
fval.close()  
ftest.close()

mask2Xml

值得注意每张原始图, 有时会对应多个实例
自建数据集系列:从labelme格式->VOC格式+从二值mask->VOC格式_第3张图片

mask2Xml.py

#!/usr/bin/env python3
 
from genericpath import exists
import os
import re
import fnmatch
from PIL import Image
import numpy as np
from pycococreatortools import pycococreatortools
from pycocotools import mask
from PIL import Image
import codecs
from glob import glob
import shutil
 
part = "train"   # train  test

ROOT_DIR = 'C:/Users/awei/Desktop/mask2Labelme/COD10K/'+part+'/'
IMAGE_DIR = os.path.join(ROOT_DIR, "Image")
ANNOTATION_DIR = os.path.join(ROOT_DIR, "GT")

def filter_for_jpeg(root, files):
    file_types = ['*.jpeg', '*.jpg', '*.png']
    file_types = r'|'.join([fnmatch.translate(x) for x in file_types])
    files = [os.path.join(root, f) for f in files]
    files = [f for f in files if re.match(file_types, f)]
    return files
 
 
def filter_for_annotations(root, files, image_filename):
    file_types = ['*.png']
    file_types = r'|'.join([fnmatch.translate(x) for x in file_types])
    basename_no_extension = os.path.splitext(os.path.basename(image_filename))[0]
    file_name_prefix = basename_no_extension + '_.*'   # 用于匹配对应的二值mask
    files = [os.path.join(root, f) for f in files]
    files = [f for f in files if re.match(file_types, f)]
    files = [f for f in files if re.match(file_name_prefix, os.path.splitext(os.path.basename(f))[0])]
    return files


saved_path = "VOC2007/"
# 2.创建要求文件夹
if not os.path.exists(saved_path + "Annotations"):
    os.makedirs(saved_path + "Annotations")
if not os.path.exists(saved_path + "JPEGImages/"):
    os.makedirs(saved_path + "JPEGImages/")
if not os.path.exists(saved_path + "ImageSets/Main/"):
    os.makedirs(saved_path + "ImageSets/Main/")

ftrainval = open('VOC2007/ImageSets/Main/trainval.txt', 'a')   # 'a'为append
ftest = open('VOC2007/ImageSets/Main/test.txt', 'w')  
ftrain = open('VOC2007/ImageSets/Main/train.txt', 'w')  
fval = open('VOC2007/ImageSets/Main/val.txt', 'w')

def splitData():
    for root, _, files in os.walk(IMAGE_DIR):
        image_files = filter_for_jpeg(root, files)

        # go through each image
        for image_filename in image_files:
            if not os.path.exists(saved_path+ "JPEGImages/"+os.path.basename(image_filename)):
                shutil.copy(image_filename, saved_path + "JPEGImages/")
            name = os.path.basename(image_filename).split('.')[0]
            if not os.path.exists(saved_path + "Annotations/"+name+".xml"):
                print("not exist:"+name+".xml")
                continue
            if part=="train":
                ftrain.write(name)
                ftrain.write('\n')
            else:
                fval.write(name)
                fval.write('\n')
            ftrainval.write(name)
            ftrainval.write('\n')

    ftrainval.close()  
    ftrain.close()  
    fval.close()  
    ftest.close()

def main():
    # filter for jpeg images
    for root, _, files in os.walk(IMAGE_DIR):
        image_files = filter_for_jpeg(root, files)

        # go through each image
        for image_filename in image_files:
            image = Image.open(image_filename)
            width,height,channels = image.size[0],image.size[1],image.layers


            boxList = []
            labels=[]
            # filter for associated png annotations
            for root, _, files in os.walk(ANNOTATION_DIR):
                annotation_files = filter_for_annotations(root, files, image_filename)
 
                # go through each associated annotation
                for annotation_filename in annotation_files:

                    print(annotation_filename)

                    name = os.path.basename(annotation_filename)
                    image_name = name.split('_')[-3]
                    label = name.split('_')[1] # 

                    
                    binary_mask = np.asarray(Image.open(annotation_filename)
                        .convert('1')).astype(np.uint8)
                    if image is not None:
                        binary_mask = pycococreatortools.resize_binary_mask(binary_mask, image.size)
                    binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))

                    bounding_box = mask.toBbox(binary_mask_encoded)
                    boxList.append([bounding_box[0],bounding_box[1],bounding_box[0]+bounding_box[2],bounding_box[1]+bounding_box[3]])
                    labels.append(label)

            # 读取标注信息并写入 xml
            with codecs.open(saved_path + "Annotations/" + image_name + ".xml", "w", "utf-8") as xml:
        
                xml.write('\n')
                xml.write('\t' + 'WH_data' + '\n')
                xml.write('\t' + image_name + ".jpg" + '\n')
                xml.write('\t\n')
                xml.write('\t\tWH Data\n')
                xml.write('\t\tWH\n')
                xml.write('\t\tflickr\n')
                xml.write('\t\tNULL\n')
                xml.write('\t\n')
                xml.write('\t\n')
                xml.write('\t\tNULL\n')
                xml.write('\t\tWH\n')
                xml.write('\t\n')
                xml.write('\t\n')
                xml.write('\t\t' + str(width) + '\n')
                xml.write('\t\t' + str(height) + '\n')
                xml.write('\t\t' + str(channels) + '\n')
                xml.write('\t\n')
                xml.write('\t\t0\n')
                for box,label in zip(boxList,labels):

                    xmin, ymin, xmax,ymax = box

                    if xmax <= xmin:
                        pass
                    elif ymax <= ymin:
                        pass
                    else:
                        xml.write('\t\n')
                        xml.write('\t\t' + label+ '\n')
                        xml.write('\t\tUnspecified\n')
                        xml.write('\t\t1\n')
                        xml.write('\t\t0\n')
                        xml.write('\t\t\n')
                        xml.write('\t\t\t' + str(int(xmin)) + '\n')
                        xml.write('\t\t\t' + str(int(ymin)) + '\n')
                        xml.write('\t\t\t' + str(int(xmax)) + '\n')
                        xml.write('\t\t\t' + str(int(ymax)) + '\n')
                        xml.write('\t\t\n')
                        xml.write('\t\n')
                        print(image_filename, xmin, ymin, xmax, ymax, label)
                xml.write('')

if __name__ == "__main__":
    main()  # 用于将mask生成xml
    splitData() # 用于切分数据(适用非随机的切分)

汇总

1.从labelImg格式->txt格式(YOLO格式、ICDAR2015格式)

2.从二值mask->labelme格式->coco格式

3.从labelme格式->VOC格式+从二值mask->VOC格式

4.从RGB->二值mask->coco格式

5.实例分割mask->语义分割mask->扩增mask

6.COCO格式->YOLO格式

双模图片数据与对应标注文件的命名对齐

xml标注文件的节点、属性、文本的修正

cocoJson数据集统计分析

你可能感兴趣的:(#,自建AI数据集,labelme格式,VOC格式,数据集格式转换,深度学习)