VOC的XML标记批量转为Labelme的JSON格式

最近在研究使用深度学习进行图像分割。训练前需要先把标记图像转成掩膜图像,而只有Labelme格式的标记支持转成掩膜图像,因此准备先把手头的VOC XML格式的标记转成Labelme格式。参考网络上的代码,编写了一个单文件的转换小工具 voc_to_labelme.py。

VOC数据集的格式如下:

VOCdevkit/
   VOC2007/
      Annotations/
      JPEGImages/

命令行工具用法:

python voc_to_labelme.py 
命令行参数解释:
--voc_dir  VOC数据集目录,默认VOCdevkit/VOC2007
--labelme_version Labelme版本号,默认3.2.6
--labelme_shape   Labelme标记框形状,支持rectangle或polygon,默认rectangle
--image_data      Labelme的imageData节点是否输出数据,默认True
--out_dir         Labelme格式数据集的输出目录

voc_to_labelme.py的源码:

'''
Convert VOC-format annotations to labelme's JSON format.
'''

import argparse
import glob
import base64
import logging
import io
import os
import PIL
import PIL.Image
import xml.etree.ElementTree as ET
import json
import shutil

def _str_to_bool(value):
    """Parse a command-line token into a bool.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value (including "False") becomes True. This converter reads
    the text instead.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, as it was under bool('').
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got %r' % (value,))


def parse_opt(known=False):
    """Parse the command-line options of the converter.

    Args:
        known: when True, unrecognised arguments are ignored
            (``parse_known_args``); when False (default), they are an error.

    Returns:
        argparse.Namespace with ``voc_dir``, ``labelme_version``,
        ``labelme_shape``, ``image_data`` and ``out_dir``.
    """
    parser = argparse.ArgumentParser(description='xml2json')
    parser.add_argument('--voc_dir', default='VOCdevkit/VOC2007', help='voc directory')
    parser.add_argument('--labelme_version', default='3.2.6', help='labelme version')
    parser.add_argument('--labelme_shape', default='rectangle', help='labelme shape')
    # A real string-to-bool converter so that "--image_data False" works.
    parser.add_argument('--image_data', default=True, type=_str_to_bool, help='wether write image data to json')
    parser.add_argument('--out_dir', default='labelme', help='the path of output directory')
    if known:
        return parser.parse_known_args()[0]
    return parser.parse_args()

def img_data_to_pil(img_data):
    """Open encoded image bytes as a PIL image.

    Args:
        img_data: bytes-like object holding an encoded image file.

    Returns:
        The object returned by ``PIL.Image.open`` for an in-memory buffer
        containing ``img_data``.
    """
    buffer = io.BytesIO()
    buffer.write(img_data)
    return PIL.Image.open(buffer)


def img_data_to_arr(img_data):
    """Decode encoded image bytes into a NumPy array.

    Args:
        img_data: bytes-like object holding an encoded image file.

    Returns:
        ``numpy.ndarray`` built from the decoded PIL image.
    """
    # NumPy is not among the file's top-level imports, so the bare `np`
    # reference raised NameError. Importing here keeps the script usable
    # without NumPy unless this helper is actually called.
    import numpy as np

    return np.array(img_data_to_pil(img_data))
    
def img_arr_to_b64(img_arr):
    """Encode an image array as base64 of its PNG serialisation.

    Args:
        img_arr: array accepted by ``PIL.Image.fromarray``.

    Returns:
        The base64-encoded PNG data, as returned by the ``base64`` module.
    """
    buffer = io.BytesIO()
    PIL.Image.fromarray(img_arr).save(buffer, format="PNG")
    png_bytes = buffer.getvalue()
    # Use encodebytes when the base64 module has it; otherwise fall back to
    # the older encodestring name.
    if hasattr(base64, "encodebytes"):
        return base64.encodebytes(png_bytes)
    return base64.encodestring(png_bytes)

def apply_exif_orientation(image):
    """Return *image* transposed according to its EXIF ``Orientation`` tag.

    Args:
        image: a PIL image. Objects without ``_getexif()`` or without EXIF
            data are returned unchanged.

    Returns:
        The original image when no orientation change is needed (no EXIF,
        orientation 1, or an unknown/missing value). Otherwise a new image
        that is mirrored, flipped or rotated.
    """
    try:
        exif = image._getexif()
    except AttributeError:
        # Not every image class provides _getexif().
        exif = None

    if exif is None:
        return image

    # The file imports only `PIL` and `PIL.Image`. ExifTags and ImageOps are
    # separate submodules and must be imported before attribute access,
    # otherwise `PIL.ImageOps` can raise AttributeError.
    import PIL.ExifTags
    import PIL.Image
    import PIL.ImageOps

    # Re-key the numeric EXIF tag ids by their symbolic names.
    named_tags = {
        PIL.ExifTags.TAGS[k]: v
        for k, v in exif.items()
        if k in PIL.ExifTags.TAGS
    }

    orientation = named_tags.get("Orientation", None)

    if orientation == 1:
        # do nothing
        return image
    elif orientation == 2:
        # left-to-right mirror
        return PIL.ImageOps.mirror(image)
    elif orientation == 3:
        # rotate 180
        return image.transpose(PIL.Image.ROTATE_180)
    elif orientation == 4:
        # top-to-bottom mirror
        return PIL.ImageOps.flip(image)
    elif orientation == 5:
        # top-to-left mirror
        return PIL.ImageOps.mirror(image.transpose(PIL.Image.ROTATE_270))
    elif orientation == 6:
        # rotate 270
        return image.transpose(PIL.Image.ROTATE_270)
    elif orientation == 7:
        # top-to-right mirror
        return PIL.ImageOps.mirror(image.transpose(PIL.Image.ROTATE_90))
    elif orientation == 8:
        # rotate 90
        return image.transpose(PIL.Image.ROTATE_90)
    else:
        # Missing or unrecognised orientation: leave the image untouched.
        return image
        
def load_image_file(filename):
    """Load an image file, apply its EXIF orientation and return it re-encoded.

    Args:
        filename: path of the image file to read.

    Returns:
        bytes of the image re-saved as JPEG when the extension is ``.jpg`` or
        ``.jpeg`` (case-insensitive), and as PNG otherwise.
    """
    ext = os.path.splitext(filename)[1].lower()
    # Named save_format so the builtin `format` is not shadowed.
    save_format = "JPEG" if ext in (".jpg", ".jpeg") else "PNG"

    # The context manager closes the file handle held by the opened image,
    # which the previous version left open.
    with PIL.Image.open(filename) as source:
        # apply orientation to image according to exif
        oriented = apply_exif_orientation(source)

        with io.BytesIO() as f:
            oriented.save(f, format=save_format)
            return f.getvalue()

def read_xml_gtbox_and_label(xml_path):
    """Read the bounding boxes and image size from one VOC annotation file.

    Args:
        xml_path: annotation source passed to ``ElementTree.parse``.

    Returns:
        Tuple ``(points, width, height)``. ``points`` is a list of
        ``[name, xmin, ymin, xmax, ymax]`` entries, one per ``<object>``
        element, with float coordinates. ``width`` and ``height`` are the
        ints read from the ``<size>`` element.
    """
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    # <size>/<depth> and <object>/<pose> were read but never used, and the
    # lookups crashed with AttributeError when those elements were absent.
    # They are no longer read.
    points = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        xmlbox = obj.find('bndbox')
        xmin = float(xmlbox.find('xmin').text)
        xmax = float(xmlbox.find('xmax').text)
        ymin = float(xmlbox.find('ymin').text)
        ymax = float(xmlbox.find('ymax').text)
        points.append([cls, xmin, ymin, xmax, ymax])
    return points, width, height

def voc_bndbox_to_labelme(opt):
    """Convert every VOC XML annotation under ``opt.voc_dir`` to labelme JSON.

    For each ``Annotations/*.xml`` file, the matching ``JPEGImages/<stem>.jpg``
    is copied into ``opt.out_dir`` and ``<stem>.json`` is written next to it.

    Args:
        opt: namespace providing ``voc_dir``, ``out_dir``, ``labelme_version``,
            ``labelme_shape`` and ``image_data``. When ``labelme_shape`` is
            ``'rectangle'``, each box is written as two corner points; for
            any other value it is written as four corner points.
    """
    xml_dir = os.path.join(opt.voc_dir, 'Annotations')
    img_dir = os.path.join(opt.voc_dir, 'JPEGImages')
    # exist_ok replaces the separate exists() check.
    os.makedirs(opt.out_dir, exist_ok=True)

    for xml_file in glob.glob(os.path.join(xml_dir, '*.xml')):
        # splitext drops only the extension. str.rstrip('.xml') strips any
        # trailing '.', 'x', 'm' or 'l' characters, which mangled stems such
        # as "film" (-> "fi").
        stem = os.path.splitext(os.path.basename(xml_file))[0]
        img_name = stem + '.jpg'
        img_path = os.path.join(img_dir, img_name)
        points, width, height = read_xml_gtbox_and_label(xml_file)

        shapes = []
        for cls, xmin, ymin, xmax, ymax in points:
            if opt.labelme_shape == 'rectangle':
                corners = [[xmin, ymin], [xmax, ymax]]
            else:  # polygon
                corners = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
            shapes.append({
                'label': cls,
                'points': corners,
                'line_color': None,
                'fill_color': None,
                'shape_type': opt.labelme_shape,
                'flags': {},
            })

        if opt.image_data:
            # Embed the original image file bytes as base64 text.
            with open(img_path, "rb") as f:
                image_data = base64.b64encode(f.read()).decode("utf-8")
        else:
            image_data = None

        # Key order matches the order in which the fields were written before.
        json_str = {
            'version': opt.labelme_version,
            'flags': {},
            'shapes': shapes,
            'imagePath': img_name,
            'imageData': image_data,
            'imageHeight': height,
            'imageWidth': width,
            'lineColor': [0, 255, 0, 128],
            'fillColor': [255, 0, 0, 128],
        }

        shutil.copy(img_path, os.path.join(opt.out_dir, img_name))
        json_file = os.path.join(opt.out_dir, stem + '.json')
        with open(json_file, 'w') as f:
            json.dump(json_str, f, indent=2)

def main(opt):
    """Run the VOC-to-labelme conversion with the parsed options *opt*."""
    voc_bndbox_to_labelme(opt)
    
if __name__ == '__main__':
    # Script entry point: parse the command-line options, then convert.
    opt = parse_opt()
    main(opt)

参考文章:

https://blog.csdn.net/qq_43276926/article/details/124259734

你可能感兴趣的:(深度学习,xml,计算机视觉,人工智能)