I have recently been studying image segmentation with deep learning. The annotations first have to be converted into mask images, and only the Labelme annotation format supports conversion to masks, so I need to convert my existing VOC XML annotations into Labelme format first. Based on code found online, I wrote a small single-file conversion tool, voc_to_labelme.py.
The VOC dataset is laid out as follows:
VOCdevkit/
    VOC2007/
        Annotations/
        JPEGImages/
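For comparison, the converted output directory (--out_dir, default labelme) ends up flat: the script copies each image next to the JSON it generates for it. The file names below are only illustrative:
labelme/
    000001.jpg
    000001.json
    000002.jpg
    000002.json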
Command-line usage:
python voc_to_labelme.py
Command-line arguments:
--voc_dir          VOC dataset directory, default VOCdevkit/VOC2007
--labelme_version  Labelme version number, default 3.2.6
--labelme_shape    shape used for the Labelme annotations, rectangle or polygon, default rectangle
--image_data       whether to write the base64-encoded image into the imageData field of the JSON, default True
--out_dir          output directory for the Labelme-format dataset, default labelme
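For example, to convert a VOC2007-style dataset into polygon-shaped Labelme annotations without embedding the image data (the paths here are just an illustration):
python voc_to_labelme.py --voc_dir VOCdevkit/VOC2007 --labelme_shape polygon --image_data False --out_dir labelme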
Source code of voc_to_labelme.py:
'''
Convert VOC-format annotations to Labelme JSON format
'''
import argparse
import glob
import base64
import logging
import io
import os
import numpy as np
import PIL
import PIL.Image
import PIL.ImageOps
import PIL.ExifTags
import xml.etree.ElementTree as ET
import json
import shutil

def parse_opt(known=False):
    parser = argparse.ArgumentParser(description='xml2json')
    parser.add_argument('--voc_dir', default='VOCdevkit/VOC2007', help='voc directory')
    parser.add_argument('--labelme_version', default='3.2.6', help='labelme version')
    parser.add_argument('--labelme_shape', default='rectangle', help='labelme shape: rectangle or polygon')
    # type=bool would treat any non-empty string (including "False") as True,
    # so parse the flag value explicitly
    parser.add_argument('--image_data', default=True,
                        type=lambda x: str(x).lower() in ('true', '1', 'yes'),
                        help='whether to write image data to json')
    parser.add_argument('--out_dir', default='labelme', help='the path of output directory')
    opt = parser.parse_args()
    return opt

# image helpers below are not called by the main conversion path, but are kept as utilities
def img_data_to_pil(img_data):
    # wrap raw image bytes in a BytesIO buffer and open with PIL
    f = io.BytesIO()
    f.write(img_data)
    img_pil = PIL.Image.open(f)
    return img_pil

def img_data_to_arr(img_data):
    # raw image bytes -> numpy array
    img_pil = img_data_to_pil(img_data)
    img_arr = np.array(img_pil)
    return img_arr

def img_arr_to_b64(img_arr):
    # numpy array -> base64-encoded PNG bytes
    img_pil = PIL.Image.fromarray(img_arr)
    f = io.BytesIO()
    img_pil.save(f, format="PNG")
    img_bin = f.getvalue()
    if hasattr(base64, "encodebytes"):
        img_b64 = base64.encodebytes(img_bin)
    else:
        img_b64 = base64.encodestring(img_bin)
    return img_b64

def apply_exif_orientation(image):
    try:
        exif = image._getexif()
    except AttributeError:
        exif = None
    if exif is None:
        return image
    exif = {
        PIL.ExifTags.TAGS[k]: v
        for k, v in exif.items()
        if k in PIL.ExifTags.TAGS
    }
    orientation = exif.get("Orientation", None)
    if orientation == 1:
        # do nothing
        return image
    elif orientation == 2:
        # left-to-right mirror
        return PIL.ImageOps.mirror(image)
    elif orientation == 3:
        # rotate 180
        return image.transpose(PIL.Image.ROTATE_180)
    elif orientation == 4:
        # top-to-bottom mirror
        return PIL.ImageOps.flip(image)
    elif orientation == 5:
        # top-to-left mirror
        return PIL.ImageOps.mirror(image.transpose(PIL.Image.ROTATE_270))
    elif orientation == 6:
        # rotate 270
        return image.transpose(PIL.Image.ROTATE_270)
    elif orientation == 7:
        # top-to-right mirror
        return PIL.ImageOps.mirror(image.transpose(PIL.Image.ROTATE_90))
    elif orientation == 8:
        # rotate 90
        return image.transpose(PIL.Image.ROTATE_90)
    else:
        return image

def load_image_file(filename):
    image_pil = PIL.Image.open(filename)
    # apply orientation to image according to exif
    image_pil = apply_exif_orientation(image_pil)
    with io.BytesIO() as f:
        ext = os.path.splitext(filename)[1].lower()
        if ext in [".jpg", ".jpeg"]:
            format = "JPEG"
        else:
            format = "PNG"
        image_pil.save(f, format=format)
        f.seek(0)
        return f.read()

def read_xml_gtbox_and_label(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)
    depth = int(size.find('depth').text)  # parsed but not used below
    points = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        # <pose> is optional in some VOC-style annotations; guard against a missing node
        pose_node = obj.find('pose')
        pose = pose_node.text if pose_node is not None else None
        xmlbox = obj.find('bndbox')
        xmin = float(xmlbox.find('xmin').text)
        xmax = float(xmlbox.find('xmax').text)
        ymin = float(xmlbox.find('ymin').text)
        ymax = float(xmlbox.find('ymax').text)
        point = [cls, xmin, ymin, xmax, ymax]
        points.append(point)
    return points, width, height

def voc_bndbox_to_labelme(opt):
    xml_dir = os.path.join(opt.voc_dir, 'Annotations')
    img_dir = os.path.join(opt.voc_dir, 'JPEGImages')
    if not os.path.exists(opt.out_dir):
        os.makedirs(opt.out_dir)
    xml_files = glob.glob(os.path.join(xml_dir, '*.xml'))
    for xml_file in xml_files:
        _, filename = os.path.split(xml_file)
        # str.rstrip('.xml') strips characters, not the suffix,
        # so use os.path.splitext to drop the extension safely
        filename = os.path.splitext(filename)[0]
        img_name = filename + '.jpg'
        img_path = os.path.join(img_dir, img_name)
        points, width, height = read_xml_gtbox_and_label(xml_file)
        json_str = {}
        json_str['version'] = opt.labelme_version
        json_str['flags'] = {}
        shapes = []
        for i in range(len(points)):
            cls, xmin, ymin, xmax, ymax = points[i]
            shape = {}
            shape['label'] = cls
            if opt.labelme_shape == 'rectangle':
                # rectangle: top-left and bottom-right corners
                shape['points'] = [[xmin, ymin], [xmax, ymax]]
            else:  # polygon: four corners of the box
                shape['points'] = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
            shape['line_color'] = None
            shape['fill_color'] = None
            shape['shape_type'] = opt.labelme_shape
            shape['flags'] = {}
            shapes.append(shape)
        json_str['shapes'] = shapes
        json_str['imagePath'] = img_name
        if opt.image_data:
            # embed the raw image file as base64 in imageData
            with open(img_path, "rb") as f:
                image_data = f.read()
            json_str['imageData'] = base64.b64encode(image_data).decode("utf-8")
        else:
            json_str['imageData'] = None
        json_str['imageHeight'] = height
        json_str['imageWidth'] = width
        json_str['lineColor'] = [0, 255, 0, 128]
        json_str['fillColor'] = [255, 0, 0, 128]
        # copy the image next to its json so the output folder can be opened in labelme
        target_path = os.path.join(opt.out_dir, img_name)
        shutil.copy(img_path, target_path)
        json_file = os.path.join(opt.out_dir, filename + '.json')
        with open(json_file, 'w') as f:
            json.dump(json_str, f, indent=2)

def main(opt):
    voc_bndbox_to_labelme(opt)

if __name__ == '__main__':
    opt = parse_opt()
    main(opt)
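For reference, a converted file looks roughly like the sketch below (rectangle mode, imageData disabled; the label, coordinates, file name, and image size are made up for illustration):
{
  "version": "3.2.6",
  "flags": {},
  "shapes": [
    {
      "label": "dog",
      "points": [[48.0, 62.0], [371.0, 340.0]],
      "line_color": null,
      "fill_color": null,
      "shape_type": "rectangle",
      "flags": {}
    }
  ],
  "imagePath": "000001.jpg",
  "imageData": null,
  "imageHeight": 375,
  "imageWidth": 500,
  "lineColor": [0, 255, 0, 128],
  "fillColor": [255, 0, 0, 128]
}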
Reference:
https://blog.csdn.net/qq_43276926/article/details/124259734