VOC转yolov5 数据集格式转换脚本

voc_label.py

import pathlib
import random
import shutil
import sys
import warnings
import xml.etree.ElementTree as ET

# Class names to export; a label's class id is its index in this list.
classes = ['closed_eye', 'open_eye', 'closed_mouth', 'open_mouth']

# Every dataset folder is resolved relative to the folder holding this script.
script_path = pathlib.Path(sys.argv[0])
img_path = script_path.parent / "images"
label_path = script_path.parent / "labels"

# Start from a clean slate: remove any previous output tree, then recreate the
# train/val sub-folders for both the images and the labels.
for out_root in (img_path, label_path):
    if out_root.exists():
        shutil.rmtree(str(out_root))
    (out_root / 'train').mkdir(0o700, parents=True, exist_ok=True)
    (out_root / 'val').mkdir(0o700, exist_ok=True)

# Gather all annotation files (recursively) and shuffle them so that the
# train/val split below is random.
xml_path = script_path.parent / "Annotations"
xml_list = list(xml_path.rglob('*.xml'))
random.shuffle(xml_list)

txt_path = script_path.parent / 'ImageSets/Main'
txt_path.mkdir(0o700, parents=True, exist_ok=True)

# The first `train_num` shuffled annotations go to train, the rest to val.
train_percent = 0.9
train_num = int(len(xml_list) * train_percent)


# Convert every annotation: copy its image into the train/val split and write
# the matching YOLO label file (one "cls_id x y w h" line per kept object, with
# all four box values normalised by the image width/height).
for cnt, xml_fpath in enumerate(xml_list):
    # Keep the stem as text so zero-padded names such as "000001" still map to
    # "000001.jpg" / "000001.txt"; int() would drop the padding and would also
    # crash on non-numeric file names.
    img_idx = xml_fpath.stem
    mode = 'train' if cnt < train_num else 'val'

    # Copy the image
    src_img_path = script_path.parent / f'JPEGImages/{img_idx}.jpg'
    dst_img_path = img_path / mode
    shutil.copy(str(src_img_path), str(dst_img_path))

    # Parse inside a context manager so the handle is closed right away
    # instead of leaking one open file per annotation.
    with xml_fpath.open('r', encoding='utf-8') as src_xml_file:
        xml_root = ET.parse(src_xml_file).getroot()

    xml_size = xml_root.find('size')
    width = int(xml_size.find('width').text)
    height = int(xml_size.find('height').text)

    # The label file is always created, even when no object is kept.
    dst_txt_path = label_path / mode / f'{img_idx}.txt'
    with dst_txt_path.open('w', encoding='utf-8') as dst_txt_file:
        for obj in xml_root.iter('object'):
            # Some annotation tools omit <difficult>; treat that as "not
            # difficult" rather than failing on a missing node.
            difficult_node = obj.find('difficult')
            difficult = 0 if difficult_node is None else int(difficult_node.text)
            cls = obj.find('name').text
            if cls not in classes or difficult == 1:
                continue
            cls_id = classes.index(cls)

            xmlbox = obj.find('bndbox')
            xmin = float(xmlbox.find('xmin').text)
            xmax = float(xmlbox.find('xmax').text)
            ymin = float(xmlbox.find('ymin').text)
            ymax = float(xmlbox.find('ymax').text)

            # Boxes reaching past the image are reported and clamped; a warning
            # (not an exception) keeps the conversion running.
            if xmax > width:
                warnings.warn(
                    f"xmax > width in img_id:{img_idx} cls_id:{cls_id}",
                    RuntimeWarning)
                xmax = width
            if ymax > height:
                warnings.warn(
                    f"ymax > height in img_id:{img_idx} cls_id:{cls_id}",
                    RuntimeWarning)
                ymax = height

            # Centre and size as fractions of the image; the "- 1" shifts the
            # centre by one pixel before normalising.
            # NOTE(review): presumably compensates for 1-based VOC pixel
            # coordinates - confirm against the dataset.
            x = ((xmin + xmax) / 2 - 1) / width
            y = ((ymin + ymax) / 2 - 1) / height
            w = (xmax - xmin) / width
            h = (ymax - ymin) / height

            dst_txt_file.write(f"{cls_id} {x} {y} {w} {h}\n")

脚本位置
VOC转yolov5 数据集格式转换脚本_第1张图片
yaml文件内容
VOC转yolov5 数据集格式转换脚本_第2张图片

你可能感兴趣的:(Python,深度学习,python,深度学习)