xml terminator, 读取根目录下所有xml,按照标签类别分别放到各自的文件夹里

假如 xml文件都藏在这些文件夹里面


image.png

文件夹里面还有子文件夹,多重子文件夹套娃


image.png

要注意里面的xml文件对应着同名的jpg文件!

处理后的结果,所有xml按照标签分好类丢进对应的文件夹里面


image.png

每个标签文件夹下都创建了 anotation/imgs/labels 三个子文件夹(注意:代码里标注文件夹的实际拼写是 anotation),xml 文件复制到 anotation,同名的 jpg 文件复制到 imgs;labels 文件夹只是创建出来,脚本不会往里面写文件


image.png
import os
import xml.etree.ElementTree as ET
from shutil import copyfile
import pathlib as  pathlib
from collections import defaultdict

def parse_obj(filename):
    """Read the label of every ``<object>`` entry in an annotation XML file.

    Args:
        filename: Path of the XML file to parse.

    Returns:
        A list with one ``{'name': <label text>}`` dict per ``object``
        element found directly under the document root, in document order.
    """
    document = ET.parse(filename)
    # The label is the text enclosed by the <name> tag inside each <object>;
    # open one of the XML files to see the layout.
    return [
        {'name': node.find('name').text}
        for node in document.findall('object')
    ]


def getallfiles(path):
    """Return the paths of all ``.xml`` files found anywhere below *path*.

    The directory tree is walked recursively with ``os.walk``, so files in
    nested sub-folders are included.

    Args:
        path: Root directory to search.

    Returns:
        A list of paths (each ``dirpath`` joined with the file name) for
        every regular file whose name ends in ``.xml``. The list is empty
        when nothing matches or *path* cannot be walked.
    """
    file_xml = []
    for dirpath, _dirnames, filenames in os.walk(path):
        # Only look at file names: a directory that happens to be called
        # "something.xml" must not be reported as an annotation file.
        # The suffix is compared in full, so look-alike extensions such as
        # ".xmp" are rejected.
        file_xml.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.endswith('.xml')
        )
    return file_xml


if __name__ == '__main__':
    # Folder that is searched recursively for annotation (.xml) files.
    src_root = r'D:\Yuqian_Yang\project_yolov4\yolo\data\smokephone\imgs'
    # Destination root; one sub-folder per label is created below it.
    # Written as a raw string so the backslashes are never read as escapes.
    aim_root = r'D:\Yuqian_Yang\project_yolov4\yolo\data\smoke'

    filenames = getallfiles(src_root)

    # label -> annotation files containing at least one object with that label.
    copy_key = defaultdict(list)
    for xml_path in filenames:
        # dict.fromkeys de-duplicates while keeping document order, so a file
        # with several objects of the same label is queued (and copied) once.
        # str() keeps an empty <name/> filed under the folder name "None".
        labels = dict.fromkeys(str(rec['name']) for rec in parse_obj(xml_path))
        for label in labels:
            copy_key[label].append(xml_path)

    os.makedirs(aim_root, exist_ok=True)

    for label, xml_paths in copy_key.items():
        class_dir = os.path.join(aim_root, label)
        # exist_ok=True also repairs a class folder that exists but is
        # missing some of its sub-folders. "labels" is created but this
        # script never writes into it.
        for sub_dir in ("anotation", "imgs", "labels"):
            os.makedirs(os.path.join(class_dir, sub_dir), exist_ok=True)

        for xml_path in xml_paths:
            # Swap only the extension; a plain str.replace("xml", ...) would
            # also rewrite "xml" occurring in directory names.
            stem = os.path.splitext(xml_path)[0]
            image_path = next(
                (stem + ext for ext in (".jpg", ".JPG")
                 if pathlib.Path(stem + ext).exists()),
                None,
            )
            if image_path is None:
                print("Error: no such jpg file:", stem + ".jpg")
                continue

            # NOTE: destinations use the base name only, so files with the
            # same name coming from different source folders overwrite each
            # other.
            try:
                copyfile(
                    image_path,
                    os.path.join(class_dir, "imgs", os.path.basename(image_path)),
                )
                copyfile(
                    xml_path,
                    os.path.join(class_dir, "anotation", os.path.basename(xml_path)),
                )
            except OSError as err:
                # Keep going with the remaining files, but say why this one
                # could not be copied.
                print("warning:", image_path, err)

你可能感兴趣的:(xml terminator, 读取根目录下所有xml,按照标签类别分别放到各自的文件夹里)