#!/usr/bin/python
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
import glob
from PIL import Image
# Root directory of the dataset; the three working directories live directly under it.
LBAEL_ROOT = "E:/Python_WORKSPACE/Other_Code/xml文件解析/YoloLabels2dlibXML/"
# YOLO .txt label files (input).
src_path = f"{LBAEL_ROOT}labels"
# Images the labels refer to; read to obtain each picture's pixel size.
pic_path = f"{LBAEL_ROOT}JPEGImages"
# Where the generated dlib XML files are written (output).
dst_path = f"{LBAEL_ROOT}dlibXML"
# Class names, indexed by the integer class id that starts each YOLO label line.
classes = [
    "trunk",
    "bus",
    "car",
    "mbc",
    "mbc_trunk",
    "special",
]
# Pretty-print an XML element tree in place.
def prettyXml(element, indent, newline, level=0):
    """Indent *element* and all of its descendants in place.

    Only the ``text`` of elements that have children and the ``tail`` of
    child elements are rewritten; leaf elements keep their text untouched so
    it stays on the same line as its tags. Calling the function again on an
    already formatted tree leaves it unchanged.

    Args:
        element: The ``xml.etree.ElementTree.Element`` to format.
        indent: String used for one indentation step (e.g. ``"\\t"``).
        newline: String used as the line separator (e.g. ``"\\n"``).
        level: Nesting depth of *element*; 0 for the root.
    """
    children = list(element)
    child_indent = newline + indent * (level + 1)

    # Test the child count explicitly: truth-testing an Element is deprecated.
    if children:
        text = element.text
        if text is None or not text.strip():
            # No real text: just open a new, indented line for the first child.
            element.text = child_indent
        else:
            # Keep the text on its own indented line above the first child.
            element.text = child_indent + text.strip() + child_indent

    last = len(children) - 1
    for position, child in enumerate(children):
        if position < last:
            # A sibling follows, so the next line stays at the child's depth.
            child.tail = child_indent
        else:
            # The parent's closing tag follows, so step back one level.
            child.tail = newline + indent * level
        prettyXml(child, indent, newline, level=level + 1)
# Convert the many YOLOv3 .txt training labels into dlib training XML files.
def _new_dlib_dataset(name):
    """Create an empty dlib ``<dataset>`` tree; return ``(root, images_element)``."""
    root = ET.Element("dataset")
    ET.SubElement(root, "name").text = name
    images = ET.SubElement(root, "images")
    return root, images


def _yolo_fields_to_box(fields, img_w, img_h):
    """Build a dlib ``<box>`` element (with its ``<label>``) from one split YOLO line."""
    cls_id, x_center, y_center, rel_w, rel_h = fields[:5]
    x_center = float(x_center)
    y_center = float(y_center)
    rel_w = float(rel_w)
    rel_h = float(rel_h)
    # YOLO stores the box centre and size relative to the image size; dlib
    # wants the top-left corner and size in pixels.
    top = 1.0 / 2 * (2 * (img_h * y_center + 1) - rel_h * img_h)
    left = 1.0 / 2 * (2 * (img_w * x_center + 1) - rel_w * img_w)
    box = ET.Element(
        "box",
        top=str(int(top)),
        left=str(int(left)),
        width=str(int(img_w * rel_w)),
        height=str(int(img_h * rel_h)),
    )
    ET.SubElement(box, "label").text = classes[int(cls_id)]
    return box


def yoloLabels2xml(src_path, dst_path):
    """Merge a directory of YOLO ``.txt`` labels into two dlib XML files.

    Every line of a YOLO label file looks like
    ``2 0.192969 0.512500 0.167187 0.166667``
    (class id, x centre, y centre, width, height; the last four relative to
    the image size). The generated dlib XML looks like::

        <dataset>
            <name>yolo2dlib vehicle dataset</name>
            <images>
                <image file="1_1960.jpg">
                    <box top="502" left="949" width="121" height="112">
                        <label>car</label>
                    </box>
                </image>
            </images>
        </dataset>

    Label files are processed in sorted order; the first 90% go to
    ``training.xml`` and the remaining 10% to ``testing.xml``. The image
    belonging to ``NAME.txt`` is looked up as ``NAME.jpg`` in the
    module-level ``pic_path`` directory, and class ids are translated with
    the module-level ``classes`` list.

    Args:
        src_path: Directory containing the YOLO ``.txt`` label files.
        dst_path: Directory that receives ``training.xml`` and
            ``testing.xml``; it is created if it does not exist.

    Raises:
        ValueError: If a non-blank label line has fewer than five fields or
            a field is not numeric.
        IndexError: If a class id is outside the ``classes`` list.
    """
    train_root, train_images = _new_dlib_dataset("yolo2dlib vehicle dataset")
    test_root, test_images = _new_dlib_dataset("yolo2dlib vehicle dataset")

    # Only real label files, in a stable order so the split is reproducible.
    label_files = sorted(glob.glob(src_path + "/*.txt"))
    total = len(label_files)

    for idx, label_file in enumerate(label_files):
        # basename/splitext work with both "/" and "\\" separated paths.
        stem = os.path.splitext(os.path.basename(label_file))[0]
        pic_name = stem + ".jpg"

        # Only the size is needed; close the image file right away.
        with Image.open(pic_path + "/" + pic_name) as img:
            img_w, img_h = img.size

        image = ET.Element("image", file=pic_name)
        with open(label_file, encoding="utf-8") as label_fh:
            for line in label_fh:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                image.append(_yolo_fields_to_box(fields, img_w, img_h))

        # Integer arithmetic keeps the 90/10 boundary exact.
        if idx * 10 >= total * 9:
            test_images.append(image)
        else:
            train_images.append(image)

    prettyXml(train_root, '\t', '\n')
    prettyXml(test_root, '\t', '\n')

    if dst_path:
        os.makedirs(dst_path, exist_ok=True)
    ET.ElementTree(element=train_root).write(
        dst_path + "/training.xml", encoding="utf-8", xml_declaration=True)
    ET.ElementTree(element=test_root).write(
        dst_path + "/testing.xml", encoding="utf-8", xml_declaration=True)
# Script entry point: convert the configured label directory when run directly.
if __name__ == "__main__":
    yoloLabels2xml(src_path=src_path, dst_path=dst_path)
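# References:
# [1] Pretty-printing XML in Python (python xml格式美化)
# [2] Writing a list to a file and reading it back in Python (Python学习-将list列表写入文件并读取方法汇总)
# [3] Converting between list and string in Python (python中list与string的转换)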