对xml的解析与操作在《python学习(二) ElementTree解析、读写、创建xml文件》一文中介绍完毕
本文主要给出使用ElementTree操作VOC数据集xml标注文件的具体实例
并附带一些实用小程序供大家参考使用,程序代码在下面的github网站中
https://github.com/A-mockingbird/VOCtype-datasetOperation
里面有统计VOC数据集各类别目标框数量、删除某一类别或修改某一类别名称等操作的代码
1.介绍一个简单的实例:统计自己创建voc数据集中各类别目标框数量
(1)import所需的库
import io
import sys
import os
import xml.etree.ElementTree as ET
如遇到输出编码问题,可将标准输出重新包装为utf8编码:
sys.stdout=io.TextIOWrapper(sys.stdout.buffer,encoding='utf8')
(2)解析xml文件
filepath为xml文件名
anno_path存储xml文件的文件夹路径
获取xml树结构
对xml树进行遍历,获取object(即目标)子节点
对object中的目标框类别、位置信息解析,存入annos列表
返回此xml文件中目标框数量num及annos(返回顺序为num, annos)
def _ParseAnnotation(filepath):
    """Parse one VOC-style XML annotation file into a list of boxes.

    The file is read from ``anno_path + filepath``, where ``anno_path`` is a
    module-level folder path.

    Args:
        filepath: File name of the XML annotation, relative to ``anno_path``.

    Returns:
        A ``(num, annos)`` tuple. ``num`` is the number of boxes found.
        The first ``num`` entries of ``annos`` are dicts with the keys
        ``'name'``, ``'xmin'``, ``'ymin'``, ``'xmax'`` and ``'ymax'`` (the
        coordinates converted to ``int``). The list is padded with ``None``
        up to 30 entries so short results keep the fixed-size shape earlier
        versions returned; it grows beyond 30 when a file has more boxes.
        A missing file is reported on stdout and yields ``num == 0``.
    """
    min_slots = 30  # historical fixed length of the returned list
    full_path = anno_path + filepath
    if not os.path.exists(full_path):
        print(filepath+' :not found')
        return 0, [None] * min_slots

    tree = ET.parse(full_path)
    annos = []
    for annoobject in tree.iter():
        if 'object' not in annoobject.tag:
            continue
        # Collect per object so nothing leaks over from a previous object
        # when a tag is missing.
        name = None
        boxes = []
        for element in annoobject:
            if 'name' in element.tag:
                name = element.text
            if 'bndbox' in element.tag:
                coords = {}
                for size in element:
                    for key in ('xmin', 'ymin', 'xmax', 'ymax'):
                        if key in size.tag:
                            coords[key] = size.text
                boxes.append(coords)
        for coords in boxes:
            # Skip boxes that lack a coordinate instead of reusing stale values.
            if len(coords) < 4 or any(v is None for v in coords.values()):
                continue
            annos.append({'name': name,
                          'xmin': int(coords['xmin']),
                          'ymin': int(coords['ymin']),
                          'xmax': int(coords['xmax']),
                          'ymax': int(coords['ymax'])})

    num = len(annos)
    # A negative multiplier yields an empty list, so longer results are kept as is.
    annos.extend([None] * (min_slots - num))
    return num, annos
(3)统计各类别目标框数量
num为之前获取的当前xml文件的目标框数量,annos存储目标框类别、位置信息,
anno_num为存储的各类别目标框总数
class_name为存储自己创建的VOC数据集类别名称的列表,其顺序与anno_num中的计数一一对应
def _Count(num, annos, anno_num):
    """Add the boxes of one annotation file to the per-class totals.

    Args:
        num: Number of leading entries of ``annos`` to count.
        annos: List of box dicts; each counted entry must have a ``'name'`` key.
        anno_num: Running totals, index-aligned with the module-level
            ``class_name`` list. Updated in place.

    Returns:
        The same ``anno_num`` list, after the update.
    """
    for box_index in range(num):
        for slot, known_class in enumerate(class_name):
            if annos[box_index]['name'] == known_class:
                anno_num[slot] += 1
    return anno_num
(4)批量删除或修改类别
import io
import sys
import os
import xml.etree.ElementTree as ET
# Uncomment the next line if console output runs into encoding problems.
#sys.stdout=io.TextIOWrapper(sys.stdout.buffer,encoding='utf8')
# Delete or rename one class across the dataset's XML annotation files.
# Set anno_path to the folder that holds the XML files. Use forward slashes
# and keep the trailing slash: file names are appended to it directly.
# old_annotation is the label to rename; new_annotation is its replacement.
anno_path = 'F:/数据集/MOCOD遮挡目标/MOCOD/Annotations-withoutocclusion/'
old_annotation = 'normal insulator'
new_annotation = 'normal single insulator'
# Labels whose objects are removed when the delete mode is selected.
del_annotations = ['occlusion']
# REPLACE = True  -> use the rename (replace) mode
# REPLACE = False -> use the delete mode
REPLACE = False
def _main():
    """Apply the configured batch operation to every XML file in ``anno_path``.

    When the module-level ``REPLACE`` flag is set, each file goes through
    ``_Replace_Annotation``; otherwise it goes through ``_Del_Annotation``.
    Finally the number of files in which at least one change was made is
    printed.

    Only regular files whose name ends in ``.xml`` are processed, so
    sub-folders or stray non-XML files in the folder no longer abort the run
    with a parse error.
    """
    handler = _Replace_Annotation if REPLACE else _Del_Annotation
    changed_files = 0
    for file in os.listdir(anno_path):
        if not file.lower().endswith('.xml'):
            continue
        if not os.path.isfile(anno_path + file):
            continue
        if handler(file) > 0:
            changed_files += 1
    print('the number of xmlfile is :' + str(changed_files))
def _Replace_Annotation(filepath):
    """Rename every ``old_annotation`` label in one annotation file.

    Calls ``Replace_`` repeatedly; each call that returns ``False`` is
    counted as one renamed label.

    Args:
        filepath: File name of the XML annotation, relative to ``anno_path``.

    Returns:
        The number of labels renamed. ``0`` when the file does not exist
        (reported on stdout) or when ``old_annotation`` and
        ``new_annotation`` are identical.
    """
    if not os.path.exists(anno_path + filepath):
        print(filepath+' :not found')
        return 0
    # Renaming a label to itself would match again on every pass and never
    # terminate, so treat it as nothing to do.
    if old_annotation == new_annotation:
        return 0

    replaced = 0
    while not Replace_(filepath):
        replaced += 1
    return replaced
def Replace_(filepath):
    """Rename the first remaining ``old_annotation`` label in one file.

    Scans the ``object`` elements of ``anno_path + filepath``. On the first
    ``name`` child whose text equals ``old_annotation``, the text is set to
    ``new_annotation``, the file name is printed and the file is rewritten.
    Only one label is changed per call; callers loop until ``True`` comes
    back.

    Args:
        filepath: File name of the XML annotation, relative to ``anno_path``.

    Returns:
        ``False`` when a label was renamed and the file was rewritten.
        ``True`` when there is nothing (left) to rename, which includes a
        missing file (reported on stdout) and identical old and new labels.
    """
    full_path = anno_path + filepath
    if not os.path.exists(full_path):
        print(filepath+' :not found')
        return True
    # A label renamed to itself would be found again on every call.
    if old_annotation == new_annotation:
        return True

    tree = ET.parse(full_path)
    for annoobject in tree.iter():
        if 'object' not in annoobject.tag:
            continue
        for element in annoobject:
            if 'name' in element.tag and element.text == old_annotation:
                element.text = new_annotation
                print(filepath)
                # Write back so the change takes effect on disk.
                tree.write(full_path, encoding="utf-8", xml_declaration=True)
                return False
    return True
def _Del_Annotation(filepath):
    """Delete every object with a label from ``del_annotations`` in one file.

    Calls ``Delete_`` repeatedly; each call that returns ``False`` is counted
    as one deleted object.

    Args:
        filepath: File name of the XML annotation, relative to ``anno_path``.

    Returns:
        The number of objects deleted. ``0`` when the file does not exist
        (reported on stdout).
    """
    if not os.path.exists(anno_path + filepath):
        print(filepath+' :not found')
        return 0

    deleted = 0
    while not Delete_(filepath):
        deleted += 1
    return deleted
def Delete_(filepath):
    """Delete the first remaining object whose label is in ``del_annotations``.

    Scans ``anno_path + filepath`` for ``object`` elements. On the first one
    with a ``name`` child whose text equals an entry of ``del_annotations``,
    the object is removed from its parent, the file name is printed and the
    file is rewritten. Only one object is removed per call; callers loop
    until ``True`` comes back.

    Args:
        filepath: File name of the XML annotation, relative to ``anno_path``.

    Returns:
        ``False`` when an object was deleted and the file was rewritten.
        ``True`` when there is nothing (left) to delete, which includes a
        missing file (reported on stdout).
    """
    full_path = anno_path + filepath
    if not os.path.exists(full_path):
        print(filepath+' :not found')
        return True

    tree = ET.parse(full_path)
    root = tree.getroot()
    # Walk parent by parent so the object is removed from the element that
    # actually contains it; Element.remove() raises ValueError when asked to
    # remove something that is not a direct child.
    for parent in root.iter():
        for annoobject in list(parent):
            if 'object' not in annoobject.tag:
                continue
            for element in annoobject:
                if 'name' not in element.tag:
                    continue
                if any(element.text == anno for anno in del_annotations):
                    parent.remove(annoobject)
                    print(filepath)
                    # Write back so the change takes effect on disk.
                    tree.write(full_path, encoding="utf-8", xml_declaration=True)
                    return False
    return True
if __name__ == "__main__":
    # Run the batch operation only when executed as a script, not on import.
    _main()
github:https://github.com/A-mockingbird/VOCxml_operator/blob/master/CountObject.py