提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
最近使用了很多其它模型,大多数模型都有各自对应的数据格式,有时用coco json格式的数据测试其它模型也需要先进行数据格式转换,有时候也想把coco json的标注显示在图上查看。那么,你可能需要一份coco json格式转xml格式的代码来帮助查看;而且我总是会莫名丢失这些工具代码,从头写较为麻烦。为解决这些问题,本文以coco数据集格式为标准,记录如何将coco数据格式转为xml格式。
下载好数据给定json格式路径变量值json_path,指定输出文件路径save_path,然后直接调用cocojson2xml函数,代码如下:
if __name__ == '__main__':
    # Directory that receives the generated XML files, one per image.
    save_path = r'C:\Users\Administrator\Desktop\Data\coco2017\123'
    # COCO-format annotation file to convert.
    json_path = r"C:\Users\Administrator\Desktop\Data\coco2017\annotations\train.json"
    cocojson2xml(json_path=json_path, save_path=save_path)
代码自动读取coco json格式images、annotation、categories,并通过image_id和category_id找到每个图对应所有box与对应box的类别,同时从coco json文件解析图像高宽,而后使用函数等逻辑转为xml格式,代码会自动完成。
build_dir:构建文件夹
read_json:读取coco json文件注释
get_name:处理文件名字符串,若传入的是路径格式,则去掉目录部分,只保留最后的文件名
product_xml:根据图片名、box与类别生成xml文件
cocojson2xml:这是主要集成函数,coco转xml
鉴于代码主要是逻辑处理,本身难度不大,我直接粘贴整个代码,可直接复制使用,代码如下:
import os
import json
from tqdm import tqdm
from lxml.etree import Element, SubElement, tostring, ElementTree
from xml.dom.minidom import parseString
import numpy as np
def build_dir(root):
    """Create the directory ``root`` (with any missing parents) and return it.

    :param root: path of the directory that must exist after the call
    :return: ``root`` unchanged, so the call can be used inline
    :raises FileExistsError: if ``root`` exists but is not a directory
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the same directory in between.
    os.makedirs(root, exist_ok=True)
    return root
def read_json(json_root):
    """Load a JSON file and return the parsed content.

    :param json_root: path of the JSON file to read
    :return: the object produced by ``json.load`` for that file
    """
    # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also strips a
    # leading byte-order mark, which json.load would otherwise reject.
    with open(json_root, encoding='utf-8-sig') as f:
        return json.load(f)
def get_name(name_str):
    """Return the last component of ``name_str``.

    Forward slashes are handled first (server-style paths), then backslashes
    (Windows-style paths); a string without separators is returned unchanged.
    """
    after_slash = name_str.rpartition('/')[2]
    return after_slash.rpartition('\\')[2]
def product_xml(name_img, boxes, codes, img=None, wh=None, save_path=None):
    """Build a VOC-style annotation tree for one image and optionally save it.

    :param name_img: image file name; written to ``<filename>`` and, with its
        extension replaced by ``.xml``, used as the output file name
    :param boxes: list of boxes, each indexable as [xmin, ymin, xmax, ymax]
    :param codes: list of class names, ``codes[i]`` labels ``boxes[i]``
    :param img: optional already-loaded image exposing ``.shape``; assumed to
        be a rows-first array, i.e. ``shape[0]`` is height and ``shape[1]``
        is width (numpy/OpenCV layout) -- confirm for other image types
    :param wh: ``[width, height]``; required when ``img`` is None
    :param save_path: optional directory; when given, the pretty-printed XML
        is written to ``save_path/<image stem>.xml``
    :return: ``(tree, name)`` -- the ElementTree and the XML file name
    :raises ValueError: if neither ``img`` nor ``wh`` is provided
    """
    if img is not None:
        # Array images are rows-first: rows give the height, columns the width.
        height, width = img.shape[0], img.shape[1]
    elif wh is not None:
        width, height = wh[0], wh[1]
    else:
        # Raised explicitly instead of asserted so the check survives `python -O`.
        raise ValueError('either img or wh must be provided')

    node_root = Element('annotation')
    SubElement(node_root, 'folder').text = 'VOC2007'
    SubElement(node_root, 'filename').text = name_img

    node_size = SubElement(node_root, 'size')
    SubElement(node_size, 'width').text = str(width)
    SubElement(node_size, 'height').text = str(height)
    SubElement(node_size, 'depth').text = '3'

    for i, code in enumerate(codes):
        box = boxes[i]
        node_object = SubElement(node_root, 'object')
        SubElement(node_object, 'name').text = code
        SubElement(node_object, 'difficult').text = '0'
        node_bndbox = SubElement(node_object, 'bndbox')
        # Coordinates are truncated to integers, in the fixed VOC tag order.
        for idx, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
            SubElement(node_bndbox, tag).text = str(int(box[idx]))

    # splitext copes with extensions of any length (".jpeg", ".png", none),
    # unlike slicing off a fixed four characters.
    name = os.path.splitext(name_img)[0] + '.xml'
    tree = ElementTree(node_root)

    if save_path is not None:
        with open(os.path.join(save_path, name), 'wb') as f:
            # pretty_print inserts the line breaks and indentation.
            f.write(tostring(node_root, pretty_print=True))
    return tree, name
def cocojson2xml(json_path, save_path):
    """Convert a COCO-format annotation JSON into one VOC-style XML per image.

    Reads the ``images``, ``annotations`` and ``categories`` lists from the
    JSON file, collects every image's boxes and category names, and writes
    the XML files into ``save_path`` through ``product_xml``.

    :param json_path: path of the COCO JSON annotation file
    :param save_path: output directory; created if it does not exist
    :return: None
    :raises KeyError: if an annotation refers to a category id that is not
        listed in ``categories``
    """
    json_info = read_json(json_path)
    save_path = build_dir(save_path)
    info_img = json_info['images']
    info_ann = json_info['annotations']
    info_cat = json_info['categories']

    # Category id -> name. setdefault keeps the first entry of a duplicated id.
    cat_names = {}
    for cat in info_cat:
        cat_names.setdefault(cat['id'], cat['name'])

    # Group annotations by image in one pass instead of scanning the whole
    # annotation list once per image; per-image order follows the file order.
    anns_by_img = {}
    for ann in info_ann:
        anns_by_img.setdefault(ann['image_id'], []).append(ann)

    print('loading json file , json convert to dict format.............\n')
    xml_info = {}
    for img_ in tqdm(info_img):
        img_id = img_['id']
        ann_ = anns_by_img.get(img_id, [])
        # COCO stores [x, y, w, h]; the XML needs top-left and bottom-right corners.
        box_lst = [
            [a['bbox'][0], a['bbox'][1],
             a['bbox'][0] + a['bbox'][2], a['bbox'][1] + a['bbox'][3]]
            for a in ann_
        ]
        cat_lst = [cat_names[a['category_id']] for a in ann_]
        xml_info[img_id] = {
            'file_name': img_['file_name'],
            'w_h': [img_['width'], img_['height']],
            'box_lst': box_lst,
            'cat_lst': cat_lst,
        }

    print('\ngenerate xml file , save xml file.............')
    for v in tqdm(xml_info.values()):
        # file_name may carry directories; only the base name goes into the XML.
        name_img = get_name(v['file_name'])
        product_xml(name_img, v['box_lst'], v['cat_lst'],
                    wh=v['w_h'], save_path=save_path)
if __name__ == '__main__':
    # Directory that receives the generated XML files, one per image.
    save_path = r'C:\Users\Administrator\Desktop\Data\coco2017\123'
    # COCO-format annotation file to convert.
    json_path = r"C:\Users\Administrator\Desktop\Data\coco2017\annotations\train.json"
    cocojson2xml(json_path=json_path, save_path=save_path)
coco json格式转xml格式,复制即可使用处理工具。