在目标检测中我们可能会碰到很多种不同格式的标注文件,比如xml、txt、json等格式文件,这时就需要在不同格式之间进行数据转换。笔者最近也在做相关的数据处理,这里整理总结了一下。
用labelme可以直接标注成json格式文件(json也是coco数据集采用的标注文件格式,但labelme的json结构与coco并不相同)。下面的脚本针对每个json格式文件,读取标注信息,然后为每张图片生成对应的voc格式xml文件。
import os
import numpy as np
import codecs
import json
from glob import glob
import cv2
import shutil
from sklearn.model_selection import train_test_split
# 1. Input / output locations.
labelme_path = "./labelme/"  # labelme output: every image together with its JSON annotation
saved_path = "./VOCdevkit/VOC2007/"  # root of the VOC2007 tree that gets generated

# Fixed part of every annotation file; the dataset/owner values are the
# ones this script has always written.
VOC_HEADER_TEMPLATE = (
    "<annotation>\n"
    "\t<folder>UAV_data</folder>\n"
    "\t<filename>{filename}</filename>\n"
    "\t<source>\n"
    "\t\t<database>The UAV autolanding</database>\n"
    "\t\t<annotation>UAV AutoLanding</annotation>\n"
    "\t\t<image>flickr</image>\n"
    "\t\t<flickrid>NULL</flickrid>\n"
    "\t</source>\n"
    "\t<owner>\n"
    "\t\t<flickrid>NULL</flickrid>\n"
    "\t\t<name>ChaojieZhu</name>\n"
    "\t</owner>\n"
    "\t<size>\n"
    "\t\t<width>{width}</width>\n"
    "\t\t<height>{height}</height>\n"
    "\t\t<depth>{depth}</depth>\n"
    "\t</size>\n"
    "\t<segmented>0</segmented>\n"
)

# One <object> element per labelled shape.
VOC_OBJECT_TEMPLATE = (
    "\t<object>\n"
    "\t\t<name>{name}</name>\n"
    "\t\t<pose>Unspecified</pose>\n"
    "\t\t<truncated>1</truncated>\n"
    "\t\t<difficult>0</difficult>\n"
    "\t\t<bndbox>\n"
    "\t\t\t<xmin>{xmin}</xmin>\n"
    "\t\t\t<ymin>{ymin}</ymin>\n"
    "\t\t\t<xmax>{xmax}</xmax>\n"
    "\t\t\t<ymax>{ymax}</ymax>\n"
    "\t\t</bndbox>\n"
    "\t</object>\n"
)


def stem_names(paths, suffix):
    """Return the bare file names of *paths*: directory removed, cut at *suffix*."""
    # os.path.basename follows the platform's separator rules, so this also
    # works for the backslash paths glob returns on Windows.
    return [os.path.basename(path).split(suffix)[0] for path in paths]


def shape_bounding_box(points):
    """Return ``(xmin, ymin, xmax, ymax)`` enclosing a labelme shape.

    Returns ``None`` when the shape has no usable area (no points, a single
    point, or all points on one horizontal/vertical line); such shapes are
    left out of the XML.
    """
    pts = np.array(points)
    if pts.ndim != 2 or pts.shape[0] == 0 or pts.shape[1] < 2:
        return None
    xmin, xmax = pts[:, 0].min().item(), pts[:, 0].max().item()
    ymin, ymax = pts[:, 1].min().item(), pts[:, 1].max().item()
    if xmax <= xmin or ymax <= ymin:
        return None
    return xmin, ymin, xmax, ymax


def voc_header_xml(image_name, width, height, channels):
    """Return the opening part of a VOC annotation for one image."""
    return VOC_HEADER_TEMPLATE.format(
        filename=image_name, width=width, height=height, depth=channels
    )


def voc_object_xml(label, box):
    """Return the ``<object>`` element for *label* with ``box = (xmin, ymin, xmax, ymax)``."""
    xmin, ymin, xmax, ymax = box
    return VOC_OBJECT_TEMPLATE.format(
        name=label, xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax
    )


def write_voc_annotation(name):
    """Convert ``<labelme_path>/<name>.json`` into ``Annotations/<name>.xml``.

    Raises:
        FileNotFoundError: if the matching ``<name>.jpg`` cannot be read.
    """
    json_filename = labelme_path + name + ".json"
    with open(json_filename, "r", encoding="utf-8") as json_fp:
        annotation = json.load(json_fp)

    image_path = labelme_path + name + ".jpg"
    image = cv2.imread(image_path)
    if image is None:  # cv2.imread signals failure by returning None
        raise FileNotFoundError("cannot read image " + image_path)
    height, width, channels = image.shape

    xml_path = saved_path + "Annotations/" + name + ".xml"
    with codecs.open(xml_path, "w", "utf-8") as xml_out:
        xml_out.write(voc_header_xml(name + ".jpg", width, height, channels))
        for shape in annotation["shapes"]:
            box = shape_bounding_box(shape["points"])
            if box is None:
                continue
            label = shape["label"]
            xml_out.write(voc_object_xml(label, box))
            print(json_filename, box[0], box[1], box[2], box[3], label)
        xml_out.write("</annotation>")


def write_image_sets():
    """Write trainval/train/val/test lists into ``ImageSets/Main``.

    ``test.txt`` is created empty: this script has no separate test images.
    """
    main_dir = os.path.join(saved_path, "ImageSets", "Main")
    # Read back the annotations written above (same root as saved_path);
    # sorting keeps the seeded split reproducible across file systems.
    total_files = sorted(stem_names(glob(saved_path + "Annotations/*.xml"), ".xml"))
    # test_size controls the share of files that goes to val.
    train_files, val_files = train_test_split(
        total_files, test_size=0.15, random_state=42
    )
    listings = (
        ("trainval.txt", total_files),
        ("test.txt", []),
        ("train.txt", train_files),
        ("val.txt", val_files),
    )
    for file_name, names in listings:
        with open(os.path.join(main_dir, file_name), "w") as listing:
            listing.writelines(name + "\n" for name in names)


def main():
    """Run the whole labelme -> VOC2007 conversion."""
    # 2. Create the VOC folder layout if it does not exist yet.
    for sub_dir in ("Annotations", "JPEGImages/", "ImageSets/Main/"):
        os.makedirs(saved_path + sub_dir, exist_ok=True)

    # 3./4. One XML file per labelme JSON file.
    for name in stem_names(glob(labelme_path + "*.json"), ".json"):
        write_voc_annotation(name)

    # 5. Copy the images to VOC2007/JPEGImages/.
    print("copy image files to VOC2007/JPEGImages/")
    for image in glob(labelme_path + "*.jpg"):
        shutil.copy(image, saved_path + "JPEGImages/")

    # 6. Split into train / val lists.
    write_image_sets()


if __name__ == "__main__":
    main()
用labelimg可以直接标注成xml格式,xml是voc格式的数据,也是我们一般的标注格式。txt是yolo和大部分数据集的标注格式,所以xml文件转成txt格式就很关键。
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
# Dataset splits to process; each one is read from ImageSets/Main/<name>.txt.
sets=['train', 'test']
classes = ["safety_helmet", "head"]  # class names of your dataset; the list position is used as the class id
def convert(size, box):
    """Turn a pixel-space box into normalised centre/size form.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        ``(x, y, w, h)``: box centre and box size, each divided by the
        matching image dimension.
    """
    img_w, img_h = size[0], size[1]
    left, right, top, bottom = box[0], box[1], box[2], box[3]
    scale_x = 1./img_w
    scale_y = 1./img_h
    centre_x = (left + right)/2.0 * scale_x
    centre_y = (top + bottom)/2.0 * scale_y
    rel_w = (right - left) * scale_x
    rel_h = (bottom - top) * scale_y
    return (centre_x, centre_y, rel_w, rel_h)
def convert_annotation(image_id):
    """Convert ``police_labels/<image_id>.xml`` into ``labels/<image_id>.txt``.

    Each kept object becomes one line ``<class id> <x> <y> <w> <h>`` using
    the values returned by ``convert``. Objects whose name is not in
    ``classes`` or that are marked difficult are skipped.

    Args:
        image_id: file name without extension, shared by the xml and the txt.
    """
    # Folder holding the xml files. Parsing by path lets ElementTree open and
    # close the file itself and honour the encoding declared in the XML.
    tree = ET.parse('police_labels/%s.xml' % (image_id))
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    # Folder receiving the txt files; `with` closes the file even when a
    # malformed object raises half-way through.
    with open('labels/%s.txt' % (image_id), 'w') as out_file:
        for obj in root.iter('object'):
            difficult_node = obj.find('difficult')
            # A missing <difficult> tag is treated as "not difficult".
            difficult = difficult_node.text if difficult_node is not None else '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # Order expected by convert(): xmin, xmax, ymin, ymax.
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
wd = getcwd()
# labels/ stores the converted txt files; create it up front so the first
# write in convert_annotation() cannot fail on a missing folder.
os.makedirs('labels', exist_ok=True)
for image_set in sets:
    # train.txt / test.txt under ImageSets/Main list bare file names only
    # (no directory, no extension); they identify the xml files to convert.
    with open('ImageSets/Main/%s.txt' % (image_set)) as id_file:
        image_ids = id_file.read().strip().split()
    # Write a new train.txt / test.txt in the working directory holding the
    # absolute path of every image, so the images are easy to locate later.
    with open('%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            # NOTE: hard-coded image root; adjust it to your own dataset location.
            list_file.write('/root/object-detection/yolov5-master/data/police_obj/images/%s.jpg\n' % (image_id))
            convert_annotation(image_id)
有的时候也需要把txt格式转换成coco格式,但是由于txt格式文件是不带标签名的,所以我们先转成xml,再由xml转成coco。
import cv2
import os
labels = ['A', 'B', 'C']  # class names of the dataset, indexed by YOLO class id

# Placeholders, in order: file name, image width, image height, image depth.
xml_head = '''<annotation>
    <folder>VOC2007</folder>
    <filename>{}</filename>
    <owner>
        <flickrid>null</flickrid>
        <name>null</name>
    </owner>
    <size>
        <width>{}</width>
        <height>{}</height>
        <depth>{}</depth>
    </size>
    <segmented>0</segmented>
'''
# Placeholders, in order: class name, xmin, ymin, xmax, ymax.
xml_obj = '''    <object>
        <name>{}</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>{}</xmin>
            <ymin>{}</ymin>
            <xmax>{}</xmax>
            <ymax>{}</ymax>
        </bndbox>
    </object>
'''
xml_end = '''</annotation>
'''


def label_and_xml_paths(jpg):
    """Derive the YOLO txt path and the output xml path from an image path.

    images/, labels/ and xmllabels/ are expected to sit in the same parent
    directory: images/ holds the pictures, labels/ the txt files, and
    xmllabels/ receives the generated xml files.
    """
    txt = jpg.replace('images', 'labels').replace('.jpg', '.txt')
    xml_path = jpg.replace('images', 'xmllabels').replace('.jpg', '.xml')
    return txt, xml_path


def yolo_line_to_voc_box(line, img_w, img_h):
    """Convert one YOLO label line to ``(class_index, xmin, ymin, xmax, ymax)``.

    The line holds ``class x_center y_center width height`` with the four box
    values relative to the image size. Returns ``None`` for a blank line.
    """
    yolo_datas = line.split()
    if not yolo_datas:
        return None
    label = int(float(yolo_datas[0]))
    center_x = round(float(yolo_datas[1]) * img_w)
    center_y = round(float(yolo_datas[2]) * img_h)
    bbox_width = round(float(yolo_datas[3]) * img_w)
    bbox_height = round(float(yolo_datas[4]) * img_h)
    xmin = int(center_x - bbox_width / 2)
    ymin = int(center_y - bbox_height / 2)
    xmax = int(center_x + bbox_width / 2)
    ymax = int(center_y + bbox_height / 2)
    return label, xmin, ymin, xmax, ymax


def main():
    """Create one VOC xml file for every image path listed in train.txt."""
    cnt = 0
    # train.txt (or test.txt) lists one image path per line.
    with open('train.txt', 'r') as train_list:
        for lst in train_list:
            jpg = lst.strip()  # image path
            if not jpg:
                continue
            txt, xml_path = label_and_xml_paths(jpg)
            img = cv2.imread(jpg)
            if img is None:  # cv2.imread signals failure by returning None
                raise FileNotFoundError('cannot read image ' + jpg)
            img_h, img_w = img.shape[0], img.shape[1]
            img_depth = img.shape[2] if img.ndim == 3 else 1
            head = xml_head.format(str(jpg), str(img_w), str(img_h), str(img_depth))
            objects = []
            with open(txt, 'r') as f:
                for line in f:
                    parsed = yolo_line_to_voc_box(line, img_w, img_h)
                    if parsed is None:
                        continue
                    label, xmin, ymin, xmax, ymax = parsed
                    objects.append(xml_obj.format(labels[label], xmin, ymin, xmax, ymax))
            xml_dir = os.path.dirname(xml_path)
            if xml_dir:
                os.makedirs(xml_dir, exist_ok=True)
            with open(xml_path, 'w') as f_xml:
                f_xml.write(head + ''.join(objects) + xml_end)
            cnt += 1
    print(cnt)


if __name__ == '__main__':
    main()
运行下面的脚本前,需要把xml文件和对应的图片都放到当前目录的Annotations文件夹中;脚本会生成coco格式的json文件,并把图片划分到images/train2014和images/val2014中。
import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET
path2 = "."  # base directory (relative to the working directory) under which annotations/ and images/ are created
START_BOUNDING_BOX_ID = 1  # id given to the first annotation written by convert(); later ones count up from it
def get(root, name):
    """Return the list of elements under *root* that match *name* (may be empty)."""
    matches = root.findall(name)
    return matches
def get_and_check(root, name, length):
    """Find the elements matching *name* under *root* and check how many there are.

    Args:
        root: element to search in.
        name: tag or path handed to ``findall``.
        length: required number of matches; ``0`` or less accepts any
            non-zero number.

    Returns:
        The single matching element when ``length == 1``, otherwise the
        list of matches.

    Raises:
        NotImplementedError: if nothing matches, or if ``length > 0`` and
            the number of matches differs from it.
    """
    found = root.findall(name)
    count = len(found)
    if not found:
        raise NotImplementedError('Can not find %s in %s.'%(name, root.tag))
    if length > 0 and count != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.'%(name, length, count))
    return found[0] if length == 1 else found
def convert(xml_list, json_file):
    """Convert Pascal VOC xml files into one COCO-style json file.

    Reads the module-level ``pre_define_categories`` (name -> id) and
    ``only_care_pre_define_categories``. When the latter is true, objects
    of unknown categories are skipped; otherwise they get the next free id.

    Args:
        xml_list: paths of the xml files; image ids are assigned 1, 2, ...
            in this order.
        json_file: path of the json file to write.

    Raises:
        NotImplementedError: from ``get_and_check`` when a required tag is
            missing or appears an unexpected number of times.
        AssertionError: if a box has ``xmax <= xmin`` or ``ymax <= ymin``.
    """
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    categories = pre_define_categories.copy()
    bnd_id = START_BOUNDING_BOX_ID
    all_categories = {}  # category name -> number of objects seen
    for index, line in enumerate(xml_list):
        xml_f = line
        root = ET.parse(xml_f).getroot()

        # The image is assumed to share the xml's base name, with a .jpg extension.
        filename = os.path.basename(xml_f)[:-4] + ".jpg"
        image_id = 1 + index
        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        image = {'file_name': filename, 'height': height, 'width': width, 'id': image_id}
        json_dict['images'].append(image)

        # Segmentation is currently not supported; only boxes are exported.
        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            all_categories[category] = all_categories.get(category, 0) + 1
            if category not in categories:
                if only_care_pre_define_categories:
                    continue
                new_id = len(categories) + 1
                print("[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically".format(category, pre_define_categories, new_id))
                categories[category] = new_id
            category_id = categories[category]

            bndbox = get_and_check(obj, 'bndbox', 1)
            xmin = int(float(get_and_check(bndbox, 'xmin', 1).text))
            ymin = int(float(get_and_check(bndbox, 'ymin', 1).text))
            xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
            ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
            # Raised explicitly (same exception type as before) so the check
            # still runs under ``python -O``, which strips assert statements.
            if xmax <= xmin:
                raise AssertionError("xmax <= xmin, {}".format(line))
            if ymax <= ymin:
                raise AssertionError("ymax <= ymin, {}".format(line))
            o_width = xmax - xmin
            o_height = ymax - ymin
            ann = {'area': o_width*o_height, 'iscrowd': 0, 'image_id':
                   image_id, 'bbox': [xmin, ymin, o_width, o_height],
                   'category_id': category_id, 'id': bnd_id, 'ignore': 0,
                   'segmentation': []}
            json_dict['annotations'].append(ann)  # record the box in the "annotations" list
            bnd_id = bnd_id + 1

    for cate, cid in categories.items():
        cat = {'supercategory': 'none', 'id': cid, 'name': cate}
        json_dict['categories'].append(cat)

    # `with` closes the file even if serialisation fails.
    with open(json_file, 'w') as json_fp:
        json_fp.write(json.dumps(json_dict))

    print("------------create {} done--------------".format(json_file))
    print("find {} categories: {} -->>> your pre_define_categories {}: {}".format(len(all_categories), all_categories.keys(), len(pre_define_categories), pre_define_categories.keys()))
    print("category: id --> {}".format(categories))
    print(categories.keys())
    print(categories.values())
def recreate_dir(path, protect=None):
    """Make *path* an empty directory, deleting any previous content.

    If *path* is the same directory as *protect*, nothing is deleted. This
    matters on case-insensitive file systems, where ``annotations`` and the
    input folder ``Annotations`` are one and the same directory.
    """
    if os.path.exists(path):
        if (protect is not None and os.path.exists(protect)
                and os.path.samefile(path, protect)):
            return
        shutil.rmtree(path)
    os.makedirs(path)


def export_split(xml_paths, list_path, image_dir):
    """Write the base names of *xml_paths* to *list_path* and copy their images.

    The image of every xml file is expected next to it, with the same name
    and a ``.jpg`` extension; it is copied into *image_dir*.
    """
    with open(list_path, "w") as listing:
        for xml_path in xml_paths:
            img = xml_path[:-4] + ".jpg"
            listing.write(os.path.basename(xml_path)[:-4] + "\n")
            shutil.copyfile(img, os.path.join(image_dir, os.path.basename(img)))


if __name__ == '__main__':
    classes = ['person']
    # Category name -> COCO category id (ids start at 1).
    pre_define_categories = {cls: i + 1 for i, cls in enumerate(classes)}
    # pre_define_categories = {'a1': 1, 'a3': 2, 'a6': 3, 'a9': 4, "a10": 5}
    only_care_pre_define_categories = True
    # only_care_pre_define_categories = False

    train_ratio = 1  # share of the data used for train; 1 puts everything into train
    save_json_train = 'instances_train2014.json'  # json file name of the train set
    save_json_val = 'instances_val2014.json'
    xml_dir = "Annotations"  # folder holding the xml files (and their images)

    xml_list = glob.glob(xml_dir + "/*.xml")
    xml_list = np.sort(xml_list)
    # Fixed seed so the train/val split is reproducible.
    np.random.seed(100)
    np.random.shuffle(xml_list)

    train_num = int(len(xml_list)*train_ratio)
    xml_list_train = xml_list[:train_num]
    xml_list_val = xml_list[train_num:]

    convert(xml_list_train, save_json_train)
    convert(xml_list_val, save_json_val)

    recreate_dir(path2 + "/annotations", protect=xml_dir)
    recreate_dir(path2 + "/images/train2014")
    recreate_dir(path2 + "/images/val2014")

    # train.txt / test.txt list the base names; the images go to the split folders.
    export_split(xml_list_train, "train.txt", path2 + "/images/train2014")
    export_split(xml_list_val, "test.txt", path2 + "/images/val2014")

    print("-------------------------------")
    print("train number:", len(xml_list_train))
    print("val number:", len(xml_list_val))