基本步骤为:txt -> xml -> json
VisDrone 的标注格式(txt 文件中每行对应一个目标):
(x, y, w, h, score, object_category, truncation, occlusion)
先转换成 VOC 格式的 xml:(图片名称, 目标类型, bbox 坐标)
import os
from PIL import Image
# Convert every VisDrone annotation .txt file into a Pascal VOC style .xml file.
# Change the paths below to match your own dataset layout.
root_dir = "./VisDrone2019-DET-train/"
annotations_dir = root_dir + "annotations/"
image_dir = root_dir + "images/"
xml_dir = root_dir + "Annotations_XML/"
# Class names indexed by the object_category id stored in the annotation
# files; replace the list to convert a different dataset.
class_name = ['ignored regions', 'pedestrian', 'people', 'bicycle', 'car', 'van',
              'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor', 'others']

# open(..., 'w') does not create missing directories, so make sure it exists.
os.makedirs(xml_dir, exist_ok=True)

for filename in os.listdir(annotations_dir):
    image_name = os.path.splitext(filename)[0]
    # Only the image size is needed (for the <size> element).
    # Change ".jpg" to ".png" if the images are PNG files.
    with Image.open(image_dir + image_name + ".jpg") as img:
        width, height = img.size

    xml_lines = [
        '<annotation>',
        '\t<folder>VOC2007</folder>',
        '\t<filename>' + image_name + '.jpg</filename>',
        '\t<source>',
        '\t\t<database>VisDrone2018 Database</database>',
        '\t\t<annotation>VisDrone2018</annotation>',
        '\t\t<image>flickr</image>',
        '\t\t<flickrid>Unspecified</flickrid>',
        '\t</source>',
        '\t<owner>',
        '\t\t<flickrid>Haipeng Zhang</flickrid>',
        '\t\t<name>Haipeng Zhang</name>',
        '\t</owner>',
        '\t<size>',
        '\t\t<width>' + str(width) + '</width>',
        '\t\t<height>' + str(height) + '</height>',
        '\t\t<depth>3</depth>',
        '\t</size>',
        '\t<segmented>0</segmented>',
    ]

    with open(annotations_dir + filename, 'r') as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            # Field order: x, y, w, h, score, object_category, truncation, occlusion
            fields = line.split(',')
            # Pay attention to this point (0-based): the far corner is
            # inclusive, hence the "- 1".
            xmax = int(fields[0]) + int(fields[2]) - 1
            ymax = int(fields[1]) + int(fields[3]) - 1
            xml_lines.extend([
                '\t<object>',
                '\t\t<name>' + class_name[int(fields[5])] + '</name>',
                '\t\t<pose>Unspecified</pose>',
                '\t\t<truncated>' + fields[6] + '</truncated>',
                '\t\t<difficult>' + str(int(fields[7])) + '</difficult>',
                '\t\t<bndbox>',
                '\t\t\t<xmin>' + fields[0] + '</xmin>',
                '\t\t\t<ymin>' + fields[1] + '</ymin>',
                '\t\t\t<xmax>' + str(xmax) + '</xmax>',
                '\t\t\t<ymax>' + str(ymax) + '</ymax>',
                '\t\t</bndbox>',
                '\t</object>',
            ])

    xml_lines.append('</annotation>')
    xml_name = xml_dir + image_name + '.xml'
    with open(xml_name, 'w') as fout:
        fout.write('\n'.join(xml_lines))
代码如下:
import os
import os.path
import numpy as np
import xml.etree.ElementTree as xmlET
from PIL import Image, ImageDraw
# Draw the boxes stored in each VOC XML file onto its image so the
# converted annotations can be checked visually.
classes = ('__background__',  # always index 0
           'ignored regions', 'pedestrian', 'people', 'bicycle', 'car', 'van',
           'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor', 'others')
# Change the paths below to match your own dataset layout.
file_path_img = './VisDrone2019-DET-train/images'
file_path_xml = './VisDrone2019-DET-train/Annotations_XML'
save_file_path = './VisDrone2019-DET-train/output'

# img.save() does not create missing directories.
os.makedirs(save_file_path, exist_ok=True)

pathDir = os.listdir(file_path_xml)
for filename in pathDir:
    tree = xmlET.parse(os.path.join(file_path_xml, filename))
    objs = tree.findall('object')
    num_objs = len(objs)
    # Signed dtype: a coordinate of 0 becomes -1 after the shift below, which
    # would wrap around in an unsigned array.
    boxes = np.zeros((num_objs, 5), dtype=np.int32)
    for ix, obj in enumerate(objs):
        bbox = obj.find('bndbox')
        # Make pixel indexes 0-based
        # NOTE(review): confirm the XML coordinates really are 1-based,
        # otherwise this shifts every box by one pixel.
        x1 = float(bbox.find('xmin').text) - 1
        y1 = float(bbox.find('ymin').text) - 1
        x2 = float(bbox.find('xmax').text) - 1
        y2 = float(bbox.find('ymax').text) - 1
        cla = obj.find('name').text
        label = classes.index(cla)
        boxes[ix, 0:4] = [x1, y1, x2, y2]
        boxes[ix, 4] = label

    image_name = os.path.splitext(filename)[0]
    img = Image.open(os.path.join(file_path_img, image_name + '.jpg'))
    draw = ImageDraw.Draw(img)
    # tolist() yields plain Python ints for drawing and class lookup.
    for xmin, ymin, xmax, ymax, label in boxes.tolist():
        draw.rectangle([xmin, ymin, xmax, ymax], outline=(255, 0, 0))
        draw.text([xmin, ymin], classes[label], (255, 0, 0))
    img.save(os.path.join(save_file_path, image_name + '.png'))
使用上面的代码可以把xml格式的标注框画回原图中,用来检查转换结果是否正确!
代码如下,下面有两种代码提供使用转换。
方法一、
# -*- coding:utf-8 -*-
# !/usr/bin/env python
import argparse
import json
import matplotlib.pyplot as plt
import skimage.io as io
import cv2
from labelme import utils
import numpy as np
import glob
import PIL.Image
import os,sys
class PascalVOC2coco(object):
    """Convert Pascal VOC XML annotation files into one COCO-style JSON file.

    The whole conversion runs from the constructor: creating an instance
    parses every XML file and writes the result to ``save_json_path``.
    """

    def __init__(self, xml=None, save_json_path='./new.json'):
        '''
        :param xml: list of paths to the Pascal VOC XML files (a single path
            is also accepted and treated as a one-element list)
        :param save_json_path: where the JSON file is written
        '''
        if xml is None:
            xml = []
        elif isinstance(xml, (str, os.PathLike)):
            # Iterating a bare string would yield single characters.
            xml = [xml]
        self.xml = xml
        self.save_json_path = save_json_path
        self.images = []
        self.categories = []
        self.annotations = []
        # Category names already registered, in order of first appearance.
        self.label = []
        self.annID = 1
        self.height = 0
        self.width = 0
        # Kept for backward compatibility; no longer used by the parser.
        self.ob = []

        self.save_json()

    def data_transfer(self):
        """Parse every XML file and fill images, categories and annotations."""
        import xml.etree.ElementTree as ET

        total = len(self.xml)
        for num, json_file in enumerate(self.xml):
            # Progress output
            sys.stdout.write('\r>> Converting image %d/%d' % (num + 1, total))
            sys.stdout.flush()

            self.json_file = json_file
            self.num = num
            # <dataset root>/<xml dir>/<file>.xml  ->  <dataset root>
            path = os.path.dirname(os.path.dirname(self.json_file))

            # A real XML parser is used so that only <name> elements inside
            # <object> are read as categories (the <owner> block also has a
            # <name> child).
            root = ET.parse(json_file).getroot()

            self.filen_ame = (root.findtext('filename') or '').strip()
            if not self.filen_ame:
                raise ValueError('no <filename> element in %s' % json_file)
            # Object mask looked up by getsegmentation(); it may not exist.
            self.path = os.path.join(path, 'SegmentationObject',
                                     self.filen_ame.split('.')[0] + '.png')

            size = root.find('size')
            if size is None:
                raise ValueError('no <size> element in %s' % json_file)
            self.width = int(size.findtext('width'))
            self.height = int(size.findtext('height'))
            self.images.append(self.image())

            for obj in root.findall('object'):
                name = obj.findtext('name')
                bndbox = obj.find('bndbox')
                if name is None or bndbox is None:
                    continue
                self.supercategory = name
                if self.supercategory not in self.label:
                    self.categories.append(self.categorie())
                    self.label.append(self.supercategory)

                # Bounding box
                x1, y1, x2, y2 = (int(float(bndbox.findtext(tag)))
                                  for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
                self.rectangle = [x1, y1, x2, y2]
                self.bbox = [x1, y1, x2 - x1, y2 - y1]  # COCO format [x, y, w, h]

                self.annotations.append(self.annotation())
                self.annID += 1

        sys.stdout.write('\n')
        sys.stdout.flush()

    def image(self):
        """Build the COCO image record for the file being processed."""
        image = {}
        image['height'] = self.height
        image['width'] = self.width
        image['id'] = self.num + 1
        image['file_name'] = self.filen_ame
        return image

    def categorie(self):
        """Build the COCO category record for ``self.supercategory``."""
        categorie = {}
        categorie['supercategory'] = self.supercategory
        categorie['id'] = len(self.label) + 1  # 0 is reserved for background
        categorie['name'] = self.supercategory
        return categorie

    def annotation(self):
        """Build the COCO annotation record for the current object."""
        annotation = {}
        annotation['segmentation'] = [list(map(float, self.getsegmentation()))]
        annotation['iscrowd'] = 0
        annotation['image_id'] = self.num + 1
        annotation['bbox'] = self.bbox
        # Box area (w * h); COCO evaluation tools expect an 'area' key.
        annotation['area'] = self.bbox[2] * self.bbox[3]
        annotation['category_id'] = self.getcatid(self.supercategory)
        annotation['id'] = self.annID
        return annotation

    def getcatid(self, label):
        """Return the id registered for category ``label``, or -1 if unknown."""
        for categorie in self.categories:
            if label == categorie['name']:
                return categorie['id']
        return -1

    def getsegmentation(self):
        """Return the object polygon as a flat list, or ``[0]`` if unavailable.

        The polygon is taken from the mask image at ``self.path``. Any failure
        (for example a dataset without masks) yields the placeholder ``[0]``.
        """
        try:
            mask_1 = cv2.imread(self.path, 0)
            if mask_1 is None:
                # No readable mask for this image.
                return [0]
            mask = np.zeros_like(mask_1, np.uint8)
            rectangle = self.rectangle
            mask[rectangle[1]:rectangle[3], rectangle[0]:rectangle[2]] = \
                mask_1[rectangle[1]:rectangle[3], rectangle[0]:rectangle[2]]

            # Centre of the rectangle; the scan below runs along this row.
            mean_x = (rectangle[0] + rectangle[2]) // 2
            mean_y = (rectangle[1] + rectangle[3]) // 2

            end = min((mask.shape[1], int(rectangle[2]) + 1))
            start = max((0, int(rectangle[0]) - 1))

            # Scan right from the centre for the first object pixel, then
            # left if nothing was found.
            flag = True
            for i in range(mean_x, end):
                pixels = mask_1[mean_y, i]
                if pixels != 0 and pixels != 220:  # 0 = background, 220 = boundary line
                    mask = (mask == pixels).astype(np.uint8)
                    flag = False
                    break
            if flag:
                for i in range(mean_x, start, -1):
                    pixels = mask_1[mean_y, i]
                    if pixels != 0 and pixels != 220:
                        mask = (mask == pixels).astype(np.uint8)
                        break
            self.mask = mask

            return self.mask2polygons()

        except Exception:
            # Best effort: segmentation is optional, fall back to a placeholder.
            return [0]

    def mask2polygons(self):
        """Return all contour points of ``self.mask`` as one flat list."""
        found = cv2.findContours(self.mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        # OpenCV 3 returns (image, contours, hierarchy) and OpenCV 4 returns
        # (contours, hierarchy); the contours are second-to-last in both.
        contours = found[-2]
        bbox = []
        for cont in contours:
            bbox.extend(cont.flatten().tolist())
        return bbox

    def getbbox(self, points):
        """Return the COCO ``[x, y, w, h]`` box enclosing polygon ``points``."""
        polygons = points
        mask = self.polygons_to_mask([self.height, self.width], polygons)
        return self.mask2box(mask)

    def mask2box(self, mask):
        '''Compute the bounding box of a mask.

        mask: [h, w] image made of 0s and 1s, where 1 marks the object. The
        box follows from the smallest and largest row/column holding a 1.
        '''
        index = np.argwhere(mask == 1)
        rows = index[:, 0]
        clos = index[:, 1]
        # Top-left corner
        left_top_r = np.min(rows)  # y
        left_top_c = np.min(clos)  # x

        # Bottom-right corner
        right_bottom_r = np.max(rows)
        right_bottom_c = np.max(clos)

        return [left_top_c, left_top_r, right_bottom_c - left_top_c,
                right_bottom_r - left_top_r]  # [x1, y1, w, h], COCO bbox format

    def polygons_to_mask(self, img_shape, polygons):
        """Rasterise ``polygons`` into a boolean mask of shape ``img_shape``."""
        # Imported here because ``import PIL.Image`` alone does not make
        # ``PIL.ImageDraw`` available.
        from PIL import Image, ImageDraw

        mask = np.zeros(img_shape, dtype=np.uint8)
        mask = Image.fromarray(mask)
        xy = list(map(tuple, polygons))
        ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
        mask = np.array(mask, dtype=bool)
        return mask

    def data2coco(self):
        """Assemble the top-level COCO dictionary."""
        data_coco = {}
        data_coco['images'] = self.images
        data_coco['categories'] = self.categories
        data_coco['annotations'] = self.annotations
        return data_coco

    def save_json(self):
        """Run the conversion and write the JSON file."""
        self.data_transfer()
        self.data_coco = self.data2coco()
        # indent=4 keeps the file human-readable
        with open(self.save_json_path, 'w') as fp:
            json.dump(self.data_coco, fp, indent=4)
# Collect the XML files to convert. This pattern names one annotation file;
# use a wildcard such as '*.xml' to pick up a whole directory.
xml_file = glob.glob('./VisDrone2019-DET-train/Annotations_XML/0000002_00005_d_0000014.xml')
# xml_file=['./Annotations/000032.xml']
# Instantiating the converter performs the conversion and writes the JSON file.
PascalVOC2coco(xml=xml_file,
               save_json_path='./VisDrone2019-DET-train/Annotations/train.json')
在转换过程中遇到的错误
1、遇到的报错:xml.etree.ElementTree.ParseError: syntax error: line 1, column 0
原因是:在处理大规模数据集的时候,个别xml文件可能为空或已损坏,解析时就会出现这个错误,将出错的xml文件删去(或重新生成)就可以了!
2、遇到的报错:TypeError: expected str, bytes or os.PathLike object, not list
(类型错误:应为str、bytes或os.PathLike对象,而不是list)
原因是:把一个list传给了只接受单个路径的函数,应当遍历list并逐个传入路径。
方法二、 在运行前需要把路径和参数设置好。
(注意,这里默认划分的是训练、验证和测试集,如果你不需要测试集,只需要简单的改下代码即可)
# coding:utf-8
# 运行前请先做以下工作:
# pip install lxml
# 将所有的图片及xml文件存放到xml_dir指定的文件夹下,并将此文件夹放置到当前目录下
#
import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET
# Id given to the first annotation written by convert(); later annotations
# count up from here.
START_BOUNDING_BOX_ID = 1
# Base directory for the optional image-split output; only the commented-out
# code in the main block refers to it.
save_path = "."
def get(root, name):
    """Return the list of sub-elements of ``root`` that match ``name``.

    ``name`` is passed to ``root.findall``; the list is empty when nothing
    matches.
    """
    return root.findall(name)
def get_and_check(root, name, length):
    """Find the ``name`` sub-elements of ``root`` and validate their count.

    :param root: element to search in
    :param name: tag (or path) passed to ``root.findall``
    :param length: expected number of matches; a falsy value (0 or None)
        disables the count check
    :return: the single element when ``length`` is 1, otherwise the list of
        matches
    :raises NotImplementedError: if nothing matches, or if the number of
        matches differs from a non-zero ``length``
    """
    found = root.findall(name)
    if not found:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length and len(found) != length:
        raise NotImplementedError(
            'The size of %s is supposed to be %d, but is %d.' % (name, length, len(found)))
    if length == 1:
        return found[0]
    return found
def convert(xml_list, json_file, predefined_categories=None, only_predefined=None):
    """Convert a list of Pascal VOC XML files into one COCO-style JSON file.

    :param xml_list: paths of the XML files to convert
    :param json_file: path of the JSON file to write
    :param predefined_categories: mapping of category name to id; defaults to
        the module-level ``pre_define_categories``
    :param only_predefined: when true, objects whose category is not in the
        mapping are skipped, otherwise they get a new id; defaults to the
        module-level ``only_care_pre_define_categories``
    :raises NotImplementedError: if a required XML element is missing
    :raises AssertionError: if a file is marked as segmented or a box has a
        non-positive width or height
    """
    # Fall back to the globals set by the script entry point so existing
    # two-argument calls keep working.
    if predefined_categories is None:
        predefined_categories = pre_define_categories
    if only_predefined is None:
        only_predefined = only_care_pre_define_categories

    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    categories = predefined_categories.copy()
    bnd_id = START_BOUNDING_BOX_ID
    # Number of objects seen per category name, including skipped ones.
    all_categories = {}
    for index, line in enumerate(xml_list):
        xml_f = line
        tree = ET.parse(xml_f)
        root = tree.getroot()

        # The image name is derived from the XML file name, not from <filename>.
        filename = os.path.basename(xml_f)[:-4] + ".jpg"
        image_id = 20190000001 + index
        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        image = {'file_name': filename, 'height': height, 'width': width, 'id': image_id}
        json_dict['images'].append(image)
        # Currently we do not support segmentation
        segmented = get_and_check(root, 'segmented', 1).text
        assert segmented == '0'
        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            all_categories[category] = all_categories.get(category, 0) + 1
            if category not in categories:
                if only_predefined:
                    continue
                new_id = len(categories) + 1
                print(
                    "[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically".format(
                        category, predefined_categories, new_id))
                categories[category] = new_id
            category_id = categories[category]
            bndbox = get_and_check(obj, 'bndbox', 1)
            xmin = int(float(get_and_check(bndbox, 'xmin', 1).text))
            ymin = int(float(get_and_check(bndbox, 'ymin', 1).text))
            xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
            ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
            assert (xmax > xmin), "xmax <= xmin, {}".format(line)
            assert (ymax > ymin), "ymax <= ymin, {}".format(line)
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            ann = {'area': o_width * o_height, 'iscrowd': 0, 'image_id':
                   image_id, 'bbox': [xmin, ymin, o_width, o_height],
                   'category_id': category_id, 'id': bnd_id, 'ignore': 0,
                   'segmentation': []}
            json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1

    for cate, cid in categories.items():
        cat = {'supercategory': 'food', 'id': cid, 'name': cate}
        json_dict['categories'].append(cat)

    # The context manager closes the file even if writing fails.
    with open(json_file, 'w') as json_fp:
        json_fp.write(json.dumps(json_dict))

    print("------------create {} done--------------".format(json_file))
    print("find {} categories: {} -->>> your pre_define_categories {}: {}".format(len(all_categories),
                                                                                  all_categories.keys(),
                                                                                  len(predefined_categories),
                                                                                  predefined_categories.keys()))
    print("category: id --> {}".format(categories))
    print(categories.keys())
    print(categories.values())
if __name__ == '__main__':
    # Define your own categories
    classes = ['aaa', 'bbb', 'ccc', 'ddd', 'eee', 'fff']
    pre_define_categories = {}
    for i, cls in enumerate(classes):
        pre_define_categories[cls] = i + 1
    # Category ids can also be set by hand: comment out the loop above and
    # use a line like the one below instead.
    # pre_define_categories = {'a1': 1, 'a3': 2, 'a6': 3, 'a9': 4, "a10": 5}
    only_care_pre_define_categories = True  # or False

    # Output JSON files
    save_json_train = 'train_food.json'
    save_json_val = 'val_food.json'
    save_json_test = 'test_food.json'

    # Directory holding the source XML files
    xml_dir = "./image_and_xml"
    xml_list = glob.glob(xml_dir + "/*.xml")
    xml_list = np.sort(xml_list)

    # Shuffle the dataset (fixed seed keeps the split reproducible)
    np.random.seed(100)
    np.random.shuffle(xml_list)

    # Split the shuffled dataset by ratio; the test set gets the remainder
    train_ratio = 0.8
    val_ratio = 0.1
    train_num = int(len(xml_list) * train_ratio)
    val_num = int(len(xml_list) * val_ratio)
    xml_list_train = xml_list[:train_num]
    xml_list_val = xml_list[train_num: train_num + val_num]
    xml_list_test = xml_list[train_num + val_num:]

    # Convert the XML files to COCO format, producing three JSON files
    # (train/val/test)
    convert(xml_list_train, save_json_train)
    convert(xml_list_val, save_json_val)
    convert(xml_list_test, save_json_test)

    # # Store the images according to the split
    # if os.path.exists(save_path + "/annotations"):
    #     shutil.rmtree(save_path + "/annotations")
    # os.makedirs(save_path + "/annotations")
    # if os.path.exists(save_path + "/images_divide/train"):
    #     shutil.rmtree(save_path + "/images_divide/train")
    # os.makedirs(save_path + "/images_divide/train")
    # if os.path.exists(save_path + "/images_divide/val"):
    #     shutil.rmtree(save_path + "/images_divide/val")
    # os.makedirs(save_path + "/images_divide/val")
    # if os.path.exists(save_path + "/images_divide/test"):
    #     shutil.rmtree(save_path + "/images_divide/test")
    # os.makedirs(save_path + "/images_divide/test")

    # # Run as needed: writes three txt files listing the file names
    # f1 = open("./train.txt", "w")
    # for xml in xml_list_train:
    #     img = xml[:-4] + ".jpg"
    #     f1.write(os.path.basename(xml)[:-4] + "\n")
    #     shutil.copyfile(img, save_path + "/images_divide/train/" + os.path.basename(img))
    #
    # f2 = open("val.txt", "w")
    # for xml in xml_list_val:
    #     img = xml[:-4] + ".jpg"
    #     f2.write(os.path.basename(xml)[:-4] + "\n")
    #     shutil.copyfile(img, save_path + "/images_divide/val/" + os.path.basename(img))
    #
    # f3 = open("test.txt", "w")
    # for xml in xml_list_test:
    #     img = xml[:-4] + ".jpg"
    #     f3.write(os.path.basename(xml)[:-4] + "\n")
    #     shutil.copyfile(img, save_path + "/images_divide/test/" + os.path.basename(img))
    #
    # f1.close()
    # f2.close()
    # f3.close()

    print("-" * 50)
    print("train number:", len(xml_list_train))
    print("val number:", len(xml_list_val))
    print("test number:", len(xml_list_test))
详细步骤参考
计算所有的图片(包括训练、验证和测试)的均值和标准差,直接将图片存放到同一个文件夹,把路径改下即可。
import cv2, os, argparse
import numpy as np
from tqdm import tqdm
def main():
    """Print the per-channel mean and standard deviation of a folder of images.

    Each image is scaled to [0, 1], its per-channel mean and standard
    deviation are computed, and those per-image values are averaged over all
    readable images. The printed std is therefore the mean of the per-image
    stds, not the std over all pixels of the dataset.

    :raises ValueError: if the folder contains no readable image
    """
    dirs = r'./images'  # change this to your own image directory
    img_file_names = os.listdir(dirs)
    m_list, s_list = [], []
    for img_filename in tqdm(img_file_names):
        img = cv2.imread(dirs + '/' + img_filename)
        if img is None:
            # cv2.imread returns None for files it cannot read; skip them.
            continue
        img = img / 255.0
        m, s = cv2.meanStdDev(img)
        m_list.append(m.reshape((3,)))
        s_list.append(s.reshape((3,)))
    if not m_list:
        raise ValueError('no readable images found in %s' % dirs)
    m_array = np.array(m_list)
    s_array = np.array(s_list)
    m = m_array.mean(axis=0, keepdims=True)
    s = s_array.mean(axis=0, keepdims=True)
    # The channel order is reversed for printing (cv2.imread loads colour
    # images as BGR, so this prints them as RGB).
    print("mean = ", m[0][::-1])
    print("std = ", s[0][::-1])


if __name__ == '__main__':
    main()
实验结果如下面所示:
mean = [0.37867413 0.38535597 0.37134552]
std = [0.19151453 0.1829674 0.19446535]