在目标检测的实践中,我们经常需要把一种格式的数据转换成另一种格式。这里对 DETRAC-Train-Images 数据集进行了解析,请先从数据集的官方网址下载数据:http://detrac-db.rit.albany.edu/download
我的解析代码开源地址为:https://github.com/w5688414/datasets-preprocessing-for-object-detection
数据下载完成后进行解压。我的环境是 Ubuntu 16.04、Python 3。
首先从其提供的xml中,提取每张图片的voc格式的xml,我的文件名为DETRAC_xmlParser.py,代码为:
import xml.etree.ElementTree as ET
from xml.dom.minidom import Document
import os
import cv2
import time
def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>``, attach it to *parent* and return it."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def ConvertVOCXml(file_path="", file_name="", img_width=960, img_height=540):
    """Split one DETRAC sequence annotation into per-frame VOC-style XML files.

    For every ``<frame>`` element under the root of *file_name*, one file
    named ``<sequence name>__img<5-digit frame num>.xml`` is written into
    *file_path*.

    Args:
        file_path: Directory that receives the generated XML files. It must
            already exist.
        file_name: Path of the DETRAC sequence XML to read.
        img_width: Image width written to ``<size>`` and used to clamp
            ``xmax``. Defaults to 960.
        img_height: Image height written to ``<size>`` and used to clamp
            ``ymax``. Defaults to 540.

    Returns:
        The number of frames converted (one output file per frame).

    Notes:
        Every object is labelled ``car``; ``pose``, ``truncated`` and
        ``difficult`` are fixed placeholder values, not read from the input.
    """
    root = ET.parse(file_name).getroot()
    num = 0  # number of frames written so far

    for frame in root:
        if frame.tag != "frame":
            continue

        doc = Document()
        annotation = doc.createElement("annotation")
        doc.appendChild(annotation)

        pic_id = frame.attrib["num"].zfill(5)
        output_file_name = root.attrib["name"] + "__img" + pic_id + ".xml"

        _append_text_element(doc, annotation, "folder", "VOC2007")
        _append_text_element(doc, annotation, "filename", "img" + pic_id + ".jpg")

        size = doc.createElement("size")
        _append_text_element(doc, size, "depth", 3)
        _append_text_element(doc, size, "width", img_width)
        _append_text_element(doc, size, "height", img_height)
        annotation.appendChild(size)

        # Element.getchildren() was removed in Python 3.9, so index the
        # element directly. The first child of a frame is taken as the
        # target list; a frame without children simply yields no objects.
        target_list = frame[0] if len(frame) else ()

        for target in target_list:
            if target.tag != "target":
                continue
            box = target.find("box")
            if box is None:
                # Without a box there is nothing to annotate; skipping also
                # avoids re-using coordinates from a previous target.
                continue

            xmin_value = int(float(box.attrib["left"]))
            ymin_value = int(float(box.attrib["top"]))
            box_width_value = int(float(box.attrib["width"]))
            box_height_value = int(float(box.attrib["height"]))
            # Clamp the far corner so the box never exceeds the image.
            xmax_value = min(xmin_value + box_width_value, img_width)
            ymax_value = min(ymin_value + box_height_value, img_height)

            voc_object = doc.createElement("object")
            _append_text_element(doc, voc_object, "name", "car")
            _append_text_element(doc, voc_object, "pose", "Left")    # placeholder
            _append_text_element(doc, voc_object, "truncated", "0")  # placeholder
            _append_text_element(doc, voc_object, "difficult", "0")  # placeholder

            bndbox = doc.createElement("bndbox")
            _append_text_element(doc, bndbox, "xmin", xmin_value)
            _append_text_element(doc, bndbox, "ymin", ymin_value)
            _append_text_element(doc, bndbox, "xmax", xmax_value)
            _append_text_element(doc, bndbox, "ymax", ymax_value)
            voc_object.appendChild(bndbox)
            annotation.appendChild(voc_object)

        file_path_out = os.path.join(file_path, output_file_name)
        with open(file_path_out, "w", encoding="utf-8") as f:
            f.write(doc.toprettyxml(indent=" " * 4))
        num += 1

    return num
def bboxes_draw_on_img(img, bbox, color=(255, 0, 0), thickness=2):
    """Draw a single bounding box on *img* in place.

    Args:
        img: Image passed straight to ``cv2.rectangle``.
        bbox: Mapping with the keys ``"xmin"``, ``"ymin"``, ``"xmax"`` and
            ``"ymax"``; values may be numbers or numeric strings.
        color: Rectangle colour handed to ``cv2.rectangle``. The default is
            an immutable tuple so it cannot be mutated between calls.
        thickness: Line thickness handed to ``cv2.rectangle``.
    """
    print(bbox)
    p1 = (int(float(bbox["xmin"])), int(float(bbox["ymin"])))
    p2 = (int(float(bbox["xmax"])), int(float(bbox["ymax"])))
    cv2.rectangle(img, p1, p2, color, thickness)
def visualization_image(image_name, xml_file_name):
    """Show an image with the bounding boxes from a VOC-style XML drawn on it.

    The folder, filename and size fields of the annotation are printed while
    it is parsed. The annotated image is displayed in a window titled
    ``"capture"`` and the call blocks until a key is pressed.

    Args:
        image_name: Path of the image file to display.
        xml_file_name: Path of the VOC-style annotation for that image.

    Raises:
        OSError: If the image cannot be read by ``cv2.imread``.
    """
    bbox_keys = ("xmin", "ymin", "xmax", "ymax")
    root = ET.parse(xml_file_name).getroot()
    object_lists = []

    for child in root:
        if child.tag in ("folder", "filename"):
            print(child.tag, child.text)
        elif child.tag == "size":
            for size_child in child:
                if size_child.tag in ("width", "height", "depth"):
                    print(size_child.tag, size_child.text)
        elif child.tag == "object":
            single_object = {}
            for object_child in child:
                if object_child.tag == "name":
                    single_object["name"] = object_child.text
                elif object_child.tag == "bndbox":
                    for bndbox_child in object_child:
                        if bndbox_child.tag in bbox_keys:
                            single_object[bndbox_child.tag] = bndbox_child.text
            if single_object:
                object_lists.append(single_object)

    img = cv2.imread(image_name)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.rectangle.
        raise OSError("could not read image: {0}".format(image_name))

    for object_coordinate in object_lists:
        bboxes_draw_on_img(img, object_coordinate)
    cv2.imshow("capture", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
if __name__ == "__main__":
    # Convert every DETRAC sequence annotation into per-frame VOC XML files,
    # log the per-sequence frame counts, then preview one converted frame.
    basePath = "DETRAC-Train-Annotations-XML"
    saveBasePath = "xml_test"

    # Only hand real XML files to the parser; sorting keeps the log stable.
    totalxml = sorted(
        name for name in os.listdir(basePath) if name.lower().endswith(".xml")
    )
    total_num = 0
    print("正在转换")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(saveBasePath, exist_ok=True)

    start = time.time()
    # The log records how many frames each sequence produced, for debugging;
    # the context manager guarantees it is flushed and closed.
    with open("xml_statistical.txt", "w") as log:
        for xml_name in totalxml:
            file_name = os.path.join(basePath, xml_name)
            print(file_name)
            num = ConvertVOCXml(file_path=saveBasePath, file_name=file_name)
            print(num)
            total_num = total_num + num
            log.write(file_name + " " + str(num) + "\n")
        end = time.time()
        seconds = end - start
        print("Time taken : {0} seconds".format(seconds))
        print(total_num)
        log.write(str(total_num) + "\n")

    visualization_image("Insight-MVT_Annotation_Train/MVI_40212/img00396.jpg","xml_test/MVI_40212__img00396.xml")
然后根据生成的vocxml,迁移相应的图片到目标目录中,我的文件名为voc_data_migrate.py,我的代码为:
import os
import random
import shutil
# Copy the image that belongs to each generated VOC XML into one flat
# directory, naming it after the XML file (<sequence>__<frame>.jpg).

# Directory containing the generated VOC XML files.
XmlPath = r'xml_test'
# Root directory of the original images (one sub-folder per sequence).
pictureBasePath = r"Insight-MVT_Annotation_Train"
# Directory that receives the copied images.
saveBasePath = r"picture_test"

total_xml = [name for name in os.listdir(XmlPath) if name.lower().endswith(".xml")]
num = len(total_xml)
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(saveBasePath, exist_ok=True)

for xml in total_xml:
    # "<sequence>__<frame>.xml" -> stem "<sequence>__<frame>"
    stem = os.path.splitext(xml)[0]
    folder, separator, frame = stem.partition("__")
    if not separator:
        # Not produced by the converter; there is no source image to locate.
        print("skipping unexpected file", xml)
        continue
    filePath = os.path.join(pictureBasePath, folder, frame + ".jpg")
    newfile_path = os.path.join(saveBasePath, stem + ".jpg")
    print(newfile_path)
    shutil.copyfile(filePath, newfile_path)

print("xml file total number", num)
至此,所需的xml和相应的图片都已生成,把它们放到VOC相应的目录里即可。关于VOC格式的目录结构,可以参考我的博文:http://blog.csdn.net/w5688414/article/details/78489064
然后利用下面的代码,代码文件名为ImageSets_Convert.py, 产生trainval.txt,test.txt,train.txt,val.txt文件,这样就可以像VOC2007那样使用这个数据集了。
import os
import random
import time
# Randomly split the annotation files into trainval/test and train/val lists
# and write them as VOC2007-style ImageSets/Main/*.txt files.
xmlfilepath = r'./VOC2007/Annotations'
saveBasePath = r"./"
trainval_percent = 0.8   # share of all samples that goes into trainval
train_percent = 0.85     # share of trainval that goes into train

total_xml = os.listdir(xmlfilepath)
num = len(total_xml)
indices = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)
# Sets give O(1) membership tests; scanning the lists for every file is
# quadratic in the number of annotation files.
trainval_set = set(trainval)
train_set = set(train)
print("train and val size", tv)
print("train size", tr)

main_dir = os.path.join(saveBasePath, 'VOC2007/ImageSets/Main')
os.makedirs(main_dir, exist_ok=True)

# Start time
start = time.time()
with open(os.path.join(main_dir, 'trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(main_dir, 'test.txt'), 'w') as ftest, \
        open(os.path.join(main_dir, 'train.txt'), 'w') as ftrain, \
        open(os.path.join(main_dir, 'val.txt'), 'w') as fval:
    for i in indices:
        # Strip the extension, whatever its length, to get the sample id.
        name = os.path.splitext(total_xml[i])[0] + '\n'
        if i in trainval_set:
            ftrainval.write(name)
            if i in train_set:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)
# End time
end = time.time()
seconds = end - start
print("Time taken : {0} seconds".format(seconds))