DETRAC-Train-Images转换成VOC格式的数据集

在目标检测的实践中,我们经常需要把一种格式的数据转换成另一种格式。这里对DETRAC-Train-Images数据集进行了解析,数据集的下载地址为:http://detrac-db.rit.albany.edu/download ,需要下载以下两个文件:

  • DETRAC-Train-Images (5.22GB, 60 sequences)
  • DETRAC-Train-Annotations-XML

我的解析代码开源地址为:https://github.com/w5688414/datasets-preprocessing-for-object-detection

然后进行解压,我的环境是ubuntu 16.04,python3

首先从其提供的xml中,提取每张图片的voc格式的xml,我的文件名为DETRAC_xmlParser.py,代码为:

 

import xml.etree.ElementTree as ET
from xml.dom.minidom import Document
import os
import cv2
import time

def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>`` under *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def ConvertVOCXml(file_path="", file_name="", image_width=960, image_height=540):
    """Split one DETRAC sequence annotation file into per-frame VOC xml files.

    Every ``<frame>`` child of the root element produces one file named
    ``<sequence name>__img<zero-padded frame num>.xml`` inside ``file_path``.

    Args:
        file_path: Directory the per-frame xml files are written to. It must
            already exist.
        file_name: Path of the DETRAC sequence xml file to read.
        image_width: Image width written to ``<size>`` and used to clamp
            ``xmax``.
        image_height: Image height written to ``<size>`` and used to clamp
            ``ymax``.

    Returns:
        The number of frames (and therefore xml files) written.
    """
    root = ET.parse(file_name).getroot()

    num = 0  # number of frames converted so far
    for frame in root:
        if frame.tag != "frame":
            continue

        doc = Document()
        annotation = doc.createElement("annotation")
        doc.appendChild(annotation)

        pic_id = frame.attrib["num"].zfill(5)
        output_file_name = root.attrib["name"] + "__img" + pic_id + ".xml"

        _append_text_element(doc, annotation, "folder", "VOC2007")
        _append_text_element(doc, annotation, "filename", "img" + pic_id + ".jpg")

        # Child order (depth, width, height) is kept from the original output.
        size = doc.createElement("size")
        _append_text_element(doc, size, "depth", "3")
        _append_text_element(doc, size, "width", image_width)
        _append_text_element(doc, size, "height", image_height)
        annotation.appendChild(size)

        # Element.getchildren() was removed in Python 3.9; look the list up
        # by tag instead, and tolerate frames that have no target list.
        target_list = frame.find("target_list")
        targets = [] if target_list is None else target_list.findall("target")

        for target in targets:
            box = target.find("box")
            if box is None:
                # Without a box there is nothing to annotate; emitting the
                # object anyway would need coordinates from another target.
                continue

            xmin_value = int(float(box.attrib["left"]))
            ymin_value = int(float(box.attrib["top"]))
            box_width_value = int(float(box.attrib["width"]))
            box_height_value = int(float(box.attrib["height"]))
            # Clamp the far corner so the box never extends past the image.
            xmax_value = min(xmin_value + box_width_value, image_width)
            ymax_value = min(ymin_value + box_height_value, image_height)

            voc_object = doc.createElement("object")
            _append_text_element(doc, voc_object, "name", "car")
            _append_text_element(doc, voc_object, "pose", "Left")  # arbitrary placeholder
            _append_text_element(doc, voc_object, "truncated", "0")  # arbitrary placeholder
            _append_text_element(doc, voc_object, "difficult", "0")  # arbitrary placeholder

            bndbox = doc.createElement("bndbox")
            _append_text_element(doc, bndbox, "xmin", xmin_value)
            _append_text_element(doc, bndbox, "ymin", ymin_value)
            _append_text_element(doc, bndbox, "xmax", xmax_value)
            _append_text_element(doc, bndbox, "ymax", ymax_value)
            voc_object.appendChild(bndbox)
            annotation.appendChild(voc_object)

        file_path_out = os.path.join(file_path, output_file_name)
        # The xml declaration carries no encoding, so readers assume UTF-8.
        with open(file_path_out, "w", encoding="utf-8") as f:
            f.write(doc.toprettyxml(indent=" " * 4))
        num += 1
    return num




'''
画方框
'''
def bboxes_draw_on_img(img, bbox, color=[255, 0, 0], thickness=2):
    """Draw a single rectangle on ``img`` in place.

    Args:
        img: Image handed to ``cv2.rectangle``; it is modified in place.
        bbox: Mapping with ``xmin``, ``ymin``, ``xmax`` and ``ymax`` entries.
            Each value goes through ``float`` and then ``int``, so numeric
            strings are accepted.
        color: Colour passed unchanged to ``cv2.rectangle``.
        thickness: Line thickness passed unchanged to ``cv2.rectangle``.
    """
    # Echo the box being drawn (debug output kept from the original script).
    print(bbox)
    corners = [int(float(bbox[key])) for key in ("xmin", "ymin", "xmax", "ymax")]
    top_left = tuple(corners[:2])
    bottom_right = tuple(corners[2:])
    cv2.rectangle(img, top_left, bottom_right, color, thickness)


def visualization_image(image_name, xml_file_name):
    """Show an image with the bounding boxes from its VOC xml drawn on top.

    The folder, filename and size fields found in the xml are printed, every
    object with a complete ``bndbox`` is drawn with ``bboxes_draw_on_img``,
    and the result is displayed in a window until a key is pressed.

    Args:
        image_name: Path of the image file to display.
        xml_file_name: Path of the VOC-style annotation xml for that image.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_name``.
    """
    root = ET.parse(xml_file_name).getroot()

    coordinate_tags = ("xmin", "ymin", "xmax", "ymax")
    object_lists = []
    for child in root:
        if child.tag in ("folder", "filename"):
            print(child.tag, child.text)
        elif child.tag == "size":
            for size_child in child:
                if size_child.tag in ("width", "height", "depth"):
                    print(size_child.tag, size_child.text)
        elif child.tag == "object":
            single_object = {}
            for object_child in child:
                if object_child.tag == "name":
                    single_object["name"] = object_child.text
                elif object_child.tag == "bndbox":
                    for bndbox_child in object_child:
                        if bndbox_child.tag in coordinate_tags:
                            single_object[bndbox_child.tag] = bndbox_child.text
            # Only keep objects that can actually be drawn; a missing
            # coordinate would otherwise raise KeyError while drawing.
            if all(tag in single_object for tag in coordinate_tags):
                object_lists.append(single_object)

    img = cv2.imread(image_name)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: {0}".format(image_name))

    for object_coordinate in object_lists:
        bboxes_draw_on_img(img, object_coordinate)
    cv2.imshow("capture", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    # Convert every DETRAC sequence annotation under basePath into per-frame
    # VOC xml files under saveBasePath, log the per-sequence frame counts,
    # then display one converted frame as a visual sanity check.
    basePath = "DETRAC-Train-Annotations-XML"
    saveBasePath = "xml_test"

    # Ignore stray non-xml entries so they are not fed to the xml parser;
    # sorting makes the log order reproducible.
    totalxml = sorted(
        entry for entry in os.listdir(basePath) if entry.lower().endswith(".xml")
    )
    total_num = 0
    print("正在转换")
    os.makedirs(saveBasePath, exist_ok=True)

    start = time.time()
    # Statistics log used for troubleshooting; the with-block guarantees it
    # is flushed and closed even if a conversion fails part-way.
    with open("xml_statistical.txt", "w") as log:
        for xml_name in totalxml:
            file_name = os.path.join(basePath, xml_name)
            print(file_name)
            num = ConvertVOCXml(file_path=saveBasePath, file_name=file_name)
            print(num)
            total_num = total_num + num
            log.write(file_name + " " + str(num) + "\n")
        end = time.time()
        seconds = end - start
        print("Time taken : {0} seconds".format(seconds))
        print(total_num)
        log.write(str(total_num) + "\n")

    visualization_image(
        "Insight-MVT_Annotation_Train/MVI_40212/img00396.jpg",
        "xml_test/MVI_40212__img00396.xml",
    )


然后根据生成的vocxml,迁移相应的图片到目标目录中,我的文件名为voc_data_migrate.py,我的代码为:

 

 

import os
import random
import shutil

# Copy the image belonging to every generated VOC xml file into one flat
# directory, naming each copy after its xml file.

# Directory containing the generated VOC xml files
XmlPath = r'xml_test'
# Root directory of the original images (one sub-folder per sequence)
pictureBasePath = r"Insight-MVT_Annotation_Train"
# Directory the copied images are saved to
saveBasePath = r"picture_test"

# Only xml files describe a frame; anything else in the directory is ignored.
total_xml = [name for name in os.listdir(XmlPath) if name.lower().endswith(".xml")]
num = len(total_xml)
os.makedirs(saveBasePath, exist_ok=True)

for xml_name in total_xml:
    # File names look like "<sequence folder>__<image stem>.xml".
    stem = os.path.splitext(xml_name)[0]
    folder, separator, image_stem = stem.partition("__")
    if not separator:
        print("skipping unexpected file name", xml_name)
        continue
    filePath = os.path.join(pictureBasePath, folder, image_stem + ".jpg")
    newfile_path = os.path.join(saveBasePath, stem + ".jpg")
    print(newfile_path)
    shutil.copyfile(filePath, newfile_path)
print("xml file total number", num)

 

至此,所需的xml文件和相应的图片都已生成,把它们放到VOC相应的目录里即可。关于VOC格式的目录结构,可以参考我的博文:http://blog.csdn.net/w5688414/article/details/78489064

 

然后利用下面的代码,代码文件名为ImageSets_Convert.py, 产生trainval.txt,test.txt,train.txt,val.txt文件,这样就可以像VOC2007那样使用这个数据集了。

 

import os
import random
import time

# Randomly split the annotation files into trainval/test and then
# trainval into train/val, writing one image id per line to the four
# VOC2007/ImageSets/Main/*.txt files.
xmlfilepath = r'./VOC2007/Annotations'
saveBasePath = r"./"

trainval_percent = 0.8  # share of all samples that goes to trainval
train_percent = 0.85  # share of trainval that goes to train
total_xml = os.listdir(xmlfilepath)
num = len(total_xml)
indices = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)
# Sets make the per-sample membership tests below O(1); testing against the
# sampled lists is quadratic and very slow on tens of thousands of files.
trainval_set = set(trainval)
train_set = set(train)

print("train and val size", tv)
print("train size", tr)

main_dir = os.path.join(saveBasePath, 'VOC2007/ImageSets/Main')
os.makedirs(main_dir, exist_ok=True)

# Start time
start = time.time()
with open(os.path.join(main_dir, 'trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(main_dir, 'test.txt'), 'w') as ftest, \
        open(os.path.join(main_dir, 'train.txt'), 'w') as ftrain, \
        open(os.path.join(main_dir, 'val.txt'), 'w') as fval:
    for i in indices:
        # The image id is the annotation file name without its extension.
        name = os.path.splitext(total_xml[i])[0] + '\n'
        if i in trainval_set:
            ftrainval.write(name)
            if i in train_set:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)
# End time
end = time.time()
seconds = end - start
print("Time taken : {0} seconds".format(seconds))

 

 

 

 

 

 

你可能感兴趣的:(目标检测)