把数据集NWPU VHR-10转成pascal voc的格式
我运行了参考的代码以后,发现有以下几点不足:
基于此,我将参考代码进行了改进,思路如下:
首先应该将原来的标注文件txt转成xml,我的代码是把标注文件和图片放在了同一个文件夹下面,以下备注的时候简称f1,然后调用deal()将标注文件转成xml文件;
然后调用imag_rename()将原来的图片重新命名,其中old_path是原来的图片存放的路径,new_path是新的存放路径,这里之所以这样做是怕把文件搞乱了,不利于程序的debug
import shutil
from lxml.etree import Element,SubElement,tostring
from xml.dom.minidom import parseString
import xml.dom.minidom
import os
import sys
from PIL import Image
# 该代码主要解决的是图片的重命名问题,因为voc的图片是从0开始,而且是6位数
def imag_rename(old_path, new_path):
    """Copy the ``.jpg`` images in *old_path* to *new_path* under VOC-style names.

    Each image whose file name is a number ``N`` is copied to
    ``<new_path>/<N - 1, zero-padded to six digits>.jpg``; for example
    ``001.jpg`` becomes ``000000.jpg``.  Sub-directories and files with any
    other extension are ignored.  The originals are left untouched.

    Args:
        old_path: Directory holding the original images.
        new_path: Directory that receives the renamed copies.  It is created
            (including missing parents) when absent and reused when it
            already exists, so the function can safely be run again.

    Raises:
        ValueError: If a ``.jpg`` file name (without extension) is not an
            integer.
    """
    # Take the listing before creating new_path, as the original code did.
    filelist = os.listdir(old_path)
    os.makedirs(new_path, exist_ok=True)
    for file in filelist:
        src = os.path.join(old_path, file)
        if os.path.isdir(src):
            continue
        stem, ext = os.path.splitext(file)
        if ext != '.jpg':
            continue
        # zfill pads the shifted index with leading zeros up to six digits.
        dst = os.path.join(new_path, str(int(stem) - 1).zfill(6) + ext)
        shutil.copyfile(src, dst)
def deal(path):
    """Convert every ``.txt`` annotation file in *path* to a VOC-style XML file.

    For each ``<N>.txt`` in *path* the annotation is parsed with ``readtxt``,
    the size of the matching ``<N>.jpg`` in the same directory is read with
    ``readsize``, and the result is written by ``writexml`` to
    ``<path>/output/<N - 1, zero-padded to six digits>.xml``.  The XML refers
    to the image by its renamed file name (same index, ``.jpg`` extension).

    Args:
        path: Directory containing both the ``.txt`` annotations and the
            ``.jpg`` images.

    Raises:
        ValueError: If a ``.txt`` file name (without extension) is not an
            integer.
    """
    files = os.listdir(path)
    # The output location can be changed here if desired.
    outpath = os.path.join(path, "output")
    # exist_ok lets the conversion be re-run without deleting the folder first.
    os.makedirs(outpath, exist_ok=True)
    for file in files:
        filename, sufix = os.path.splitext(file)
        if sufix != '.txt':
            continue
        num, xmins, ymins, xmaxs, ymaxs, names = readtxt(os.path.join(path, file))
        filename_fill = str(int(filename) - 1).zfill(6)
        filename_jpg = filename_fill + ".jpg"
        dealpath = os.path.join(outpath, filename_fill + ".xml")
        imagpath = os.path.join(path, filename + ".jpg")
        # Read the image size before any output is created so that a missing
        # or unreadable image does not leave an empty XML file behind.
        height, width = readsize(imagpath)
        writexml(dealpath, filename_jpg, num, xmins, ymins, xmaxs, ymaxs,
                 names, height, width)
# Numeric class id (as it appears in the annotation text) -> class name.
_NWPU_CLASS_NAMES = {
    "1": 'airplane',
    "2": 'ship',
    "3": 'storage tank',
    "4": 'baseball diamond',
    "5": 'tennis court',
    "6": 'basketball court',
    "7": 'ground track field',
    "8": 'harbor',
    "9": 'bridge',
    "10": 'vehicle',
}


def readtxt(path):
    """Parse one annotation text file into parallel lists of box fields.

    Every non-blank line is expected to look like
    ``(xmin,ymin),(xmax,ymax),class_id``; whitespace around the individual
    values is ignored.  Blank and whitespace-only lines are skipped.

    Args:
        path: Path of the annotation ``.txt`` file.

    Returns:
        A tuple ``(num, xmins, ymins, xmaxs, ymaxs, names)`` where ``num`` is
        the number of objects and the five lists hold, per object, the
        coordinates (as strings, exactly as written in the file) and the
        class name.  A class id that is not in the 1-10 table is kept
        verbatim as the name, and *path* is printed to flag the file.

    Raises:
        IndexError: If a non-blank line does not follow the expected layout.
    """
    with open(path, 'r') as f:
        contents = f.read()
    # Filter on the stripped line so that lines holding only spaces or tabs
    # are dropped too instead of crashing the field parsing below.
    objects = [line for line in contents.split('\n') if line.strip()]
    num = len(objects)

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    names = []
    for objecto in objects:
        fields = objecto.split(',')
        xmin = fields[0].split('(')[1].strip()
        ymin = fields[1].split(')')[0].strip()
        xmax = fields[2].split('(')[1].strip()
        ymax = fields[3].split(')')[0].strip()
        class_id = fields[4].strip()

        name = _NWPU_CLASS_NAMES.get(class_id)
        if name is None:
            # Unknown id: report the offending file and keep the raw value.
            print(path)
            name = class_id

        xmins.append(xmin)
        ymins.append(ymin)
        xmaxs.append(xmax)
        ymaxs.append(ymax)
        names.append(name)
    return num, xmins, ymins, xmaxs, ymaxs, names
# Original author's note: Nwpu-vhr-10 < 1000*600
def writexml(path, filename, num, xmins, ymins, xmaxs, ymaxs, names, height, width):
    """Write a VOC-style ``annotation`` XML document to *path*.

    The document contains ``folder`` (always ``VOC2007``), ``filename``,
    ``size`` (``width``, ``height`` and a fixed ``depth`` of 3) and one
    ``object`` element per annotated box with ``name``, ``pose``
    (``unspecified``), ``truncated`` (``0``), ``difficult`` (``0``) and a
    ``bndbox`` holding ``xmin``/``ymin``/``xmax``/``ymax``.

    Args:
        path: Destination file; it is overwritten if it exists.
        filename: Image file name recorded in the ``filename`` element.
        num: Number of objects to write; the first ``num`` entries of each
            list are used.
        xmins, ymins, xmaxs, ymaxs: Per-object box coordinates.
        names: Per-object class names.
        height: Image height recorded under ``size``.
        width: Image width recorded under ``size``.

    Raises:
        IndexError: If any of the lists has fewer than ``num`` entries.
    """
    node_root = Element('annotation')
    SubElement(node_root, 'folder').text = "VOC2007"
    SubElement(node_root, 'filename').text = str(filename)

    node_size = SubElement(node_root, "size")
    SubElement(node_size, 'width').text = str(width)
    SubElement(node_size, 'height').text = str(height)
    # Depth is not measured from the image; it is always written as 3.
    SubElement(node_size, 'depth').text = '3'

    for i in range(num):
        node_object = SubElement(node_root, 'object')
        SubElement(node_object, 'name').text = str(names[i])
        SubElement(node_object, 'pose').text = "unspecified"
        SubElement(node_object, 'truncated').text = "0"
        SubElement(node_object, 'difficult').text = '0'

        node_bndbox = SubElement(node_object, 'bndbox')
        SubElement(node_bndbox, 'xmin').text = str(xmins[i])
        SubElement(node_bndbox, 'ymin').text = str(ymins[i])
        SubElement(node_bndbox, 'xmax').text = str(xmaxs[i])
        SubElement(node_bndbox, 'ymax').text = str(ymaxs[i])

    # The serialised tree is written as-is; the former minidom re-parse of
    # this output was never used and has been dropped.
    xml = tostring(node_root, pretty_print=True)
    with open(path, 'wb') as f:
        f.write(xml)
def readsize(path):
    """Return the ``(height, width)`` of the image stored at *path*.

    Args:
        path: Path of the image file.

    Returns:
        A ``(height, width)`` tuple; note the order is the reverse of
        ``Image.size``, which is ``(width, height)``.
    """
    # The context manager closes the underlying file, so converting many
    # images does not accumulate open file handles.
    with Image.open(path) as img:
        width, height = img.size
    return height, width
if __name__ == "__main__":
    # The three locations were undefined placeholders (f1, old_path,
    # new_path) and raised NameError; they are now taken from the command
    # line:
    #   argv[1]  folder holding the .txt annotations together with the images
    #   argv[2]  folder holding the original images to be renamed
    #   argv[3]  folder that receives the renamed image copies
    if len(sys.argv) != 4:
        sys.exit(
            "usage: %s ANNOTATION_DIR OLD_IMAGE_DIR NEW_IMAGE_DIR" % sys.argv[0]
        )
    path, old_path, new_path = sys.argv[1:4]
    deal(path)
    imag_rename(old_path, new_path)
注意,我在写imag_rename()和deal()的时候都用到了os.mkdir(),当以上代码出错想要调试的时候,一定要在每次调试的时候都把mkdir()创建的文件夹删除掉,否则会报错:路径已经存在
最终处理好的标注文件和图片都会放入预先设定好的输出文件夹里面,就可以用啦!