PaddleDetection所使用的数据为VOC格式,使用labelimg作为标注工具,标注工具的下载安装见链接:
https://zhuanlan.zhihu.com/p/9780704
下面以水果分类为例,演示一下数据标注过程,原始图片数据如下图所示:
点击Create\nRectBox—框选目标–命名(下图中命名为apple)–点击OK—点击Save(完成一张)–点击Next Image 标注下一张:
标注完成以后会生成很多的xml文件:
打开一个xml文件:
---VOC
------create_list.py
--------label_list.txt
--------train.txt
--------val.txt
------Annotations
---------n个xml文件
------ImagesSet
---------Main
--------get_list.py
--------label_list.txt
--------train.txt
--------val.txt
------JPEGImages
---------n个img文件
其中Annotations存放标注生成的xml文件,JPEGImage存放图片,ImageSets存放对训练集和数据集的划分。
ImageSet下有Main文件,Main下需要建立一个label_list.txt,指标注的目标的名称(VOC文件夹下的label_list.txt与之相同):
运行Main文件夹中的get_list.py,即可生成train.txt和val.txt,即将300张水果图片分成训练集和验证集两部分(此处生成的train.txt和val.txt是Main文件夹里的):
//路径需要自己修改
import os
import random
train_precent=0.8 //划分比例
xml="C:/Users/Administrator/Desktop/dataset/fruit/fruit-detection/Annotations"
save="C:/Users/Administrator/Desktop/dataset/fruit/fruit-detection/ImageSets/Main"
total_xml=os.listdir(xml)
num=len(total_xml)
list=range(num)
tr=int(num*train_precent)
train=random.sample(list,tr)
ftrain=open("C:/Users/Administrator/Desktop/dataset/fruit/fruit-detection/ImageSets/Main/train.txt","w")
ftest=open("C:/Users/Administrator/Desktop/dataset/fruit/fruit-detection/ImageSets/Main/val.txt","w")
for i in range(num):
name=total_xml[i][:-4]+"\n"
if i in train:
ftrain.write(name)
else:
ftest.write(name)
ftrain.close()
ftest.close()
运行VOC文件夹中的create_list.py,即可生成train.txt和val.txt,即生成含有路径信息以及图像和xml文件一一对应的文件(此处生成的train.txt和val.txt是VOC文件夹里的):
import os
import os.path as osp
import re
import random
devkit_dir = './'
def get_dir(devkit_dir, type):
return osp.join(devkit_dir, type)
def walk_dir(devkit_dir):
filelist_dir = get_dir(devkit_dir, 'ImageSets/Main')
annotation_dir = get_dir(devkit_dir, 'Annotations')
img_dir = get_dir(devkit_dir, 'JPEGImages')
trainval_list = []
test_list = []
added = set()
for _, _, files in os.walk(filelist_dir):
for fname in files:
img_ann_list = []
if re.match('train\.txt', fname):
img_ann_list = trainval_list
elif re.match('val\.txt', fname):
img_ann_list = test_list
else:
continue
fpath = osp.join(filelist_dir, fname)
for line in open(fpath):
name_prefix = line.strip().split()[0]
if name_prefix in added:
continue
added.add(name_prefix)
ann_path = osp.join(annotation_dir, name_prefix + '.xml')
img_path = osp.join(img_dir, name_prefix + '.jpg')
assert os.path.isfile(ann_path), 'file %s not found.' % ann_path
assert os.path.isfile(img_path), 'file %s not found.' % img_path
img_ann_list.append((img_path, ann_path))
return trainval_list, test_list
def prepare_filelist(devkit_dir, output_dir):
trainval_list = []
test_list = []
trainval, test = walk_dir(devkit_dir)
trainval_list.extend(trainval)
test_list.extend(test)
random.shuffle(trainval_list)
with open(osp.join(output_dir, 'train.txt'), 'w') as ftrainval:
for item in trainval_list:
ftrainval.write(item[0] + ' ' + item[1] + '\n')
with open(osp.join(output_dir, 'val.txt'), 'w') as ftest:
for item in test_list:
ftest.write(item[0] + ' ' + item[1] + '\n')
if __name__ == '__main__':
prepare_filelist(devkit_dir, '.')
https://pan.baidu.com/wap/init?surl=ZO8OlqvWuMk_sqOwlo56rg
提取码:vw3b