参考链接:一次将自己的数据集制作成PASCAL VOC格式的惨痛经历
目标检测数据集VOC格式有三个重要的文件夹:Annotations,ImageSets和JPEGImages
Annotations:数据集图片对应的xml标注文件,需要自己写入图片文件名,以及每个box的xmin、ymin、xmax、ymax
ImageSets:里面的Main文件夹内容包括test.txt、train.txt、val.txt、trainval.txt,每个txt里面是数据集图片名(无后缀),后面可跟1/-1(正/负样本,可以不写),train和val不能有交集
JPEGImages:数据集图片,格式000001.jpg
生成文件夹代码如下:
import os
import shutil
# Create the VOC2007 directory skeleton.
# os.makedirs creates intermediate directories, so 'VOC2007/ImageSets' does not
# need its own call; exist_ok=True lets the script be re-run without raising
# FileExistsError when the folders are already there.
for voc_dir in (
    'VOC2007/Annotations',
    'VOC2007/ImageSets/Main',
    'VOC2007/ImageSets/Layout',
    'VOC2007/ImageSets/Segmentation',
    'VOC2007/JPEGImages',
    'VOC2007/SegmentationClass',
    'VOC2007/SegmentationObject',
):
    os.makedirs(voc_dir, exist_ok=True)
参考链接:Python+OpenCV实现png透明图像叠加在jpg图像上 (亲测可用)
xml文件里需要修改的只有文件名和bbox数值
import cv2
import numpy as np
import os
from lxml.etree import Element, SubElement, tostring
from xml.dom.minidom import parseString
import random
def save_xml(image_name, bbox, save_dir, width=1080, height=1920, channel=3,
             label='beetle'):
    """Write a PASCAL VOC style annotation XML file for one image.

    :param image_name: image file name (e.g. ``000001.jpg``); written to the
        ``<filename>`` node and, with its extension replaced by ``.xml``,
        used as the output file name
    :param bbox: iterable of ``(xmin, ymin, xmax, ymax)`` tuples, one per
        object, e.g. ``{(1, 2, 3, 5), (5, 6, 2, 5)}``
    :param save_dir: directory the XML file is written into
    :param width: image width written to ``<size>/<width>``
    :param height: image height written to ``<size>/<height>``
    :param channel: number of image channels written to ``<size>/<depth>``
    :param label: class name written to the ``<name>`` node of every object
    :return: None
    """
    def add_text_node(parent, tag, value):
        # Append <tag>value</tag> under parent and return the new node.
        node = SubElement(parent, tag)
        node.text = str(value)
        return node

    node_root = Element('annotation')
    add_text_node(node_root, 'folder', 'JPEGImages')
    add_text_node(node_root, 'filename', image_name)

    node_size = SubElement(node_root, 'size')
    add_text_node(node_size, 'width', width)
    add_text_node(node_size, 'height', height)
    add_text_node(node_size, 'depth', channel)

    for xmin, ymin, xmax, ymax in bbox:
        node_object = SubElement(node_root, 'object')
        add_text_node(node_object, 'name', label)
        add_text_node(node_object, 'difficult', '0')
        node_bndbox = SubElement(node_object, 'bndbox')
        add_text_node(node_bndbox, 'xmin', xmin)
        add_text_node(node_bndbox, 'ymin', ymin)
        add_text_node(node_bndbox, 'xmax', xmax)
        add_text_node(node_bndbox, 'ymax', ymax)

    # lxml's tostring returns bytes, hence the binary write below.
    xml = tostring(node_root, pretty_print=True)

    # Swap only the extension. str.replace('jpg', 'xml') would also rewrite a
    # "jpg" appearing inside the base name and would miss ".JPG"/".jpeg".
    xml_path = os.path.join(save_dir, os.path.splitext(image_name)[0] + '.xml')
    with open(xml_path, 'wb') as f:
        f.write(xml)
    return
def generate_txt(path_xml, path_txt):
    """Split the annotated samples into train/val/test and write the list files.

    Every ``*.xml`` file in ``path_xml`` counts as one sample. 60% of the
    samples are drawn at random for train and 20% for val (both rounded
    down); whatever remains goes to test. Four files are written into
    ``path_txt``: ``trainval.txt``, ``train.txt``, ``val.txt`` and
    ``test.txt``, each holding one sample name (without extension) per line.

    :param path_xml: directory containing the annotation XML files
    :param path_txt: directory the four list files are written into (with or
        without a trailing path separator)
    :return: None
    """
    val_percent = 0.2
    train_percent = 0.6

    # Only XML files are samples; stray entries in the directory are ignored.
    # Sorting makes the result reproducible for a given random seed.
    names = sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(path_xml)
        if entry.lower().endswith('.xml')
    )
    num = len(names)

    # Size of each part.
    train_number = int(num * train_percent)
    val_number = int(num * val_percent)
    trainval_number = train_number + val_number

    # Pick train+val from the whole set first, then pick train out of
    # train+val; the rest of train+val is val, so train and val never overlap.
    trainval_indices = random.sample(range(num), trainval_number)
    train_indices = set(random.sample(trainval_indices, train_number))
    trainval_indices = set(trainval_indices)  # O(1) membership tests below

    # Report the size of each part.
    print("train加val的数目", trainval_number)
    print("train的数目", train_number)

    with open(os.path.join(path_txt, 'trainval.txt'), 'w') as ftrainval, \
            open(os.path.join(path_txt, 'test.txt'), 'w') as ftest, \
            open(os.path.join(path_txt, 'train.txt'), 'w') as ftrain, \
            open(os.path.join(path_txt, 'val.txt'), 'w') as fval:
        for index, stem in enumerate(names):
            line = stem + '\n'
            if index in trainval_indices:
                ftrainval.write(line)
                if index in train_indices:
                    ftrain.write(line)
                else:
                    fval.write(line)
            else:
                ftest.write(line)
if __name__ == '__main__':
    # Paste a foreground (beetle) PNG at random positions onto every background
    # image, save the result as a numbered JPEG with a matching VOC XML
    # annotation, then write the train/val/test list files.
    path_img_png = '1600.png'  # foreground (beetle) image; expected to be square
    path_img_input = 'test/'  # background image directory; the path must not contain Chinese characters
    path_img_output = 'VOC2007/JPEGImages/'  # output image directory
    path_xml = 'VOC2007/Annotations'
    path_txt = 'VOC2007/ImageSets/Main/'

    # Size every background is resized to; the same values go into the XML.
    bg_width, bg_height = 1080, 1920
    beetle_num = 5  # number of beetles pasted per image

    img_png = cv2.imread(path_img_png, cv2.IMREAD_UNCHANGED)
    if img_png is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        raise FileNotFoundError('cannot read foreground image: ' + path_img_png)

    idx = 1  # running number used for the output file names
    for filename in sorted(os.listdir(path_img_input)):
        print(filename)
        img_jpg = cv2.imread(os.path.join(path_img_input, filename),
                             cv2.IMREAD_UNCHANGED)
        if img_jpg is None:
            # Not a readable image: skip it without consuming an output number.
            print('skipping unreadable image:', filename)
            continue
        # cv2.resize takes the target size as (width, height).
        img_jpg = cv2.resize(img_jpg, (bg_width, bg_height))

        coordinate = set()  # bounding boxes of the pasted beetles
        for _ in range(beetle_num):
            # With probability 0.5 turn the foreground a further 90 degrees
            # clockwise (np.rot90 with a negative k); the rotation accumulates
            # across beetles, as img_png itself is reassigned.
            if random.random() > 0.5:
                img_png = np.rot90(img_png, -1)

            # Derive the box from the shape AFTER rotation so it stays correct
            # for a non-square foreground, and keep the paste inside the image.
            fg_height, fg_width = img_png.shape[:2]
            if fg_width >= bg_width or fg_height >= bg_height:
                raise ValueError('foreground image does not fit in the background')
            x1 = random.randint(1, bg_width - fg_width)
            y1 = random.randint(1, bg_height - fg_height)
            x2 = x1 + fg_width
            y2 = y1 + fg_height
            coordinate.add((x1, y1, x2, y2))

            # NOTE(review): merge_img is not defined in the visible part of
            # this file; it must be provided elsewhere.
            img_jpg = merge_img(img_jpg, img_png, y1, y2, x1, x2)
        print(coordinate)

        # Zero-padded six-digit name, e.g. 000001.jpg.
        new_name = '%06d.jpg' % idx
        idx += 1

        cv2.imwrite(os.path.join(path_img_output, new_name), img_jpg)
        # Write the matching annotation file for this image.
        save_xml(new_name, coordinate, path_xml,
                 width=bg_width, height=bg_height)

    # Build the train/val/test list files once all annotations exist.
    generate_txt(path_xml, path_txt)