记录一下用Efficientdet训练DOTA数据集前的数据准备工作
DOTA数据集(v1.0):
15类:‘plane’, ‘baseball-diamond’, ‘bridge’, ‘ground-track-field’,
‘small-vehicle’, ‘large-vehicle’, ‘ship’, ‘tennis-court’,
‘basketball-court’, ‘storage-tank’, ‘soccer-ball-field’,
‘roundabout’, ‘harbor’, ‘swimming-pool’, ‘helicopter’
2806张png格式的航空影像,188282个实例对象,图像尺寸为800x800~4000X4000
图片:train、val、test
标签:PXXXX.txt,标注格式为
imagesource:影像来源
gsd:地面采样距离(若缺少,则注释为null)
x1 y1 x2 y2 x3 y3 x4 y4 category difficult
#(四个角点坐标,类别,difficult=0不困难/1困难)
Dataset
|–train2017
|–val2017
|–annotations
|–instances_train2017.json
|–instances_val2017.json
将train文件夹重命名为train2017,val重命名为val2017,二者的标签统一放于./DOTA/labeltxt/下
train.txt,val.txt按行存放影像名称
'''
数据集所有影像名称,写入train/val.txt 0-2806
'''
import os
# Collect the image names of one split and store them, one per line, in a
# list file. Switch both paths to val.txt / val2017 for the validation split.
txt_path = "/home/DOTA/train.txt"  # val.txt
file_path = "/home/DOTA/train2017/"  # val2017

# os.listdir gives no ordering guarantee, so sort for a reproducible list.
path_list = sorted(os.listdir(file_path))

# Keep only the name without its extension. splitext removes just the last
# suffix, so a name that itself contains dots is not truncated; entries that
# end up empty are dropped.
path_name = [stem for stem in (os.path.splitext(i)[0] for i in path_list) if stem]

# "a" appends instead of truncating and creates the file when it is missing.
# The file is opened once for the whole list; the with-block closes it.
with open(txt_path, "a") as file:
    file.writelines(file_name + "\n" for file_name in path_name)
因为我后面要以xml格式进行数据增强,所以没有把DOTA的txt直接转换为json,而是分两步进行:先将txt转换为xml,再将xml转换为json。
生成的xml位于./DOTA/Annotations_xml/下
'''
DOTA数据集中标签为txt,将其转换为xml
类别数目:15
类别名称:plane, ship, storage-tank, baseball-diamond, tennis-court, basketball-court,
ground-track-field, harbor, bridge, large-vehicle, small-vehicle, helicopter, roundabout,
soccer-ball-field, swimming-pool
'''
import os
import cv2
from xml.dom.minidom import Document
# import importlib,sys
# The 15 DOTA v1.0 category names, spelled exactly as they appear in the
# label files (hyphenated, lower case).
category_set = [
    'plane',
    'baseball-diamond',
    'bridge',
    'ground-track-field',
    'small-vehicle',
    'large-vehicle',
    'ship',
    'tennis-court',
    'basketball-court',
    'storage-tank',
    'soccer-ball-field',
    'roundabout',
    'harbor',
    'swimming-pool',
    'helicopter',
]
def custombasename(fullname):
    """Return the file name of *fullname* without directory and extension.

    Only the last extension is removed, e.g. ``'/a/b/P0001.txt'`` gives
    ``'P0001'``.
    """
    stem, _extension = os.path.splitext(fullname)
    return os.path.basename(stem)
def limit_value(a, b):
    """Clamp coordinate *a* into the range ``[1, b - 1]``.

    Args:
        a: Coordinate value to clamp.
        b: Exclusive upper bound (the image width or height).

    Returns:
        1 when *a* is below 1, ``b - 1`` when the result would reach or
        exceed *b*, otherwise *a* unchanged.
    """
    if a < 1:
        a = 1
    # The upper bound is checked second, so it wins when b <= 1.
    if a >= b:
        a = b - 1
    return a
def readlabeltxt(txtpath, height, width, hbb=True):
    """Parse one DOTA label file into a list of boxes.

    An annotation line has the form
    ``x1 y1 x2 y2 x3 y3 x4 y4 category difficult``. Lines with fewer than
    ten whitespace-separated fields are skipped; this covers the
    ``imagesource`` / ``gsd`` header lines and blank lines, so a trailing
    empty line no longer raises IndexError and header-less label files do
    not lose their first two objects. Objects whose category is not listed
    in ``category_set`` are printed and skipped.

    Args:
        txtpath: Path of the DOTA ``.txt`` label file.
        height: Image height, the bound used to clamp y coordinates.
        width: Image width, the bound used to clamp x coordinates.
        hbb: When true, return axis-aligned boxes; otherwise keep the four
            corner points.

    Returns:
        A list with one entry per kept object. With ``hbb`` the entry is
        ``[xmin, ymin, xmax, ymax, label, difficult]``; otherwise it is
        ``[x1, y1, x2, y2, x3, y3, x4, y4, label, difficult]``.
        Coordinates are ints clamped with ``limit_value``; ``label`` and
        ``difficult`` are the strings read from the file.

    Raises:
        ValueError: If a coordinate field of a kept line is not numeric.
    """
    print(txtpath)
    with open(txtpath, 'r') as f_in:
        # split() without arguments tolerates tabs and repeated spaces.
        splitlines = [line.split() for line in f_in]

    boxes = []
    for splitline in splitlines:
        if len(splitline) < 10:
            # Header, blank or truncated line: not an annotation.
            continue
        label = splitline[8]
        kunnan = splitline[9]  # the "difficult" flag, kept as a string
        if label not in category_set:
            print(label)
            continue

        # Coordinates may be written as floats; truncate them to ints.
        coords = [int(float(value)) for value in splitline[:8]]
        xs = coords[0::2]
        ys = coords[1::2]

        if hbb:
            # Axis-aligned bounding box of the four corners.
            box = [
                limit_value(min(xs), width),
                limit_value(min(ys), height),
                limit_value(max(xs), width),
                limit_value(max(ys), height),
            ]
        else:
            # Oriented box: keep the corners in file order, each clamped.
            box = []
            for x, y in zip(xs, ys):
                box.append(limit_value(x, width))
                box.append(limit_value(y, height))
        boxes.append(box + [label, kunnan])
    return boxes
def _append_element(doc, parent, tag):
    """Create an empty ``<tag>`` element under *parent* and return it."""
    element = doc.createElement(tag)
    parent.appendChild(element)
    return element


def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>`` under *parent* and return the element."""
    element = _append_element(doc, parent, tag)
    # createTextNode only accepts strings, so convert numbers explicitly.
    element.appendChild(doc.createTextNode(str(text)))
    return element


def writeXml(tmp, imgname, w, h, d, bboxes, hbb=True):
    """Write one PASCAL VOC style XML annotation file.

    The file is named after *imgname* with its extension replaced by
    ``.xml`` and is written UTF-8 encoded into the directory *tmp*.

    Args:
        tmp: Directory the XML file is written to.
        imgname: Image file name stored in ``<filename>``.
        w: Image width stored in ``<size><width>``.
        h: Image height stored in ``<size><height>``.
        d: Image depth stored in ``<size><depth>``.
        bboxes: Sequence of boxes. The last two items of a box are the
            class label and the difficult flag; the leading items are the
            coordinates (4 values for ``hbb``, 8 values otherwise).
        hbb: When true, ``<bndbox>`` holds ``xmin, ymin, xmax, ymax``;
            otherwise it holds the corner points ``x0, y0 ... x3, y3``.

    Returns:
        None.
    """
    doc = Document()
    annotation = _append_element(doc, doc, 'annotation')

    _append_text_element(doc, annotation, 'folder', "VOC2007")
    _append_text_element(doc, annotation, 'filename', imgname)

    # Fixed placeholder metadata in the VOC layout.
    source = _append_element(doc, annotation, 'source')
    _append_text_element(doc, source, 'database', "My Database")
    _append_text_element(doc, source, 'annotation', "VOC2007")
    _append_text_element(doc, source, 'image', "flickr")

    owner = _append_element(doc, annotation, 'owner')
    _append_text_element(doc, owner, 'flickrid', "NULL")
    _append_text_element(doc, owner, 'name', "idannel")

    size = _append_element(doc, annotation, 'size')
    _append_text_element(doc, size, 'width', w)
    _append_text_element(doc, size, 'height', h)
    _append_text_element(doc, size, 'depth', d)

    _append_text_element(doc, annotation, 'segmented', "0")

    # Tag names of the coordinate elements, in the order they sit in a box.
    if hbb:
        coord_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    else:
        coord_tags = ('x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3')

    for bbox in bboxes:
        object_new = _append_element(doc, annotation, 'object')
        _append_text_element(doc, object_new, 'name', bbox[-2])
        _append_text_element(doc, object_new, 'pose', "Unspecified")
        _append_text_element(doc, object_new, 'truncated', "0")
        _append_text_element(doc, object_new, 'difficult', bbox[-1])

        bndbox = _append_element(doc, object_new, 'bndbox')
        # Index explicitly so a box that is too short raises IndexError
        # rather than silently producing an incomplete <bndbox>.
        for index, tag in enumerate(coord_tags):
            _append_text_element(doc, bndbox, tag, bbox[index])

    xmlname = os.path.splitext(imgname)[0]
    tempfile = os.path.join(tmp, xmlname + '.xml')
    # toprettyxml returns bytes when an encoding is given, hence 'wb'.
    with open(tempfile, 'wb') as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))
    return
if __name__ == '__main__':
    # Convert every DOTA txt label that has a matching image in the chosen
    # split into a VOC style XML file (horizontal boxes).
    data_path = '/home/DOTA/train2017'  # val2017
    images_path = os.path.join(data_path)  # directory with the images
    labeltxt_path = os.path.join('/home/DOTA/labeltxt/')  # DOTA txt labels
    anno_new_path = os.path.join('/home/DOTA/Annotations_xml/')  # XML output
    ext = '.png'  # extension of the images

    # The output directory is not created anywhere else.
    if not os.path.isdir(anno_new_path):
        os.makedirs(anno_new_path)

    for filename in sorted(os.listdir(labeltxt_path)):
        if not filename.endswith('.txt'):
            continue
        filepath = os.path.join(labeltxt_path, filename)  # one label file
        picname = os.path.splitext(filename)[0] + ext
        pic_path = os.path.join(images_path, picname)

        # The image is read only to obtain its size for the XML header.
        im = cv2.imread(pic_path)
        if im is None:
            # imread returns None instead of raising. The label directory
            # holds train and val labels together, so labels of the other
            # split have no image under data_path and are skipped.
            print('无法读取图片,跳过', pic_path)
            continue
        (H, W, D) = im.shape

        boxes = readlabeltxt(filepath, H, W, hbb=True)
        if len(boxes) == 0:
            print('文件为空', filepath)

        # An XML file is written even when no box was kept.
        writeXml(anno_new_path, picname, W, H, D, boxes, hbb=True)
        print('正在处理%s' % filename)
生成的json位于./annotations/
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import os.path as osp
from collections import OrderedDict #获取一个有序的字典对象
import json
import xmltodict
import mmcv #在visual stuio里面添加vc++模块再安装
# Module-level logger used by the converter for its info and warning messages.
logger = logging.getLogger(__name__)
class PASCALVOC2COCO(object):
    """Convert PASCAL VOC style XML annotations to one MSCOCO JSON file.

    The category table is fixed to the 15 DOTA v1.0 classes, numbered
    from 1.
    """

    def __init__(self):
        # Category name -> COCO category id.
        self.cat2id = {
            'plane': 1, 'baseball-diamond': 2, 'bridge': 3,
            'ground-track-field': 4, 'small-vehicle': 5, 'large-vehicle': 6,
            'ship': 7, 'tennis-court': 8, 'basketball-court': 9,
            'storage-tank': 10, 'soccer-ball-field': 11, 'roundabout': 12,
            'harbor': 13, 'swimming-pool': 14, 'helicopter': 15,
        }

    @staticmethod
    def _image_id(name):
        """Derive a numeric image id from an image base name.

        All decimal digits of *name* are kept in order, so
        ``'2007_000027'`` gives 2007000027, ``'000005'`` gives 5 and the
        DOTA style ``'P0001'`` gives 1 (a plain ``int(name)`` fails on it).

        Raises:
            ValueError: If *name* contains no digit at all.
        """
        digits = ''.join(ch for ch in name if ch in '0123456789')
        if not digits:
            raise ValueError(
                'cannot derive a numeric image id from {!r}'.format(name))
        return int(digits)

    @staticmethod
    def _read_split(split_file):
        """Return the non-empty lines of *split_file*, stripped."""
        with open(split_file, 'r') as f:
            return [line.strip() for line in f if line.strip()]

    @staticmethod
    def _find_ann_dir(devkit_path):
        """Return the directory under *devkit_path* that holds the XML files.

        ``Annotations`` is preferred; ``Annotations_xml`` (the directory
        the txt-to-xml step of this pipeline writes to) is used when only
        it exists. If neither exists the ``Annotations`` path is returned
        so the later ``open`` fails with that path in its message.
        """
        for dirname in ('Annotations', 'Annotations_xml'):
            candidate = osp.join(devkit_path, dirname)
            if osp.isdir(candidate):
                return candidate
        return osp.join(devkit_path, 'Annotations')

    def get_img_item(self, file_name, image_id, size):
        """Build the COCO ``images`` entry for one image.

        Args:
            file_name: Image file name stored in the entry.
            image_id: Numeric id of the image.
            size: Mapping with ``'height'`` and ``'width'`` values that
                ``int()`` can convert.
        """
        image = OrderedDict()
        image['file_name'] = file_name
        image['height'] = int(size['height'])
        image['width'] = int(size['width'])
        image['id'] = image_id
        return image

    def get_ann_item(self, obj, image_id, ann_id):
        """Build the COCO ``annotations`` entry for one VOC object.

        Args:
            obj: Mapping with ``'name'`` and a ``'bndbox'`` mapping holding
                ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
            image_id: Id of the image the object belongs to.
            ann_id: Id of this annotation.

        Raises:
            KeyError: If ``obj['name']`` is not a known category.
        """
        # The top-left corner is shifted by one pixel; width and height
        # are measured from the shifted corner to xmax / ymax.
        x1 = int(obj['bndbox']['xmin']) - 1
        y1 = int(obj['bndbox']['ymin']) - 1
        w = int(obj['bndbox']['xmax']) - x1
        h = int(obj['bndbox']['ymax']) - y1

        annotation = OrderedDict()
        # The box itself, as a four-point polygon.
        annotation['segmentation'] = [
            [x1, y1, x1, (y1 + h), (x1 + w), (y1 + h), (x1 + w), y1]]
        annotation['area'] = w * h
        annotation['iscrowd'] = 0
        annotation['image_id'] = image_id
        annotation['bbox'] = [x1, y1, w, h]
        annotation['category_id'] = self.cat2id[obj['name']]
        annotation['id'] = ann_id
        # NOTE(review): 'ignore' is 1 for every object instead of the
        # object's 'difficult' flag - confirm this is what the training
        # and evaluation code expects.
        annotation['ignore'] = int(1)
        return annotation

    def get_cat_item(self, name, id):
        """Build the COCO ``categories`` entry for one category."""
        category = OrderedDict()
        category['supercategory'] = 'none'
        category['id'] = id
        category['name'] = name
        return category

    def convert(self, devkit_path, year, split, save_file):
        """Convert the XML annotations of one split to a COCO JSON file.

        Args:
            devkit_path: Dataset root. It must contain ``<split>.txt``
                (one image base name per line) and the XML directory.
            year: Unused; kept so existing callers keep working.
            split: Split name, e.g. ``'train'`` or ``'val'``.
            save_file: Path of the JSON file to write.

        Raises:
            ValueError: If an image name contains no digit to build an
                id from.
        """
        split_file = osp.join(devkit_path, '{}.txt'.format(split))
        ann_dir = self._find_ann_dir(devkit_path)
        name_list = self._read_split(split_file)

        images, annotations = [], []
        seen_image_ids = set()
        ann_id = 1
        for name in name_list:
            image_id = self._image_id(name)
            if image_id in seen_image_ids:
                logger.warning(
                    'image id %d of %s is already in use', image_id, name)
            seen_image_ids.add(image_id)

            xml_file = osp.join(ann_dir, name + '.xml')
            # Read bytes so the XML parser honours the encoding declared
            # in the file rather than the locale default.
            with open(xml_file, 'rb') as f:
                ann_dict = xmltodict.parse(f.read(), force_list=('object',))
            root = ann_dict['annotation']

            # Take the image extension from the XML <filename> (DOTA
            # images are .png) and fall back to .jpg when it is missing.
            extension = osp.splitext(root.get('filename') or '')[1] or '.jpg'
            image = self.get_img_item(name + extension, image_id, root['size'])
            images.append(image)

            if 'object' in root:
                for obj in root['object']:
                    annotations.append(
                        self.get_ann_item(obj, image_id, ann_id))
                    ann_id += 1
            else:
                logger.warning('%s does not have any object', name)

        categories = [
            self.get_cat_item(cat_name, cat_id)
            for cat_name, cat_id in self.cat2id.items()
        ]

        ann = OrderedDict()
        ann['images'] = images
        ann['type'] = 'instances'
        ann['annotations'] = annotations
        ann['categories'] = categories
        logger.info('Saving annotations to %s', save_file)
        with open(save_file, 'w') as f:
            json.dump(ann, f)
if __name__ == '__main__':
    # Only os.path is imported as osp in this script, so import os locally.
    import os

    converter = PASCALVOC2COCO()
    devkit_path = r'/home/DOTA'
    year = '2017'
    split = 'train'  # set to 'val' for the validation split
    # Build the output name from split and year so the two cannot get out
    # of sync: instances_train2017.json / instances_val2017.json.
    save_file = '/home/DOTA/annotations/instances_{}{}.json'.format(split, year)

    # Create the output directory up front; otherwise the run only fails
    # at the very end, when the JSON file is opened for writing.
    save_dir = osp.dirname(save_file)
    if save_dir and not osp.isdir(save_dir):
        os.makedirs(save_dir)

    converter.convert(devkit_path, year, split, save_file)