keras版yolov3的训练标签格式是“图片路径 box1 box2 ...”这种形式(每个box为 x_min,y_min,x_max,y_max,class_id),转换为voc格式可使用以下代码(在别人代码的基础上稍作修改)。list.txt为yolo格式的标签文件,转换得到的voc格式标签为.xml文件,统一存放在Annotations目录下。
from xml.dom.minidom import Document
from lxml.etree import Element, SubElement, tostring
import pprint
from xml.dom.minidom import parseString
import cv2
class XmlMaker:
    """Convert a keras-yolo3 style label list into Pascal VOC XML files.

    Every non-blank line of the label file is expected to look like
    ``image_path x_min,y_min,x_max,y_max,class_id [more boxes ...]``.
    One ``<image stem>.xml`` file is written per line.
    """

    def __init__(self, txtpath, xmlpath, class_names=None):
        """Store the input and output locations.

        Args:
            txtpath: Path of the label list file to read.
            xmlpath: Directory that receives the generated ``.xml`` files.
            class_names: Optional sequence indexed by the integer class id of
                a box; the selected entry is written into ``<name>``.  When
                omitted, every object is named ``'person'``.
        """
        self.txtPath = txtpath
        self.xmlPath = xmlpath
        self.classNames = class_names
        self.txtList = []

    def readtxt(self):
        """Read the label list and write one VOC XML file per image line.

        The raw lines are kept in ``self.txtList``.  Blank lines are skipped.

        Raises:
            FileNotFoundError: If ``cv2.imread`` cannot load a listed image.
            ValueError: If a box has fewer than five comma-separated fields.
        """
        # Local import so the class does not depend on a file-level
        # ``import os``.
        import os

        with open(self.txtPath, "r", encoding='gbk', errors='ignore') as txtfile:
            self.txtList = txtfile.readlines()

        if self.xmlPath:
            os.makedirs(self.xmlPath, exist_ok=True)

        for line in self.txtList:
            # split() with no argument tolerates repeated spaces and tabs.
            fields = line.split()
            if not fields:
                continue
            jpg, xys = fields[0], fields[1:]

            node_root = Element('annotation')
            node_folder = SubElement(node_root, 'folder')
            node_folder.text = 'VOC2012'
            node_filename = SubElement(node_root, 'filename')
            node_filename.text = jpg

            # The image is loaded only to obtain its width and height.
            img = cv2.imread(jpg)
            if img is None:
                raise FileNotFoundError("cannot read image: %s" % jpg)
            shape = img.shape
            node_size = SubElement(node_root, 'size')
            node_width = SubElement(node_size, 'width')
            node_width.text = str(shape[1])
            node_height = SubElement(node_size, 'height')
            node_height.text = str(shape[0])
            node_depth = SubElement(node_size, 'depth')
            node_depth.text = '3'

            for xy in xys:
                list_xy = xy.split(",")
                if len(list_xy) < 5:
                    raise ValueError(
                        "expected x_min,y_min,x_max,y_max,class_id but got %r "
                        "for image %s" % (xy, jpg))
                x_min, y_min, x_max, y_max, class_id = list_xy[:5]

                node_object = SubElement(node_root, 'object')
                node_name = SubElement(node_object, 'name')
                if self.classNames is None:
                    node_name.text = 'person'
                else:
                    node_name.text = str(self.classNames[int(class_id)])
                node_difficult = SubElement(node_object, 'difficult')
                node_difficult.text = '0'
                node_bndbox = SubElement(node_object, 'bndbox')
                node_xmin = SubElement(node_bndbox, 'xmin')
                node_xmin.text = str(x_min)
                node_ymin = SubElement(node_bndbox, 'ymin')
                node_ymin.text = str(y_min)
                node_xmax = SubElement(node_bndbox, 'xmax')
                node_xmax.text = str(x_max)
                node_ymax = SubElement(node_bndbox, 'ymax')
                node_ymax.text = str(y_max)

            # pretty_print puts each element on its own indented line.
            xml = tostring(node_root, pretty_print=True)
            # splitext copes with extensions of any length (.jpg, .jpeg, ...).
            xml_name = os.path.splitext(os.path.basename(jpg))[0] + ".xml"
            print(xml_name)
            with open(os.path.join(self.xmlPath, xml_name), "wb") as f:
                f.write(xml)
if __name__ == "__main__":
    # Convert the boxes listed in list.txt into XML files under Annotations.
    maker = XmlMaker("list.txt", "Annotations")
    maker.readtxt()
import xml.etree.ElementTree as ET
from os import getcwd
# (year, image set) pairs; each pair selects one ImageSets/Main/<set>.txt
# file and yields one <year>_<set>.txt output list.
sets = [
    ('2012', 'train'),
    ('2012', 'val'),
]

# Object class names; the position of a name in this list is the class id
# written to the output list.
classes = [
    "cola",
    "milk tea",
    "ice tea",
    "beer",
    "shampoo",
    "toothpaste",
    "soap",
    "pear",
    "apple",
    "orange",
]
def convert_annotation(year, image_id, list_file, class_names=None):
    """Append the boxes of one VOC annotation file to *list_file*.

    Parses ``VOCdevkit/VOC<year>/Annotations/<image_id>.xml`` (relative to
    the current working directory).  For every ``<object>`` whose name is a
    known class and which is not flagged difficult, writes
    `` xmin,ymin,xmax,ymax,class_index`` (with a leading space) to
    *list_file*.

    Args:
        year: Value substituted into the ``VOC<year>`` directory name.
        image_id: Annotation file name without the ``.xml`` extension.
        list_file: Writable text file object that receives the boxes.
        class_names: Optional list of class names; a name's position is the
            class index written out.  Defaults to the module-level
            ``classes`` list.
    """
    if class_names is None:
        class_names = classes

    xml_path = 'VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)
    # Passing the path lets ElementTree open the file in binary mode, honour
    # the XML encoding declaration, and close the file when parsing is done.
    root = ET.parse(xml_path).getroot()

    for obj in root.iter('object'):
        # A missing or empty <difficult> tag is treated as "not difficult".
        difficult = obj.findtext('difficult') or '0'
        cls = obj.findtext('name')
        if cls not in class_names or int(difficult) == 1:
            continue
        cls_id = class_names.index(cls)
        xmlbox = obj.find('bndbox')
        # int(float(...)) also accepts coordinates stored as "12.0".
        b = tuple(
            int(float(xmlbox.findtext(tag)))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        list_file.write(" " + ",".join(str(a) for a in b) + ',' + str(cls_id))
# The current working directory is prefixed to every image path written below.
wd = getcwd()

# For each (year, image set) pair, write <year>_<set>.txt with one line per
# image id: the image path followed by the boxes convert_annotation appends.
for year, image_set in sets:
    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)) as ids_file:
        image_ids = ids_file.read().strip().split()

    # The context manager closes the list even if an annotation fails to parse.
    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg' % (wd, year, image_id))
            convert_annotation(year, image_id, list_file)
            list_file.write('\n')
import json
from collections import defaultdict
# Maps an image path to the list of [bbox, class_index] pairs found for it.
name_box_id = defaultdict(list)
id_name = dict()

# The context manager closes the JSON file as soon as it has been parsed.
with open("coco/annotations/instances_train2014.json", encoding='utf-8') as f:
    data = json.load(f)

annotations = data['annotations']
for ant in annotations:
    # Named image_id rather than id so the builtin id() is not shadowed.
    image_id = ant['image_id']
    name = 'coco/train2014/COCO_train2014_%012d.jpg' % image_id
    cat = ant['category_id']

    # The listed id ranges skip some values; shifting each contiguous run
    # down by its offset produces a dense 0..79 index.  Ids outside every
    # range are left unchanged.
    if 1 <= cat <= 11:
        cat = cat - 1
    elif 13 <= cat <= 25:
        cat = cat - 2
    elif 27 <= cat <= 28:
        cat = cat - 3
    elif 31 <= cat <= 44:
        cat = cat - 5
    elif 46 <= cat <= 65:
        cat = cat - 6
    elif cat == 67:
        cat = cat - 7
    elif cat == 70:
        cat = cat - 9
    elif 72 <= cat <= 82:
        cat = cat - 10
    elif 84 <= cat <= 90:
        cat = cat - 11

    name_box_id[name].append([ant['bbox'], cat])
# Write train.txt: one line per image, holding the image path followed by
# " x_min,y_min,x_max,y_max,class_index" for each of its boxes.
# The context manager closes the file even if a malformed bbox raises.
with open('train.txt', 'w') as f:
    for key, box_infos in name_box_id.items():
        f.write(key)
        for info in box_infos:
            # Each bbox is read as [x, y, width, height].
            x, y, w, h = info[0][:4]
            x_min = int(x)
            y_min = int(y)
            # Add before truncating: truncating the origin and the size
            # separately can leave the far corner one pixel short.
            x_max = int(x + w)
            y_max = int(y + h)
            box_info = " %d,%d,%d,%d,%d" % (
                x_min, y_min, x_max, y_max, int(info[1]))
            f.write(box_info)
        f.write('\n')