1、json文件转换为xml文件
原始bdd数据集是json格式的标注文件,由于我使用caffe训练,所以必须要将其转化为
VOC的xml格式的标注文件。所以直接使用Python脚本将其转化为xml。bdd数据集中包含有10个类别,包括bus,light,sign,person,bike,truck,motor,car,train,rider,具体关于数据集的介绍,请参照。
我现在只需要数据集中8类,暂时不需要light和sign。可能这两类和国内的不太一样,所以我直接去掉。
转换代码 pascal_voc_io.py
#!/usr/bin/env python
# -*- coding: utf8 -*-
import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
from lxml import etree
class PascalVocWriter:
    """Collect bounding boxes for one image and write them as a Pascal VOC XML file.

    Typical use: construct, call addBndBox() once per object, then save().
    """

    def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None):
        """Store the annotation metadata.

        foldername   -- text of the <folder> element.
        filename     -- text of the <filename> element; also the default
                        output name (filename + '.xml') used by save().
        imgSize      -- sequence indexed as (height, width[, depth]); depth
                        defaults to '1' when only two values are given.
        databaseSrc  -- text of the <source>/<database> element.
        localImgPath -- text of the <path> element (may be None).
        """
        self.foldername = foldername
        self.filename = filename
        self.databaseSrc = databaseSrc
        self.imgSize = imgSize
        self.boxlist = []
        self.localImgPath = localImgPath

    @staticmethod
    def _to_text(value):
        """Return *value* as a text string on both Python 2 and Python 3."""
        try:
            return unicode(value)  # noqa: F821 - only defined on Python 2
        except NameError:
            return str(value)

    def prettify(self, elem):
        """
        Return a pretty-printed XML string for the Element.

        The element is serialized with the standard library and re-parsed
        with lxml, because pretty_print is an lxml feature. The result of
        lxml's tostring() is a byte string.
        """
        rough_string = ElementTree.tostring(elem, 'utf8')
        root = etree.fromstring(rough_string)
        return etree.tostring(root, pretty_print=True)

    def genXML(self):
        """
        Return XML root

        Builds the <annotation> element with folder, filename, path, source,
        size and segmented children (objects are added by appendObjects()).
        Returns None when filename, foldername or imgSize is None, or when
        no bounding box has been added.
        """
        # Check conditions
        if self.filename is None or \
                self.foldername is None or \
                self.imgSize is None or \
                len(self.boxlist) <= 0:
            return None

        top = Element('annotation')
        folder = SubElement(top, 'folder')
        folder.text = self.foldername

        filename = SubElement(top, 'filename')
        filename.text = self.filename

        localImgPath = SubElement(top, 'path')
        localImgPath.text = self.localImgPath

        source = SubElement(top, 'source')
        database = SubElement(source, 'database')
        database.text = self.databaseSrc

        size_part = SubElement(top, 'size')
        width = SubElement(size_part, 'width')
        height = SubElement(size_part, 'height')
        depth = SubElement(size_part, 'depth')
        # imgSize is ordered (height, width[, depth]).
        width.text = str(self.imgSize[1])
        height.text = str(self.imgSize[0])
        if len(self.imgSize) == 3:
            depth.text = str(self.imgSize[2])
        else:
            depth.text = '1'

        segmented = SubElement(top, 'segmented')
        segmented.text = '0'
        return top

    def addBndBox(self, xmin, ymin, xmax, ymax, name):
        """Queue one labelled bounding box for the next save()."""
        bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
        bndbox['name'] = name
        self.boxlist.append(bndbox)

    def appendObjects(self, top):
        """Append one <object> element per queued bounding box to *top*."""
        for each_object in self.boxlist:
            object_item = SubElement(top, 'object')
            name = SubElement(object_item, 'name')
            # unicode() does not exist on Python 3; _to_text works on both.
            name.text = self._to_text(each_object['name'])
            pose = SubElement(object_item, 'pose')
            pose.text = "Unspecified"
            truncated = SubElement(object_item, 'truncated')
            truncated.text = "0"
            # NOTE(review): Pascal VOC spells this tag lowercase
            # ('difficult'); kept as written so existing output is unchanged.
            difficult = SubElement(object_item, 'Difficult')
            difficult.text = "0"
            bndbox = SubElement(object_item, 'bndbox')
            xmin = SubElement(bndbox, 'xmin')
            xmin.text = str(each_object['xmin'])
            ymin = SubElement(bndbox, 'ymin')
            ymin.text = str(each_object['ymin'])
            xmax = SubElement(bndbox, 'xmax')
            xmax.text = str(each_object['xmax'])
            ymax = SubElement(bndbox, 'ymax')
            ymax.text = str(each_object['ymax'])

    def save(self, targetFile=None):
        """Write the annotation to *targetFile* (default: filename + '.xml').

        Raises ValueError when genXML() rejects the data (missing metadata
        or no bounding boxes) instead of failing later with an obscure
        TypeError.
        """
        root = self.genXML()
        if root is None:
            raise ValueError(
                "nothing to save: filename, foldername and imgSize must be "
                "set and at least one bounding box added")
        self.appendObjects(root)
        out_path = self.filename + '.xml' if targetFile is None else targetFile
        # Serialize first so a failure does not leave an empty file behind.
        prettifyResult = self.prettify(root)
        # prettify() yields bytes, so the file must be opened in binary mode
        # (text mode raises TypeError on Python 3).
        with open(out_path, 'wb') as out_file:
            out_file.write(prettifyResult)
class PascalVocReader:
    """Read the labelled bounding boxes out of a Pascal VOC XML file.

    The file is parsed immediately on construction; the result is available
    through getShapes().
    """

    def __init__(self, filepath):
        """Parse *filepath* (must end with '.xml') right away."""
        # shapes type:
        # [label, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color]
        self.shapes = []
        self.filepath = filepath
        self.parseXML()

    def getShapes(self):
        """Return the list of (label, points, None, None) tuples parsed so far."""
        return self.shapes

    def addShape(self, label, bndbox):
        """Append the shape described by a <bndbox> element.

        The four corner points are listed as top-left, top-right,
        bottom-right, bottom-left. Coordinates go through float() first so
        that values written as "12.0" are accepted as well as "12".
        """
        xmin = int(float(bndbox.find('xmin').text))
        ymin = int(float(bndbox.find('ymin').text))
        xmax = int(float(bndbox.find('xmax').text))
        ymax = int(float(bndbox.find('ymax').text))
        points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
        self.shapes.append((label, points, None, None))

    def parseXML(self):
        """Load every <object> of the file into self.shapes; return True.

        Raises ValueError when the path does not end with '.xml'. An
        explicit raise is used because assert statements are removed when
        Python runs with -O.
        """
        if not self.filepath.endswith('.xml'):
            raise ValueError("Unsupport file format")
        parser = etree.XMLParser(encoding='utf-8')
        xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
        # The <filename> element is not needed here, so it is not looked up;
        # a file without it no longer fails.
        for object_iter in xmltree.findall('object'):
            bndbox = object_iter.find("bndbox")
            label = object_iter.find('name').text
            self.addShape(label, bndbox)
        return True
# tempParseReader = PascalVocReader('test.xml')
# print tempParseReader.getShapes()
"""
# Test
tmp = PascalVocWriter('temp','test', (10,20,3))
tmp.addBndBox(10,10,20,30,'chair')
tmp.addBndBox(1,1,600,600,'car')
tmp.save()
"""
parseJson.py
#!/usr/bin/env python
# -*- coding: utf8 -*-
#parse json,input json filename,output info needed by voc
import json
# The 8 categories that are kept; every other category is ignored.
categorys = ['car','bus','person','bike','truck','motor','train','rider']


def parseJson(jsonFile):
    """Return the wanted boxes of a label file as [x1, y1, x2, y2, category] rows.

    Only the objects of the first frame (info['frames'][0]['objects']) are
    read, and only those whose category is listed in `categorys`.
    Coordinates are truncated to int.
    """
    # Binary mode lets the json module detect the encoding itself instead of
    # relying on the locale default; the with-block closes the file, which
    # the previous version never did.
    with open(jsonFile, 'rb') as f:
        info = json.load(f)

    objs = []
    for item in info['frames'][0]['objects']:
        if item['category'] not in categorys:
            continue
        box = item['box2d']
        objs.append([
            int(box['x1']),
            int(box['y1']),
            int(box['x2']),
            int(box['y2']),
            item['category'],
        ])
    return objs
#test
#parseJson("/home/nextcar/桌面/0a0a0b1a-7c39d841.json")
bdd2voc.py
import os
import pascal_voc_io
import parseJson
# Root directory that is walked for per-image JSON label files.
dirName = "/media/0A4811140A481114/bdd100k_labels/labels/100k/val"
# Running counter used only for the progress message.
i = 1
for dirpath, dirnames, filenames in os.walk(dirName):
    for filepath in filenames:
        # Anything that is not a .json label file would make the JSON parser
        # fail and abort the whole conversion, so it is skipped.
        if not filepath.endswith('.json'):
            continue
        fileName = os.path.join(dirpath, filepath)
        print("processing: ",i)
        i = i + 1
        # Base name without the extension; the writer appends '.xml' itself.
        xmlFileName = os.path.splitext(filepath)[0]
        #print("xml: ",xmlFileName)
        objs = parseJson.parseJson(str(fileName))
        if objs:
            # The size tuple is passed as (height, width, depth) and is
            # hard-coded for every image.
            tmp = pascal_voc_io.PascalVocWriter('Annotations', xmlFileName, (720,1280,3))
            for obj in objs:
                tmp.addBndBox(obj[0],obj[1],obj[2],obj[3],obj[4])
            # No target path is given, so the XML name is relative to the
            # current working directory.
            tmp.save()
        else:
            # No object of a wanted category: report the file, write nothing.
            print(fileName)
直接调用bdd2voc.py就可以生成xml文件了,这样生成的文件就可以直接用作训练了。
转换成lmdb文件然后训练,参考
训练过程中,非常难以收敛,怎么调都不好使啊
然后在test的时候,把所有类别的准确度全部打印出来,然后惊奇地发现,我的天!train这一类的准确率只有百分之零点几
然后我意识到这个数据集类别数量分布极其不均匀
但是具体的数量是多少呢?打印出来看看
cd data/Annotations
# Count the lines in all annotation files that mention "train".
# The predicate must be spelled "-type f"; "type -f" makes find fail.
find ./ -type f | xargs grep -ri "train" | wc -l
不看不知道,一看吓一跳啊,train只有几十,但是car却有几十万,这个相差太大了,肯定会不好训练的
然后使用Python脚本将所有xml中的train的object节点全部删掉。如果一个xml文件中只有train这一类,那么这个xml文件就是空的,必须得删掉,然后对应的pic文件也要删掉。
#!/usr/bin/env python
# -*- coding: utf8 -*-
import pascal_voc_io
import os
# Root directory of the VOC XML annotation files; gen() walks this tree.
xmlPath = "/home/Work/data/xml"
# Directory of the matching pictures.
# NOTE(review): the code visible here does not read this constant;
# DelEmptyXmlAndPic derives picture paths by replacing "xml" with "pic"
# in the annotation path - confirm the two stay in sync.
picPath = "/home/Work/data/pic"
def delSmallObj(objInfos):
    """Drop every "train" shape and flatten the rest into box rows.

    Despite its name, this function does not filter by size; it only
    removes objects labelled "train".

    objInfos -- iterable of shapes whose item 0 is the label and item 1 is
                the corner list [(xmin, ymin), (xmax, ymin), (xmax, ymax),
                (xmin, ymax)].

    Returns (isNull, rows): rows is a list of
    [xmin, ymin, xmax, ymax, label] with int coordinates, and isNull is
    True when no row is left.
    """
    objsList = []
    for obj in objInfos:
        objName = obj[0]
        if objName == "train":
            continue
        corners = obj[1]
        objsList.append([
            int(corners[0][0]),  # xmin
            int(corners[0][1]),  # ymin
            int(corners[1][0]),  # xmax
            int(corners[2][1]),  # ymax
            objName,
        ])
    return len(objsList) == 0, objsList
# Regenerate the XML files and return the names of those left without objects.
def gen():
    """Rewrite every annotation under xmlPath without its "train" objects.

    Each .xml file is read, filtered through delSmallObj() and written out
    again under the same base name. Files that end up with no object are
    not rewritten; their paths are collected and returned so the caller can
    delete them.
    """
    toDelXml = []
    for dirpath, dirnames, filenames in os.walk(xmlPath):
        for filepath in filenames:
            # The reader rejects any path that is not '.xml', which would
            # abort the whole run; skip such files instead.
            if not filepath.endswith('.xml'):
                continue
            fileName = os.path.join(dirpath, filepath)
            xmlFileName = os.path.splitext(filepath)[0]
            objInfos = pascal_voc_io.PascalVocReader(fileName).getShapes()
            isNull, objs = delSmallObj(objInfos)
            if isNull:
                toDelXml.append(fileName)
                continue
            # The size tuple is hard-coded for every image.
            tmp = pascal_voc_io.PascalVocWriter('Annotations', xmlFileName, (720,1280,3))
            for obj in objs:
                tmp.addBndBox(obj[0],obj[1],obj[2],obj[3],obj[4])
            # NOTE(review): save() gets no target path, so the file is
            # written relative to the current working directory, not
            # necessarily over the file that was read - confirm intended.
            tmp.save()
    return toDelXml
def DelEmptyXmlAndPic(toDelXml):
    """Delete each listed annotation file together with its picture.

    The picture path is derived from the annotation path by replacing
    "xml" with "pic" in the directory part and swapping the extension for
    '.jpg'. The base name is left untouched, so a file name that happens to
    contain "xml" is no longer mangled. Files that do not exist are skipped
    so one missing picture does not abort the clean-up.
    """
    for xmlFile in toDelXml:
        xmlDir, xmlName = os.path.split(xmlFile)
        picFileName = os.path.join(
            xmlDir.replace("xml", "pic"),
            os.path.splitext(xmlName)[0] + '.jpg')
        print("deleting file: ",xmlFile)
        # The previous version removed only the picture and left the empty
        # annotation file behind.
        for path in (xmlFile, picFileName):
            if os.path.isfile(path):
                os.remove(path)
# Script entry point: filter the annotations, then clean up the empty ones.
if __name__ == "__main__":
    emptyXmlFiles = gen()
    DelEmptyXmlAndPic(emptyXmlFiles)
    # Report how many annotation files were left without any object.
    print("empty num is: ", len(emptyXmlFiles))
这样之后数据集应该是没有太大问题了,虽然里面的类别还是不太均衡的。先训练再说吧。