一个目标检测项目需要自己找图片标注数据进行训练,训练需要YOLO格式,但数据增广需要VOC格式,该文记录如何将labelme标注的数据格式转为YOLO格式,再从YOLO格式转为VOC格式,只作为自己用的记录,如果你刚好也需要这么干,或者需要文中提到的某一种转换,也可以参考一下。文中有些代码是参考其他地方的,时间长已经记不清了,如有侵权请联系更改。
注意:路径不要有中文,标签也用相应的英文
手动完成即可,标签的文件夹最好加个json后缀,因为后面会有其他格式的标签文件。
因为搜集的图片什么格式都有,为了方便训练,统一为jpg格式。
代码如下:
# trans_others_to_jpg.py
import os
import cv2 as cv
image_path = 'D:/DeskTop/Datasets/clothes/images/'  # folder holding the source images
save_path = 'D:/DeskTop/Datasets/clothes/images_jpg/'  # separate output folder, so the non-jpg originals are not mixed in

# File extensions (lower-case, including the dot) that get converted to .jpg.
SUPPORTED_EXTENSIONS = ('.bmp', '.jpg', '.jpeg', '.png')


def jpg_output_name(filename):
    """Return the ``.jpg`` file name for *filename*, or ``None`` if it is not a supported image.

    Only the last extension is replaced, so ``112345.jpeg`` becomes
    ``112345.jpg`` and ``a.b.PNG`` becomes ``a.b.jpg``.  The extension check
    is case-insensitive.
    """
    stem, ext = os.path.splitext(filename)
    if not stem or ext.lower() not in SUPPORTED_EXTENSIONS:
        return None
    return stem + '.jpg'


def convert_images_to_jpg(src_dir, dst_dir):
    """Re-encode every supported image in *src_dir* as a .jpg file in *dst_dir*.

    Files that OpenCV cannot decode are reported and skipped instead of
    crashing the whole run.  Returns the number of images written.

    NOTE: two sources that differ only by extension (``a.png`` and ``a.jpg``)
    map to the same output name; the one processed last wins.
    """
    os.makedirs(dst_dir, exist_ok=True)
    converted = 0
    for name in sorted(os.listdir(src_dir)):
        out_name = jpg_output_name(name)
        if out_name is None:
            continue
        src = cv.imread(os.path.join(src_dir, name))
        if src is None:
            # cv.imread signals failure by returning None instead of raising.
            print('skipped unreadable image:', name)
            continue
        cv.imwrite(os.path.join(dst_dir, out_name), src)
        converted += 1
    return converted


if __name__ == '__main__':
    convert_images_to_jpg(image_path, save_path)
    print('FINISHED')
将图片和对应的标签重命名为六位数字的文件名,从000001开始。注意:图片和标签都需要进行重命名(对图片文件夹和标签文件夹各运行一次脚本,把 path 改为对应的文件夹),并且两个文件夹中的文件顺序必须一致,否则图片和标签会对应不上。
代码如下:
# rename.py
import os
path = "D:/DeskTop/Datasets/clothes/label_json/"  # folder whose files are renamed (run once per folder: images and labels)


def plan_sequential_names(names, start=1, width=6):
    """Pair every file name with its zero-padded sequential replacement.

    Names are sorted first so that the numbering is deterministic; running
    the script on an image folder and on a label folder that share the same
    base names therefore produces matching numbers.  The original extension
    is kept.  Returns a list of ``(old_name, new_name)`` tuples.
    """
    return [
        (name, str(number).zfill(width) + os.path.splitext(name)[1])
        for number, name in enumerate(sorted(names), start)
    ]


def rename_sequentially(folder, start=1, width=6):
    """Rename all regular files in *folder* to ``000001.ext``, ``000002.ext``, ...

    Sub-directories are left untouched.  The rename is done in two passes
    through temporary names, so a file whose current name equals another
    file's target name is never overwritten.  Returns the applied
    ``(old_name, new_name)`` plan.
    """
    files = [
        name for name in os.listdir(folder)
        if not os.path.isdir(os.path.join(folder, name))
    ]
    plan = plan_sequential_names(files, start, width)

    # Pass 1: move every file that changes name out of the way.
    staged = []
    for index, (old_name, new_name) in enumerate(plan):
        if old_name == new_name:
            continue
        temp_name = '.tmp_rename_{}_{}'.format(index, new_name)
        os.rename(os.path.join(folder, old_name), os.path.join(folder, temp_name))
        staged.append((temp_name, new_name))

    # Pass 2: all targets are free now, so the final names cannot collide.
    for temp_name, new_name in staged:
        os.rename(os.path.join(folder, temp_name), os.path.join(folder, new_name))
    return plan


if __name__ == '__main__':
    for old_name, new_name in rename_sequentially(path):
        print(old_name, '->', new_name)
因为上一步只改变了名字,标签内的imagePath并没有跟着变,所以还要改一下,和图片对应起来,其实这一步不做也没事,因为YOLO格式就是根据标签文件名读取图片路径的,为了以后可能需要json的标签,还是改一下最好。
代码如下:
# change_json_imagePath.py
import json
import os
import re
path = 'D:/DeskTop/Datasets/clothes/label_json/'  # folder with the labelme .json files


def update_image_paths(json_dir):
    """Point the top-level ``imagePath`` of every .json file at ``<json stem>.jpg``.

    Each ``*.json`` file directly inside *json_dir* is loaded, its
    ``imagePath`` entry is replaced by the file's own base name with a
    ``.jpg`` extension, and the file is written back in place.  Files are
    read and written as UTF-8 and non-ASCII text is kept as-is, so labels
    containing e.g. Chinese characters survive the round trip.

    Returns the number of .json files that were rewritten.
    """
    updated = 0
    for name in sorted(os.listdir(json_dir)):
        stem, ext = os.path.splitext(name)
        if ext != ".json":
            continue
        json_file = os.path.join(json_dir, name)
        print("path = ", name)
        with open(json_file, 'r', encoding='utf-8') as load_f:
            load_dict = json.load(load_f)
        print("imagePath = ", load_dict.get('imagePath'))
        # imagePath is a top-level key here; adapt this line for other layouts.
        load_dict['imagePath'] = stem + '.jpg'
        print("new imagePath = ", load_dict['imagePath'])
        with open(json_file, 'w', encoding='utf-8') as dump_f:
            json.dump(load_dict, dump_f, ensure_ascii=False)
        updated += 1
    return updated


if __name__ == '__main__':
    num_flag = update_image_paths(path)
    if num_flag == 0:
        print('所选文件夹不存在json文件,请重新确认要选择的文件夹')
    else:
        print('共{}个json文件'.format(num_flag))
将labelme的json格式转为YOLO的txt格式,同样保存txt标签的文件夹最好也加个后缀,方便和json区分。注意把代码中的 id2cls 和 cls2id 改为自己数据集的类别,类别编号从0开始。
代码如下:
# trans_labelme_to_yolo.py
import cv2
import os
import json
import shutil
import numpy as np
from pathlib import Path
from glob import glob
# Class-id -> class-name table; edit it for your own dataset (ids start at 0).
id2cls = {0: 'clothing'}
# Reverse lookup (class-name -> class-id), derived from id2cls so the two tables stay in sync.
cls2id = {cls_name: cls_id for cls_id, cls_name in id2cls.items()}
# Reads images through numpy so that paths containing Chinese characters work.
def cv_imread(filePath):
    """Load the file at *filePath* and decode it as a colour image.

    The raw bytes are read with ``np.fromfile`` and decoded with
    ``cv2.imdecode`` using ``cv2.IMREAD_COLOR``; the decoded array is returned.
    """
    raw_bytes = np.fromfile(filePath, dtype=np.uint8)
    return cv2.imdecode(raw_bytes, flags=cv2.IMREAD_COLOR)
def labelme2yolo_single(img_path, label_file):
    """Convert one labelme JSON annotation file into YOLO label rows.

    Args:
        img_path: image folder prefix; only used to build the returned image
            file name.
        label_file: path of the labelme ``.json`` file.

    Returns:
        ``(labels, image_path)`` where ``labels`` is a numpy array with one
        ``[class_id, x_center, y_center, width, height]`` row per shape (all
        coordinates normalised by the image size stored in the JSON) and
        ``image_path`` is the base name of the annotated image.

    Raises:
        KeyError: if a shape label is missing from ``cls2id``.
    """
    with open(label_file, "r", encoding="utf-8") as fh:
        anno = json.load(fh)
    w0, h0 = anno['imageWidth'], anno['imageHeight']
    image_path = os.path.basename(img_path + anno['imagePath'])
    labels = []
    for s in anno['shapes']:
        pts = s['points']
        if len(pts) < 2:
            # A single point has no extent and cannot form a box.
            continue
        # Use the extent of all points: identical to the two-corner formula
        # for rectangles, and yields the enclosing box for polygons.
        xs = [pt[0] for pt in pts]
        ys = [pt[1] for pt in pts]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)
        x = (x_min + x_max) / 2 / w0
        y = (y_min + y_max) / 2 / h0
        w = (x_max - x_min) / w0
        h = (y_max - y_min) / h0
        cid = cls2id[s['label']]
        labels.append([cid, x, y, w, h])
    return np.array(labels), image_path
def labelme2yolo(img_path, labelme_label_dir, save_dir='res/'):
    """Convert every labelme ``.json`` file in a folder to a YOLO ``.txt`` file.

    Args:
        img_path: image folder, forwarded to ``labelme2yolo_single``.
        labelme_label_dir: folder containing the labelme ``.json`` files.
        save_dir: folder that receives the ``.txt`` files; created if missing.

    One ``<stem>.txt`` is written per JSON file that contains at least one
    shape; JSON files without shapes produce no output file.  Each row is
    written as an integer class id followed by four fixed-point numbers, so
    no scientific-notation clean-up is needed afterwards.
    """
    label_dir = Path(labelme_label_dir)
    out_dir = Path(save_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    json_files = sorted(label_dir.glob('*.json'))
    for ijf, jf in enumerate(json_files):
        print(ijf + 1, '/', len(json_files), jf)
        labels, _ = labelme2yolo_single(img_path, str(jf))
        if len(labels) > 0:
            # np.savetxt defaults to '%.18e'; give the class id an integer
            # format and the coordinates six decimals instead.
            np.savetxt(
                str(out_dir / (jf.stem + '.txt')),
                labels,
                fmt=['%d', '%.6f', '%.6f', '%.6f', '%.6f'],
            )
    print('Completed!')
if __name__ == '__main__':
    # Dataset folders: YOLO txt output, labelme JSON input, and the images.
    save_dir = 'D:/DeskTop/Datasets/clothes/label_txt/'
    json_dir = 'D:/DeskTop/Datasets/clothes/label_json/'
    img_path = 'D:/DeskTop/Datasets/clothes/images/'
    labelme2yolo(img_path, labelme_label_dir=json_dir, save_dir=save_dir)
因为数据增广需要xml格式,所以再进行一次转换。注意把代码中的 labels 列表改为自己数据集的类别,顺序要与YOLO标签中的类别编号一一对应。
代码如下:
# trans_YOLOtxt_to_VOCxml.py
import xml.dom.minidom
import glob
from PIL import Image
from math import ceil
import shutil
import os
yolo_file = 'D:/DeskTop/Datasets/clothes/label_txt2/'  # folder with the YOLO-format .txt label files
turn_xml_file = 'D:/DeskTop/Datasets/clothes/label_xml/'  # folder that receives the generated .xml files
img_file = 'D:/DeskTop/Datasets/clothes/images/'  # folder with the .jpg images
# Class names indexed by YOLO class id; change this to your own classes.
# NOTE(review): the labelme conversion step names the class 'clothing' --
# confirm which spelling the downstream tools expect.
labels = ['clothes']

src_img_dir = img_file
src_txt_dir = yolo_file
src_xml_dir = turn_xml_file  # output folder for the converted .xml files


def yolo_line_to_voc_box(line, width, height):
    """Convert one YOLO label line to ``(class_id, xmin, ymin, xmax, ymax)`` in pixels.

    *line* holds ``class x_center y_center box_width box_height`` with the
    four coordinates normalised to ``[0, 1]``; fields may be separated by
    whitespace or commas and may be written in scientific notation.  The
    resulting corners are clamped to the image so that boxes never leave
    ``[0, width] x [0, height]``.
    """
    fields = line.replace(',', ' ').split()
    class_id = int(float(fields[0]))
    center_x = round(float(fields[1]) * width)
    center_y = round(float(fields[2]) * height)
    bbox_width = round(float(fields[3]) * width)
    bbox_height = round(float(fields[4]) * height)
    xmin = max(0, int(center_x - bbox_width / 2))
    ymin = max(0, int(center_y - bbox_height / 2))
    xmax = min(width, int(center_x + bbox_width / 2))
    ymax = min(height, int(center_y + bbox_height / 2))
    return class_id, xmin, ymin, xmax, ymax


def build_voc_xml(img_name, width, height, yolo_lines, class_names):
    """Return the Pascal VOC annotation XML for one image as a string.

    Args:
        img_name: image file name without extension; ``.jpg`` is appended.
        width, height: image size in pixels.
        yolo_lines: YOLO label lines for this image; blank lines are ignored
            and an empty list yields an annotation without ``<object>``.
        class_names: class names indexed by YOLO class id.
    """
    parts = [
        '<annotation>\n',
        '    <folder>VOC2007</folder>\n',
        '    <filename>' + str(img_name) + '.jpg' + '</filename>\n',
        '    <size>\n',
        '        <width>' + str(width) + '</width>\n',
        '        <height>' + str(height) + '</height>\n',
        '        <depth>3</depth>\n',
        '    </size>\n',
    ]
    for line in yolo_lines:
        if not line.strip():
            continue
        class_id, xmin, ymin, xmax, ymax = yolo_line_to_voc_box(line, width, height)
        parts.extend([
            '    <object>\n',
            '        <name>' + str(class_names[class_id]) + '</name>\n',
            '        <pose>Unspecified</pose>\n',
            '        <truncated>0</truncated>\n',
            '        <difficult>0</difficult>\n',
            '        <bndbox>\n',
            '            <xmin>' + str(xmin) + '</xmin>\n',
            '            <ymin>' + str(ymin) + '</ymin>\n',
            '            <xmax>' + str(xmax) + '</xmax>\n',
            '            <ymax>' + str(ymax) + '</ymax>\n',
            '        </bndbox>\n',
            '    </object>\n',
        ])
    parts.append('</annotation>')
    return ''.join(parts)


def convert_yolo_to_voc(img_dir, txt_dir, xml_dir, class_names):
    """Write one VOC ``.xml`` file per ``.jpg`` image found in *img_dir*.

    The label file ``<txt_dir>/<stem>.txt`` is used when it exists; an image
    without a label file (or with an empty one) gets an annotation that
    contains no objects.  Returns the number of images processed.
    """
    os.makedirs(xml_dir, exist_ok=True)
    img_names = sorted(
        os.path.splitext(os.path.basename(item))[0]
        for item in glob.glob(os.path.join(img_dir, '*.jpg'))
    )
    total_num = len(img_names)  # number of annotations to convert
    for count, img in enumerate(img_names, 1):
        if count % 1000 == 0:
            print("当前转换进度{}/{}".format(count, total_num))
        with Image.open(os.path.join(img_dir, img + '.jpg')) as im:
            width, height = im.size

        txt_path = os.path.join(txt_dir, img + '.txt')
        if os.path.exists(txt_path):
            with open(txt_path, encoding='utf-8') as txt_file:
                gt = txt_file.read().splitlines()
        else:
            gt = []

        with open(os.path.join(xml_dir, img + '.xml'), 'w', encoding='utf-8') as xml_file:
            xml_file.write(build_voc_xml(img, width, height, gt, class_names))
    return total_num


if __name__ == '__main__':
    convert_yolo_to_voc(src_img_dir, src_txt_dir, src_xml_dir, labels)
验证标签转换后是否正确:用xml标签进行可视化,多测试几张图片,最好找一些目标多的图片来验证标签的正确性。
代码如下:
# visualization_xml_OD.py
from lxml import etree
import cv2 as cv
import matplotlib.pyplot as plt
from copy import deepcopy
import numpy as np
def parse_xml_to_dict(xml):
    """Recursively turn an XML element into nested dictionaries.

    Modelled on TensorFlow's ``recursive_parse_xml_to_dict``.

    Args:
        xml: an element of a parsed XML tree (e.g. from ``lxml.etree``).

    Returns:
        ``{tag: text}`` for an element without children, otherwise
        ``{tag: {child_tag: child_value, ...}}``.  Children tagged
        ``object`` may occur several times and are collected into a list.
    """
    children = list(xml)
    if not children:
        # Leaf element: its text is the value.
        return {xml.tag: xml.text}

    parsed = {}
    for node in children:
        value = parse_xml_to_dict(node)[node.tag]
        if node.tag == 'object':
            parsed.setdefault('object', []).append(value)
        else:
            parsed[node.tag] = value
    return {xml.tag: parsed}
def get_xml_info(xml_path):
    """Read a VOC annotation file and return its bounding boxes.

    Args:
        xml_path: path of the ``.xml`` annotation file.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` integer lists, one per
        ``<object>``.  An annotation without objects yields an empty list.
    """
    # Read bytes: lxml rejects str input that carries an encoding declaration.
    with open(xml_path, 'rb') as fid:
        xml_bytes = fid.read()
    data = parse_xml_to_dict(etree.fromstring(xml_bytes))["annotation"]
    if not isinstance(data, dict):
        # Root element without child elements: nothing to extract.
        return []

    bboxes = []
    for obj in data.get("object", []):
        box = obj["bndbox"]
        # int(float(...)) also accepts coordinates written as "12.0".
        bboxes.append([int(float(box[key])) for key in ("xmin", "ymin", "xmax", "ymax")])
    return bboxes
img_path = "D:/DeskTop/Datasets/clothes/images/000056.jpg"  # image to visualise
xml_path = "D:/DeskTop/Datasets/clothes/label_xml/000056.xml"  # VOC label belonging to that image


def show_boxes(image_file, label_file):
    """Draw every box from *label_file* on *image_file* and display the result.

    Raises:
        FileNotFoundError: if OpenCV cannot read the image.
    """
    img = cv.imread(image_file)
    if img is None:
        # cv.imread returns None on failure instead of raising.
        raise FileNotFoundError('cannot read image: ' + image_file)
    for xmin, ymin, xmax, ymax in get_xml_info(label_file):
        # Plain Python ints keep cv.rectangle's argument parsing happy.
        cv.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 4)
    plt.figure(1)
    # OpenCV stores BGR; reverse the channel axis to get RGB for matplotlib.
    plt.imshow(img[:, :, ::-1])
    plt.show()


if __name__ == '__main__':
    show_boxes(img_path, xml_path)
至此,从labelme格式转为YOLO和VOC格式的任务就完成了。
下面是将txt标签中的科学计数法表示转为float的代码,有需要的或是强迫症患者可以参考一下。
代码如下:
先将txt中的“+”替换为“-”(例如把 e+00 变成 e-00,数值不变),因为下一步的正则表达式只匹配负指数。注意:这种替换只有在指数为0时才不会改变数值。
# change_txt_'+'_to_'-'.py
import os
def trans(input_dir, output_dir, word, splitword):
    """Replace *word* with *splitword* in every ``.txt`` file under *input_dir*.

    The folder is walked recursively.  Each file is read from the directory
    it was actually found in and written to the same relative location under
    *output_dir* (which may equal *input_dir* for an in-place edit).  Neither
    directory needs a trailing path separator.

    Args:
        input_dir: folder to read the ``.txt`` files from.
        output_dir: folder to write the modified files to.
        word: text to search for.
        splitword: replacement text.
    """
    for root, dirs, files in os.walk(input_dir):
        rel_dir = os.path.relpath(root, input_dir)
        target_dir = os.path.normpath(os.path.join(output_dir, rel_dir))
        for item in files:
            if os.path.splitext(item)[1] != ".txt":
                continue
            with open(os.path.join(root, item), "r", encoding='UTF-8') as f:
                content = f.read()
            os.makedirs(target_dir, exist_ok=True)
            with open(os.path.join(target_dir, item), 'w', encoding='UTF-8') as fval:
                fval.write(content.replace(word, splitword))
if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal; the non-raw form
    # relied on invalid escape sequences such as "\D", which newer Python
    # versions warn about.
    # Source folder.
    input_dir = r"D:\DeskTop\Datasets\clothes\label_txt/"
    # Destination folder (same as the source: files are edited in place).
    output_dir = r"D:\DeskTop\Datasets\clothes\label_txt/"
    # Character to remove.
    word = '+'
    # Character to insert instead.
    splitword = "-"
    trans(input_dir, output_dir, word, splitword)
再将科学计数法转为float
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import math
import os
def ConvertELogStrToValue(eLogStr):
    """Convert the first scientific-notation number found in *eLogStr* to a float.

    The notation is base 10 (``1.5e-03`` means ``1.5 * 10**-3``).  The
    coefficient must contain a decimal point; the exponent may be negative,
    positive or unsigned, and the ``e`` is matched case-insensitively.

    Returns:
        ``(convertOK, convertedValue)`` -- ``(True, value)`` when a number
        was found, otherwise ``(False, 0.0)``.

    Examples:
        input: -1.1694737e-03   output: (True, -0.0011694737)
        input: 8.9455025e-04    output: (True, 0.00089455025)
    """
    foundEPower = re.search(
        r"(?P<coefficientPart>-?\d+\.\d+)e(?P<ePowerPart>[-+]?\d+)",
        eLogStr,
        re.I,
    )
    if not foundEPower:
        return (False, 0.0)
    # float() parses the matched text directly, which avoids the rounding
    # error of multiplying the coefficient by a separately computed power.
    return (True, float(foundEPower.group(0)))
def parseIntEValue(intEValuesStr):
    """Rewrite every scientific-notation number in *intEValuesStr* as fixed-point text.

    Each number found in the string is validated with
    ``ConvertELogStrToValue`` and then replaced, via ``trans``, in all
    ``.txt`` files under the module-level ``txt_path`` folder (which the
    ``__main__`` section sets before calling this function).

    NOTE: every value triggers a rewrite of the whole folder, so the total
    cost grows with (number of values) x (number of files).

    Returns:
        A list of ``(original_text, fixed_point_text)`` pairs that were
        replaced.
    """
    # Local import: Decimal expands the exponent exactly, whereas str(float)
    # falls back to scientific notation again for small values.
    from decimal import Decimal

    intEStrList = re.findall(r"-?\d+\.\d+e[-+]?\d+", intEValuesStr, re.I)
    replaced = []
    for eachIntEStr in intEStrList:
        (convertOK, convertedValue) = ConvertELogStrToValue(eachIntEStr)
        if not convertOK:
            # Never write the 0.0 placeholder of a failed conversion.
            continue
        fixedPointStr = format(Decimal(eachIntEStr).normalize(), 'f')
        print("eachIntEStr=%s,\tconvertedValue=%f" % (eachIntEStr, convertedValue))
        trans(txt_path, txt_path, eachIntEStr, fixedPointStr)
        replaced.append((eachIntEStr, fixedPointStr))
    return replaced
def trans(input_dir, output_dir, word, splitword):
    """Replace *word* with *splitword* in every ``.txt`` file under *input_dir*.

    *word* and *splitword* are converted with ``str()`` first, so numbers
    may be passed directly.  The folder is walked recursively; each file is
    read from the directory it was actually found in and written to the same
    relative location under *output_dir* (which may equal *input_dir* for an
    in-place edit).  Neither directory needs a trailing path separator.
    """
    old_text = str(word)
    new_text = str(splitword)
    for root, dirs, files in os.walk(input_dir):
        rel_dir = os.path.relpath(root, input_dir)
        target_dir = os.path.normpath(os.path.join(output_dir, rel_dir))
        for item in files:
            if os.path.splitext(item)[1] != ".txt":
                continue
            with open(os.path.join(root, item), "r", encoding='UTF-8') as f:
                content = f.read()
            os.makedirs(target_dir, exist_ok=True)
            with open(os.path.join(target_dir, item), 'w', encoding='UTF-8') as fval:
                fval.write(content.replace(old_text, new_text))
# Example run (base-10 conversion, values printed with %f):
# intEValuesStr= 2.1690427e-005 -1.1694737e-003 -6.1193734e-004
# 8.9455025e-004 -8.6277081e-004 -7.2735757e-004
# intEStrList= ['2.1690427e-005', '-1.1694737e-003', '-6.1193734e-004', '8.9455025e-004', '-8.6277081e-004', '-7.2735757e-004']
# eachIntEStr=2.1690427e-005, convertedValue=0.000022
# eachIntEStr=-1.1694737e-003, convertedValue=-0.001169
# eachIntEStr=-6.1193734e-004, convertedValue=-0.000612
# eachIntEStr=8.9455025e-004, convertedValue=0.000895
# eachIntEStr=-8.6277081e-004, convertedValue=-0.000863
# eachIntEStr=-7.2735757e-004, convertedValue=-0.000727
if __name__ == "__main__":
    # Raw strings keep the Windows backslashes literal; the non-raw form
    # relied on invalid escape sequences such as "\D".
    # txt_path is read by parseIntEValue as a module-level global.
    txt_path = r"D:\DeskTop\Datasets\clothes\label_txt/"
    output_dir = r"D:\DeskTop\Datasets\clothes\label_txt/"  # not used below; kept from the original script
    # data_path = "D:/DeskTop/000001.txt"
    for root, dirs, files in os.walk(txt_path):
        for item in files:
            if os.path.splitext(item)[1] != ".txt":
                continue
            # Read from the directory the file was found in, and close it
            # before parseIntEValue rewrites the files in that folder.
            with open(os.path.join(root, item), 'r', encoding='UTF-8') as f:
                lines = f.readlines()
            for line in lines:
                parseIntEValue(line.strip())