【深度学习 招式篇】制作VOC数据集必看! 各种快捷所需python代码 YOLO 训练自己的数据集
-
- 批量 更改该目录下所有文件名 【change_mingzi.py】
- 遍历一个文件夹下所有文件,提取生成目录形式的.txt文件【tiqu_name.py】
- 批量查找 错误标记的.xml文件 遍历目录替换更改错误标注 比如cat(空格)改成cat 【change_label.py】
- 将.xml转换为.txt 【这里提供2种方法】【xml_to_txt.py】【xml2txt.py】
- 更换txt文件里指定字符为其他指定字符
- 绘制log-iou的曲线【log——train_iou_visualization.py 】
- 绘制log-loss的曲线【train_loss_visualization.py】
批量 更改该目录下所有文件名 【change_mingzi.py】
import os
# Directory whose files are renamed when this script is run directly.
path='E:\\标注数据\\ceshi\\'


def rename_files(directory, prefix="ZaoxiuImage_", ext=".jpg", start=1):
    """Rename every regular file in ``directory`` to ``<prefix><n><ext>``.

    Files are numbered in sorted-name order, beginning at ``start``, so the
    numbering is reproducible.  Sub-directories are left untouched.  The
    extension of every file is replaced by ``ext``, whatever it was before.

    :param directory: folder whose files are renamed in place
    :param prefix: text placed in front of the running number
    :param ext: extension (including the dot) given to every file
    :param start: number assigned to the first file
    :return: the number of files that were renamed
    """
    names = sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )
    # Pass 1: park every file under a temporary name.  Without this step a
    # final name that is still held by another file of the same batch (for
    # example on a second run) would be overwritten on POSIX or raise
    # FileExistsError on Windows.
    staged = []
    for k, name in enumerate(names):
        temp_name = "__rename_tmp_%d__%s" % (k, name)
        os.rename(os.path.join(directory, name),
                  os.path.join(directory, temp_name))
        staged.append(temp_name)
    # Pass 2: hand out the final sequential names.
    for number, temp_name in enumerate(staged, start):
        os.rename(os.path.join(directory, temp_name),
                  os.path.join(directory, "%s%d%s" % (prefix, number, ext)))
    return len(staged)


if __name__ == "__main__":
    rename_files(path)
    print ("End")
遍历一个文件夹下所有文件,提取生成目录形式的.txt文件【tiqu_name.py】
import os
import re
# Directory whose entries are listed, and the text file that receives the listing.
images_path = 'E:\\yolo_tf2.1\\VOCdevkit\\VOC\\Annotations\\'
txt_save_path = 'E:\\yolo_tf2.1\\VOCdevkit\\VOC\\zaoxiu.txt'


def read_directory(directory_name, output_path=None):
    """Write the name of every entry in ``directory_name`` to a text file.

    One name is written per line, in sorted order.

    :param directory_name: directory to list
    :param output_path: file to (over)write; defaults to ``txt_save_path``
    :return: the list of names that was written
    """
    if output_path is None:
        output_path = txt_save_path
    # List first, so a missing directory does not truncate an existing output file.
    names = sorted(os.listdir(directory_name))
    # The context manager guarantees the listing is flushed and the handle closed.
    with open(output_path, "w") as fw:
        fw.writelines(name + '\n' for name in names)
    return names


if __name__ == "__main__":
    read_directory(images_path)
批量查找 错误标记的.xml文件 遍历目录替换更改错误标注 比如cat(空格)改成cat 【change_label.py】
import os
# Paths collected by func(); the list is extended in place on every call.
file_path = []


def func(fileDir=r"C:\\Users\\Spectator\\Desktop\\tihuan\\1"):
    """Walk ``fileDir`` recursively and append each file's path to ``file_path``.

    Every path is built as ``"<directory>/<file name>"``.

    :param fileDir: root directory of the walk.
        NOTE(review): the default is a raw string that also doubles its
        backslashes, so the path contains literal double backslashes --
        confirm that this is intended.
    :return: None
    """
    for root, dirs, files in os.walk(fileDir):
        file_path.extend('{}/{}'.format(root, fileitem) for fileitem in files)
def alter(file, old_str, new_str, encoding="utf-8"):
    """Replace ``old_str`` with ``new_str`` on every line of a text file, in place.

    The file is rewritten only when at least one line actually changes.

    :param file: path of the file to edit
    :param old_str: text to search for (matched within a single line)
    :param new_str: replacement text
    :param encoding: encoding used for both reading and writing
    :return: None
    """
    # newline="" keeps each line's own terminator, so the rewrite does not
    # convert between "\n" and "\r\n".
    with open(file, "r", encoding=encoding, newline="") as f:
        original = f.readlines()
    updated = [line.replace(old_str, new_str) for line in original]
    if updated == original:
        return
    with open(file, "w", encoding=encoding, newline="") as f:
        f.writelines(updated)
if __name__ == "__main__":
    # Collect every file path, then fix the mislabelled class name in the XML files.
    func()
    for item in file_path:
        # Test the suffix: a substring test would also pick up names such as
        # "a.xml.bak" or any file that sits inside a directory called "x.xml".
        if item.endswith(".xml"):
            # NOTE(review): this replaces "cat " wherever it occurs in the
            # file, not only inside the label element -- confirm that is safe
            # for the data set.
            alter(item,"cat ","cat")
将.xml转换为.txt 【这里提供2种方法】【xml_to_txt.py】【xml2txt.py】
import os
import os.path
import xml.etree.ElementTree as ET
import glob
# Class names; the position of a name in this list is the class number written out.
class_names = ['daoli','encoding']
# Folder that holds the .xml annotations, and folder that receives the label file.
xmlpath='C:/Users/Spectator/Desktop/ss'
txtpath='C:/Users/Spectator/Desktop/ss/ss'


def xml_to_txt(xmlpath,txtpath):
    """Collect the boxes of every ``*.xml`` file in ``xmlpath`` into one text file.

    The output file is ``DaoliImage_13.txt`` inside ``txtpath``.  Each object
    becomes one line, ``"<class number> <xmin> <ymin> <xmax> <ymax> "``, where
    the class number is the index of the object's name in ``class_names``.
    The ``filename`` element of every annotation is printed as it is read.

    :param xmlpath: directory that contains the annotation files
    :param txtpath: output directory; a relative path is resolved against
        ``xmlpath``.  It is created when it does not exist.
    :raises ValueError: if an object's name is not listed in ``class_names``
    :return: None
    """
    # Joining with xmlpath makes a relative txtpath resolve against the
    # annotation folder without changing the process working directory.
    out_dir = os.path.join(xmlpath, txtpath)
    os.makedirs(out_dir, exist_ok=True)
    file_txt = os.path.join(out_dir, 'DaoliImage_13' + '.txt')
    # glob.escape keeps special characters in the directory name literal.
    annotations = sorted(glob.glob(os.path.join(glob.escape(xmlpath), '*.xml')))
    with open(file_txt, 'w', encoding='utf-8') as f_w:
        for file in annotations:
            with open(file, encoding='utf-8') as in_file:
                root = ET.parse(in_file).getroot()
            print(root.findtext('filename'))
            for obj in root.iter('object'):
                name = obj.find('name').text
                try:
                    class_num = class_names.index(name)
                except ValueError:
                    raise ValueError(
                        '%s: class name %r is not in class_names' % (file, name)
                    ) from None
                xmlbox = obj.find('bndbox')
                x1 = xmlbox.find('xmin').text
                x2 = xmlbox.find('xmax').text
                y1 = xmlbox.find('ymin').text
                y2 = xmlbox.find('ymax').text
                f_w.write(str(class_num)+' '+x1+' '+y1+' '+x2+' '+y2+' '+'\n')
# Script entry point: run the conversion with the module-level xmlpath and txtpath.
xml_to_txt(xmlpath,txtpath)
import os
import xml.etree.ElementTree as ET
# Folder of .xml annotations, and folder that receives one label file per annotation.
dirpath = 'C:/Users/Spectator/Desktop/myyolo/Annotations'
newdir = 'C:/Users/Spectator/Desktop/myyolo/labels'


def convert_annotations(dirpath, newdir, class_names=None):
    """Convert every ``.xml`` annotation in ``dirpath`` to a normalised label file.

    For each annotation a ``<stem>.txt`` file is written to ``newdir`` with
    one line per object: ``"<class id> <x_center> <y_center> <w> <h>"``.  The
    four box values are divided by the image width/height read from the
    ``size`` element and formatted with six decimals.  Annotations without
    objects produce no file.  Annotations whose width or height is zero are
    reported and skipped.

    :param dirpath: directory that contains the annotation files
    :param newdir: output directory, created when missing
    :param class_names: optional list of class names; when given, the class
        id is the index of the object's name in it, otherwise it is ``'0'``
    :raises ValueError: if ``class_names`` is given and a name is not in it
    :return: None
    """
    if not os.path.exists(newdir):
        os.makedirs(newdir)
    for fp in sorted(os.listdir(dirpath)):
        # Anything that is not an annotation would make the XML parser fail.
        if not fp.lower().endswith('.xml'):
            continue
        root = ET.parse(os.path.join(dirpath, fp)).getroot()
        sz = root.find('size')
        # Look the children up by tag so their order in the file is irrelevant.
        width = float(sz.find('width').text)
        height = float(sz.find('height').text)
        if width == 0 or height == 0:
            # Skip the file: no normalised box can be computed for it.
            print(root.findtext('filename', fp),'的 width有问题')
            continue
        rows = []
        for child in root.findall('object'):
            sub = child.find('bndbox')
            label = child.find('name').text
            xmin, ymin, xmax, ymax = (
                float(sub.find(tag).text)
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            class_id = '0' if class_names is None else str(class_names.index(label))
            x_center = (xmin + xmax) / (2 * width)
            y_center = (ymin + ymax) / (2 * height)
            w = (xmax - xmin) / width
            h = (ymax - ymin) / height
            rows.append(' '.join([class_id, '%.6f' % x_center, '%.6f' % y_center, '%.6f' % w, '%.6f' % h + '\n']))
        if rows:
            # splitext keeps dots inside the stem; mode 'w' makes a re-run
            # replace the labels instead of appending duplicates.
            with open(os.path.join(newdir, os.path.splitext(fp)[0]+'.txt'), 'w') as f:
                f.writelines(rows)
    print('ok')


if __name__ == "__main__":
    convert_annotations(dirpath, newdir)
更换txt文件里指定字符为其他指定字符
import os
def updateFile(file,old_str,new_str):
    """
    Replace every occurrence of a string in a UTF-8 text file.

    The edited text is written to "<file>.bak", which then takes the place
    of the original file in a single step.
    :param file: path of the file to edit
    :param old_str: string to be replaced
    :param new_str: replacement string
    :return: None
    """
    backup = "%s.bak" % file
    try:
        # newline="" copies each line's terminator unchanged.
        with open(file, "r", encoding="utf-8", newline="") as f1, \
                open(backup, "w", encoding="utf-8", newline="") as f2:
            for line in f1:
                f2.write(line.replace(old_str, new_str))
    except Exception:
        # Do not leave a half-written temporary file behind.
        if os.path.exists(backup):
            os.remove(backup)
        raise
    # os.replace swaps the file in one step, so there is no moment at which
    # the original has been deleted but the new file is not yet in place.
    os.replace(backup, file)
# Script entry point: delete the JPEGImages directory prefix from train.txt.
# NOTE(review): only the prefix itself is removed; if the lines have the form
# "<prefix>/<name>", the "/" in front of the name is kept -- confirm that is wanted.
updateFile(r"C:\Users\Spectator\Desktop\ss\train.txt", "/home/nvidia/darknet/scripts/VOCdevkit/VOC2007/JPEGImages", "")
绘制log-iou的曲线【log——train_iou_visualization.py 】
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Number of leading log lines the skip rule below is applied to.
lines =4876
# Within those lines, drop every line whose index ends in 0 or in 9.
skipped_rows = [x for x in range(lines) if x % 10 in (0, 9)]
columns = ['Region Avg IOU', 'Class', 'Obj', 'No Obj', 'Avg Recall','count']
# error_bad_lines was removed in pandas 2.0; on_bad_lines='skip'
# (pandas >= 1.3) is the supported way to drop malformed lines.
result = pd.read_csv('00_train_log_iou.txt', skiprows=skipped_rows, on_bad_lines='skip', names=columns)

# Keep the piece that follows the first ': ' separator in every field.
for column in columns:
    result[column] = result[column].str.split(': ').str.get(1)
print(result['Region Avg IOU'])

# Convert the extracted text to numbers so it can be plotted.
for column in columns:
    result[column] = pd.to_numeric(result[column])

# Plot the 'Region Avg IOU' column and save the figure.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(result['Region Avg IOU'].values,label='Region Avg IOU')
ax.legend(loc='best')
ax.set_title('The Region Avg IOU curves')
ax.set_xlabel('batches')
fig.savefig('Region Avg IOU')
绘制log-loss的曲线【train_loss_visualization.py】
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Number of leading log lines the skip rule below is applied to.
lines =303
# Skip a line unless its index ends in 9 and is at least 1000.
# NOTE(review): with lines = 303 the `x < 1000` clause is true for every x,
# so all of the first 303 lines are skipped -- confirm that this is intended.
skipped_rows = [x for x in range(lines) if (x % 10 != 9 or x < 1000)]
columns = ['loss', 'avg', 'rate', 'seconds', 'images']
# error_bad_lines was removed in pandas 2.0; on_bad_lines='skip'
# (pandas >= 1.3) is the supported way to drop malformed lines.
result = pd.read_csv('00_train_log_loss.txt', skiprows=skipped_rows, on_bad_lines='skip', names=columns)

# Keep the piece that follows the first space in every field.
for column in columns:
    result[column] = result[column].str.split(' ').str.get(1)
for column in columns:
    print(result[column])

# Convert the extracted text to numbers so it can be plotted.
for column in columns:
    result[column] = pd.to_numeric(result[column])

# Plot the 'avg' column and save the figure.
fig = plt.figure()
# A single full-size axes; a grid of 830 rows would squeeze the plot into a
# sliver at the top of the figure.
ax = fig.add_subplot(1, 1, 1)
ax.plot(result['avg'].values,label='avg_loss')
ax.legend(loc='best')
ax.set_title('The loss curves')
ax.set_xlabel('batches')
fig.savefig('avg_loss')