【深度学习 招式篇】制作VOC数据集必看! 各种快捷所需python代码 YOLO 训练自己的数据集

【深度学习 招式篇】制作VOC数据集必看! 各种快捷所需python代码 YOLO 训练自己的数据集

    • 批量 更改该目录下所有文件名 【change_mingzi.py】
    • 遍历一个文件夹下所有文件,提取生成目录形式的.txt文件【tiqu_name.py】
    • 批量查找 错误标记的.xml文件 遍历目录替换更改错误标注 比如cat(空格)改成cat 【change_label.py】
    • 将.xml转换为.txt 【这里提供2种方法】【xml_to_txt.py】【xml2txt.py】
    • 更换txt文件里指定字符为其他指定字符
    • 绘制log-iou的曲线【train_iou_visualization.py】
    • 绘制log-loss的曲线【train_loss_visualization.py】

批量 更改该目录下所有文件名 【change_mingzi.py】

# -*- coding: utf-8 -*-
import os
# Directory whose files will be renamed.
path='E:\\标注数据\\ceshi\\'


def rename_files(directory, prefix="ZaoxiuImage_", extension=".jpg", start=1):
    """Rename every regular file in *directory* to ``<prefix><n><extension>``.

    Files are processed in sorted name order so the numbering is reproducible
    (``os.listdir`` order is arbitrary). Sub-directories are left untouched.

    :param directory: folder containing the files to rename
    :param prefix: text placed before the running number
    :param extension: suffix given to every file; pass ``None`` to keep each
        file's own extension
    :param start: first number of the sequence
    :return: list of the new file names, in numbering order
    """
    names = sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )

    # Phase 1: move everything to temporary names. Renaming directly could
    # land on a name that a not-yet-processed file still holds (e.g. when the
    # script is run a second time), which silently overwrites it on POSIX and
    # raises on Windows.
    staged = []
    for offset, name in enumerate(names):
        temp_name = "__rename_tmp_%d_%d" % (os.getpid(), offset)
        os.rename(os.path.join(directory, name),
                  os.path.join(directory, temp_name))
        staged.append((temp_name, name))

    # Phase 2: assign the final, numbered names.
    new_names = []
    for number, (temp_name, old_name) in enumerate(staged, start):
        suffix = os.path.splitext(old_name)[1] if extension is None else extension
        new_name = "%s%d%s" % (prefix, number, suffix)
        os.rename(os.path.join(directory, temp_name),
                  os.path.join(directory, new_name))
        new_names.append(new_name)
    return new_names


if __name__ == "__main__":
    rename_files(path)
    print("End")

遍历一个文件夹下所有文件,提取生成目录形式的.txt文件【tiqu_name.py】

import os
import re
images_path = 'E:\\yolo_tf2.1\\VOCdevkit\\VOC\\Annotations\\'   # Directory whose entries are listed.
txt_save_path = 'E:\\yolo_tf2.1\\VOCdevkit\\VOC\\zaoxiu.txt'  # Text file that receives the listing.


def read_directory(directory_name, output_path=txt_save_path, encoding="utf-8"):
    """Write the name of every entry in *directory_name* to a text file.

    One name per line, in sorted order so the listing is reproducible
    (``os.listdir`` order is arbitrary). The output file is overwritten.

    :param directory_name: directory to list (absolute path recommended)
    :param output_path: text file to create; defaults to ``txt_save_path``
    :param encoding: encoding of the output file
    :return: the list of names that was written
    """
    names = sorted(os.listdir(directory_name))
    # The context manager closes the file so the listing is flushed to disk
    # even if the interpreter keeps running afterwards.
    with open(output_path, "w", encoding=encoding) as fw:
        fw.writelines(name + "\n" for name in names)
    return names


if __name__ == "__main__":
    read_directory(images_path)

批量查找 错误标记的.xml文件 遍历目录替换更改错误标注 比如cat(空格)改成cat 【change_label.py】

import os
 
# Paths of every file discovered by func(); filled in place.
file_path = []


def func(fileDir=r"C:\Users\Spectator\Desktop\tihuan\1"):
    """Walk *fileDir* recursively and append each file's path to ``file_path``.

    :param fileDir: root directory to scan. The default is written as a raw
        string with single backslashes; doubling them inside a raw string
        would put literal double backslashes into the path.
    :return: None (results accumulate in the module-level ``file_path`` list)
    """
    for root, _dirs, files in os.walk(fileDir):
        # os.path.join uses the platform separator instead of hard-coding "/".
        file_path.extend(os.path.join(root, fileitem) for fileitem in files)
 
def alter(file, old_str, new_str, encoding="utf-8"):
    """Replace every occurrence of *old_str* with *new_str* inside *file*.

    :param file: path of the text file to edit in place
    :param old_str: text to search for
    :param new_str: replacement text
    :param encoding: file encoding; explicit so that UTF-8 annotation files
        are not decoded with the platform's locale codec
    :return: True when the file was rewritten, False when it contained no
        occurrence of *old_str* and was left untouched
    """
    with open(file, "r", encoding=encoding) as f:
        original = f.read()
    updated = original.replace(old_str, new_str)
    if updated == original:
        # Nothing to change: avoid a pointless rewrite of the file.
        return False
    with open(file, "w", encoding=encoding) as f:
        f.write(updated)
    return True
           
if __name__ == "__main__":
    # Collect every file below the configured directory, then fix the
    # mislabelled class name ("cat " with a trailing space) in each XML file.
    func()
    for item in file_path:
        # Test the real extension: a substring test for ".xml" would also
        # match names such as "a.xml.bak" or any file stored under a
        # directory whose name contains ".xml".
        if item.lower().endswith(".xml"):
            alter(item,"cat ","cat")

将.xml转换为.txt 【这里提供2种方法】方法一【xml_to_txt.py】:把所有.xml中的类别序号和像素坐标汇总写入同一个.txt;方法二【xml2txt.py】:为每个.xml生成一个同名的、坐标归一化的YOLO格式.txt

import os
import os.path
import xml.etree.ElementTree as ET
import glob

class_names = ['daoli','encoding']   # Class index = position in this list.
xmlpath='C:/Users/Spectator/Desktop/ss'       # Directory holding the .xml annotations.
txtpath='C:/Users/Spectator/Desktop/ss/ss'    # Directory that receives the .txt output.


def xml_to_txt(xmlpath, txtpath, classes=None, output_name='DaoliImage_13.txt'):
    """Collect the boxes of every ``.xml`` in *xmlpath* into one text file.

    Each object becomes one line ``<class_index> <xmin> <ymin> <xmax> <ymax> ``
    (pixel coordinates copied verbatim from the XML, trailing space kept).
    The ``filename`` element of each annotation is printed as progress output.

    :param xmlpath: directory containing the annotation files
    :param txtpath: output directory; a relative path is resolved against
        *xmlpath*. It is created when missing.
    :param classes: list of class names; defaults to the module-level
        ``class_names``
    :param output_name: name of the text file created inside *txtpath*
    :return: path of the text file that was written
    :raises ValueError: when an object's name is not in *classes*
    """
    if classes is None:
        classes = class_names

    # The earlier version chdir'ed into xmlpath, so a relative txtpath was
    # resolved against it. Keep that resolution without changing the
    # process-wide working directory (join returns txtpath when it is absolute).
    out_dir = os.path.join(xmlpath, txtpath)
    os.makedirs(out_dir, exist_ok=True)

    # Glob for the XML files directly; escape the directory so characters
    # such as "[" in the path are not treated as wildcards.
    annotations = sorted(glob.glob(os.path.join(glob.escape(xmlpath), '*.xml')))

    file_txt = os.path.join(out_dir, output_name)
    with open(file_txt, 'w', encoding='utf-8') as f_w:
        for file in annotations:
            # Passing the path lets ElementTree open and close the file itself.
            root = ET.parse(file).getroot()
            print(root.findtext('filename'))

            for obj in root.iter('object'):
                name = obj.find('name').text
                if name not in classes:
                    raise ValueError(
                        'unknown class %r in %s (known: %s)' % (name, file, classes))
                class_num = classes.index(name)

                xmlbox = obj.find('bndbox')
                x1 = xmlbox.find('xmin').text
                x2 = xmlbox.find('xmax').text
                y1 = xmlbox.find('ymin').text
                y2 = xmlbox.find('ymax').text

                f_w.write(str(class_num)+' '+x1+' '+y1+' '+x2+' '+y2+' '+'\n')
    return file_txt


if __name__ == "__main__":
    xml_to_txt(xmlpath,txtpath)
import os
import xml.etree.ElementTree as ET

dirpath = 'C:/Users/Spectator/Desktop/myyolo/Annotations'     # Directory holding the source .xml files.
newdir = 'C:/Users/Spectator/Desktop/myyolo/labels'  # Directory that receives the generated .txt labels.


def convert_annotations(dirpath, newdir, class_names=None):
    """Convert every VOC ``.xml`` in *dirpath* to a YOLO label file in *newdir*.

    For each object one line ``<class> <x_center> <y_center> <w> <h>`` is
    written, with all four values divided by the image width/height so they
    fall in the 0-1 range. The label file shares the XML file's base name and
    is overwritten on every run, so re-running does not duplicate lines.

    :param dirpath: directory containing the ``.xml`` annotations
    :param newdir: output directory (created when missing)
    :param class_names: optional list of class names; when given, the class
        index is the position of the object's name in it. When omitted every
        object is written as class ``0``.
    :return: None
    """
    os.makedirs(newdir, exist_ok=True)

    for fp in sorted(os.listdir(dirpath)):
        if not fp.lower().endswith('.xml'):
            # Anything that is not an annotation would make the parser fail.
            continue

        root = ET.parse(os.path.join(dirpath, fp)).getroot()

        # Look elements up by tag instead of by child position so the
        # result does not depend on the order of elements in the XML.
        sz = root.find('size')
        width = float(sz.find('width').text)
        height = float(sz.find('height').text)
        filename = root.findtext('filename') or fp

        if width <= 0 or height <= 0:
            # Normalisation is impossible; skip the file rather than writing
            # values left over from a previous box.
            print(filename,'的 width有问题')
            continue

        label_lines = []
        for child in root.findall('object'):         # Every labelled box in the image.
            sub = child.find('bndbox')
            label = child.find('name').text
            xmin = float(sub.find('xmin').text)
            ymin = float(sub.find('ymin').text)
            xmax = float(sub.find('xmax').text)
            ymax = float(sub.find('ymax').text)

            x_center = (xmin + xmax) / (2 * width)
            y_center = (ymin + ymax) / (2 * height)
            w = (xmax - xmin) / width
            h = (ymax - ymin) / height

            class_id = class_names.index(label) if class_names else 0
            label_lines.append(' '.join([str(class_id), '%.6f' % x_center, '%.6f' % y_center, '%.6f' % w, '%.6f' % h + '\n']))

        if label_lines:
            # splitext keeps dots inside the base name ("a.b.xml" -> "a.b.txt").
            target = os.path.join(newdir, os.path.splitext(fp)[0] + '.txt')
            with open(target, 'w') as f:
                f.writelines(label_lines)


if __name__ == "__main__":
    convert_annotations(dirpath, newdir)
    print('ok')

更换txt文件里指定字符为其他指定字符

import os
def updateFile(file,old_str,new_str):
    """
    Replace *old_str* with *new_str* on every line of a UTF-8 text file.

    The edited text is first written to ``<file>.bak`` (an existing file of
    that name is overwritten) and then moved over the original.
    :param file: path of the file to edit
    :param old_str: text to be replaced
    :param new_str: replacement text
    :return: None
    """
    backup = "%s.bak" % file
    with open(file, "r", encoding="utf-8") as f1,open(backup, "w", encoding="utf-8") as f2:
        for line in f1:
            # str.replace returns the line unchanged when old_str is absent,
            # so no separate membership test is needed.
            f2.write(line.replace(old_str, new_str))
    # os.replace swaps the new file in as a single step; removing the
    # original first and renaming afterwards would leave a moment where the
    # original no longer exists if the process is interrupted.
    os.replace(backup, file)

if __name__ == "__main__":
    # Strip the darknet JPEGImages directory prefix from every line of
    # train.txt by replacing it with the empty string.
    updateFile(
        r"C:\Users\Spectator\Desktop\ss\train.txt",
        "/home/nvidia/darknet/scripts/VOCdevkit/VOC2007/JPEGImages",
        "",
    )

绘制log-iou的曲线【train_iou_visualization.py】

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#%matplotlib inline

lines = 4876  # Upper bound of the row indices considered by the skip filter.

IOU_COLUMNS = ['Region Avg IOU', 'Class', 'Obj', 'No Obj', 'Avg Recall', 'count']


def load_iou_log(log_path='00_train_log_iou.txt', lines=lines):
    """Load the extracted IoU log into a numeric DataFrame.

    Every kept row is split on commas into the six ``IOU_COLUMNS``; each cell
    has the form ``"<label>: <value>"`` and only the value is kept.

    :param log_path: path of the extracted IoU log
    :param lines: rows with index below this value are skipped when their
        index modulo 10 is 0 or 9
    :return: DataFrame with one numeric column per entry of ``IOU_COLUMNS``
    """
    skipped = [x for x in range(lines) if x % 10 in (0, 9)]
    # on_bad_lines='skip' replaces error_bad_lines=False, which was removed
    # in pandas 2.0 (requires pandas >= 1.3).
    result = pd.read_csv(log_path, skiprows=skipped, on_bad_lines='skip', names=IOU_COLUMNS)
    for column in IOU_COLUMNS:
        values = result[column].str.split(': ').str.get(1)
        result[column] = pd.to_numeric(values)
    return result


def plot_region_avg_iou(result, output='Region Avg IOU'):
    """Plot the 'Region Avg IOU' column of *result* and save the figure.

    :param result: DataFrame returned by ``load_iou_log``
    :param output: file name handed to ``Figure.savefig``
    :return: the matplotlib figure
    """
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # The other columns of IOU_COLUMNS can be drawn the same way.
    ax.plot(result['Region Avg IOU'].values,label='Region Avg IOU')
    ax.legend(loc='best')
    ax.set_title('The Region Avg IOU curves')
    ax.set_xlabel('batches')
    fig.savefig(output)
    return fig


if __name__ == "__main__":
    result = load_iou_log()
    print(result['Region Avg IOU'])
    plot_region_avg_iou(result)

绘制log-loss的曲线【train_loss_visualization.py】

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#%matplotlib inline

lines = 303  # Upper bound of the row indices considered by the skip filter.

LOSS_COLUMNS = ['loss', 'avg', 'rate', 'seconds', 'images']


def load_loss_log(log_path='00_train_log_loss.txt', lines=lines):
    """Load the extracted loss log into a numeric DataFrame.

    Every kept row is split on commas into the five ``LOSS_COLUMNS``; each
    cell is split on spaces and the token at position 1 is kept as the value.

    :param log_path: path of the extracted loss log
    :param lines: row indices below this value are candidates for skipping
    :return: DataFrame with one numeric column per entry of ``LOSS_COLUMNS``
    """
    # NOTE(review): the predicate is kept exactly as it was. Because of the
    # "x < 1000" term, any `lines` value up to 1000 (including the default)
    # skips every row index below `lines` - confirm this is the intent.
    skipped = [x for x in range(lines) if (x % 10 != 9) or (x < 1000)]
    # on_bad_lines='skip' replaces error_bad_lines=False, which was removed
    # in pandas 2.0 (requires pandas >= 1.3).
    result = pd.read_csv(log_path, skiprows=skipped, on_bad_lines='skip', names=LOSS_COLUMNS)
    for column in LOSS_COLUMNS:
        values = result[column].str.split(' ').str.get(1)
        result[column] = pd.to_numeric(values)
    return result


def plot_avg_loss(result, output='avg_loss'):
    """Plot the 'avg' column of *result* and save the figure.

    :param result: DataFrame returned by ``load_loss_log``
    :param output: file name handed to ``Figure.savefig``
    :return: the matplotlib figure
    """
    fig = plt.figure()
    # A single full-size axes; a grid of 830 rows would squeeze the plot
    # into 1/830 of the figure height.
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(result['avg'].values,label='avg_loss')
    ax.legend(loc='best')
    ax.set_title('The loss curves')
    ax.set_xlabel('batches')
    fig.savefig(output)
    return fig


if __name__ == "__main__":
    result = load_loss_log()
    for column in LOSS_COLUMNS:
        print(result[column])
    plot_avg_loss(result)


你可能感兴趣的:(#,【深度学习,招式篇】【工程应用问题】,【深度学习】,python,深度学习,数据挖掘,机器学习,计算机视觉)