例如:对于一个包含六个类别的数据集,每个类别的图像序列各自存放在一个文件夹中(共六个图像文件夹),而所有类别的标签文件都放在同一个标签文件夹下。现需要为每个类别生成各自对应的标签文件夹,并把标签中的类别编号改为该类别的编号。
import os
import random
import numpy as np
from numpy import *
def check_dirs(destination_folder):
    """Ensure that ``destination_folder`` exists, creating parents as needed.

    ``exist_ok=True`` makes the call safe to repeat and removes the window
    between a separate existence check and the creation.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    os.makedirs(destination_folder, exist_ok=True)
def change_classes(imagesPath, txtFilePath, saveFilePath, class_num):
    """Rewrite the class id in the label file of every image in a folder.

    For each entry of ``imagesPath`` the label file with the same stem and a
    ``.txt`` extension is read from ``txtFilePath``. A copy in which the first
    field of every row is replaced by ``class_num - 1`` is written to
    ``saveFilePath`` under the same file name. Blank rows are dropped.

    Args:
        imagesPath: Folder whose entries decide which label files are handled.
        txtFilePath: Folder holding the original label files.
        saveFilePath: Output folder; created when it does not exist.
        class_num: 1-based class number; the 0-based id is what gets written.

    Raises:
        FileNotFoundError: If an entry of ``imagesPath`` has no label file.
    """
    os.makedirs(saveFilePath, exist_ok=True)
    # The id is converted once so that str.join only ever sees strings.
    new_class_id = str(class_num - 1)
    for image_name in os.listdir(imagesPath):
        # splitext keeps dots that belong to the stem ("a.b.jpg" -> "a.b").
        label_name = os.path.splitext(image_name)[0] + ".txt"
        # Read everything before opening the output so that an output folder
        # equal to the input folder does not truncate unread data.
        with open(os.path.join(txtFilePath, label_name), "r") as readfile:
            rows = readfile.readlines()
        with open(os.path.join(saveFilePath, label_name), "w") as savetxt:
            for row in rows:
                fields = row.split()
                if not fields:
                    continue
                fields[0] = new_class_id
                savetxt.write(" ".join(fields) + "\n")
if __name__ == '__main__':
    image_classes_num_list = [1, 2, 3, 4, 5, 6]
    className = "class"
    # Folder with the original label files; it is the same for every class.
    txtFilePath = r"E:/work/datasets/labels"
    for num in image_classes_num_list:
        class_dir_name = className + str(num)
        # Image folder of this class: class1, class2, class3, ...
        imagesPath = r"E:/work/datasets/images/" + class_dir_name
        # Output folder for the rewritten labels: class1_labels, class2_labels, ...
        saveFilePath = r"E:/work/datasets/new_images/" + class_dir_name + "_labels"
        change_classes(imagesPath, txtFilePath, saveFilePath, num)
通过对比 images 和 labels 两个文件夹中的文件名,找出并删除没有对应标注文件的图片。
import os.path
import shutil
import os
def findAllFilePathByType(rootPath, fileType):
    """Collect every file below ``rootPath`` whose name ends with ``fileType``.

    The whole directory tree is walked, so files in nested sub-folders are
    included. An empty ``fileType`` matches every file.

    Returns:
        A tuple ``(all_files_path, files_name)`` of two parallel lists: the
        joined path of each match and its bare file name.
    """
    all_files_path = []
    files_name = []
    for folder, _subfolders, entries in os.walk(rootPath):
        for entry in entries:
            if not entry.endswith(fileType):
                continue
            files_name.append(entry)
            all_files_path.append(os.path.join(folder, entry))
    return all_files_path, files_name
def jpg_to_txt(jpgList):
    """Map image file names to the names of their label files.

    Each name is cut at its first ``.`` and ``.txt`` is appended, so
    ``"a.jpg"`` becomes ``"a.txt"``. A name with several dots loses
    everything after the first one (``"a.b.jpg"`` becomes ``"a.txt"``).
    """
    return [image_name.split(".")[0] + ".txt" for image_name in jpgList]
def txt_to_jpg(txtList):
    """Map label file names to the names of their image files.

    Each name is cut at its first ``.`` and ``.jpg`` is appended, so
    ``"a.txt"`` becomes ``"a.jpg"``. A name with several dots loses
    everything after the first one (``"a.b.txt"`` becomes ``"a.jpg"``).
    """
    return [label_name.split(".")[0] + ".jpg" for label_name in txtList]
def compare_images_labels(images_files_name_txt, labels_files_name):
    """Return the expected label names for which no label file exists.

    Args:
        images_files_name_txt: Label names derived from the image names.
        labels_files_name: Names of the label files that actually exist.

    Returns:
        The entries of ``images_files_name_txt`` missing from
        ``labels_files_name``, in their original order (duplicates kept).
    """
    # A set gives constant-time membership tests instead of scanning the
    # label list once per image.
    existing_labels = set(labels_files_name)
    return [name for name in images_files_name_txt if name not in existing_labels]
def remove_files(root_path, files_list):
    """Delete each file named in ``files_list`` from the folder ``root_path``.

    Every name is joined directly onto ``root_path``; ``os.remove`` raises
    if the resulting path does not exist.
    """
    doomed_paths = (os.path.join(root_path, fileName) for fileName in files_list)
    for doomed_path in doomed_paths:
        os.remove(doomed_path)
def main():
    """Delete every ``.jpg`` image that has no label file with a matching name.

    Images are searched recursively below the image root and label files
    recursively below the label root. An image counts as labelled when the
    label name derived from it by ``jpg_to_txt`` is among the label file names.
    """
    rootImagesPath = "E:/work/数据集/images/"
    rootLabelsPath = "E:/work/数据集/labels/"
    # Paths and names of all images; the two lists are parallel.
    images_files_path, images_files_name = findAllFilePathByType(rootImagesPath, ".jpg")
    # Turn the image names into label names so both sides can be compared.
    images_files_name_txt = jpg_to_txt(images_files_name)
    # Names of the label files that exist.
    _, labels_files_name = findAllFilePathByType(rootLabelsPath, ".txt")
    labelled = set(labels_files_name)
    # Delete through the path found during the walk: joining the bare name
    # onto the image root would miss images stored in sub-folders.
    for image_path, expected_label in zip(images_files_path, images_files_name_txt):
        if expected_label not in labelled:
            os.remove(image_path)


if __name__ == "__main__":
    main()
import os
import stat
def findAllFilePathByType(rootPath, fileType):
    """Find all files under ``rootPath`` whose names end with ``fileType``.

    Sub-folders of any depth are searched. Passing ``""`` as ``fileType``
    selects every file.

    Returns:
        ``(all_files_path, files_name)``: the joined paths of the matches and,
        in the same order, their file names.
    """
    matches = [
        (os.path.join(current_dir, name), name)
        for current_dir, _, names in os.walk(rootPath)
        for name in names
        if name.endswith(fileType)
    ]
    all_files_path = [path for path, _ in matches]
    files_name = [name for _, name in matches]
    return all_files_path, files_name
def clearBlankLine(all_files_path, target_files_path):
    """Write a blank-line-free copy of every listed file into one folder.

    Each copy keeps the file name of its source and is placed directly in
    ``target_files_path``, so sources that share a file name overwrite each
    other. The target path of every file is printed.

    Args:
        all_files_path: Paths of the files to clean.
        target_files_path: Existing folder that receives the cleaned copies.
    """
    for source_file_path in all_files_path:
        # basename follows the separator rules of the running platform
        # instead of assuming Windows back-slashes.
        file_name = os.path.basename(source_file_path)
        target_file_path = os.path.join(target_files_path, file_name)
        print(target_file_path)
        if not os.path.exists(target_file_path):
            # Create the empty output file and give its owner full permission.
            with open(target_file_path, 'w', encoding='utf-8'):
                pass
            os.chmod(target_file_path, stat.S_IRWXU)
        clearBlank(source_file_path, target_file_path)
def clearBlank(source_file_path, target_file_path):
    """Copy a UTF-8 text file while dropping its blank lines.

    Lines that are empty or contain only whitespace are skipped; all other
    lines are written unchanged. The source is read completely before the
    target is opened, so both paths may refer to the same file.

    Args:
        source_file_path: File to read.
        target_file_path: File to write; overwritten if it exists.
    """
    with open(source_file_path, 'r', encoding='utf-8') as source_file:
        kept_lines = [line for line in source_file if line.strip()]
    with open(target_file_path, 'w', encoding='utf-8') as target_file:
        target_file.writelines(kept_lines)
if __name__ == '__main__':
    rootPath = r"E:/work/data/labels"
    targetPath = r"E:/work/data/labels_test"
    # Create the output folder on first use.
    if not os.path.exists(targetPath):
        os.makedirs(targetPath)
    # Give the owner read, write and execute permission on the output folder.
    os.chmod(targetPath, stat.S_IRWXU)
    # An empty suffix selects every file below rootPath.
    all_files_path, files_name = findAllFilePathByType(rootPath, "")
    clearBlankLine(all_files_path, targetPath)
根据每个类别标签文件夹中的标签文件名,找到对应的图像,并将其复制到该类别的图像文件夹下。
import os.path
import shutil
import os
# Look up the files stored below a folder.
def findAllFilePathByType(rootPath, fileType):
    """Return the paths and names of all files ending with ``fileType``.

    ``rootPath`` is searched recursively. An empty ``fileType`` accepts
    every file.

    Returns:
        Two parallel lists as a tuple: joined file paths and file names.
    """
    all_files_path, files_name = [], []
    for directory, _, candidates in os.walk(rootPath):
        wanted = [candidate for candidate in candidates if candidate.endswith(fileType)]
        files_name.extend(wanted)
        all_files_path.extend(os.path.join(directory, candidate) for candidate in wanted)
    return all_files_path, files_name
# File-copy helper.
def copy_files(source_folder, destination_folder, images_name_list):
    """Copy the named files from ``source_folder`` to ``destination_folder``.

    The destination folder is created when it does not exist. A message is
    printed after each copied file.

    Args:
        source_folder: Folder that currently holds the files.
        destination_folder: Folder that receives the copies.
        images_name_list: File names to copy; names are kept unchanged.
    """
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)
    for image_name in images_name_list:
        shutil.copy(
            os.path.join(source_folder, image_name),
            os.path.join(destination_folder, image_name),
        )
        print(f"已复制文件: {image_name} 到 {destination_folder}")
def txt_to_jpg(txtList):
    """Map label file names to the names of their image files.

    Only the final extension is replaced by ``.jpg``, so dots inside the stem
    are preserved (``"a.b.txt"`` becomes ``"a.b.jpg"``).

    Args:
        txtList: Label file names.

    Returns:
        A new list with one image file name per input name.
    """
    return [os.path.splitext(label_name)[0] + ".jpg" for label_name in txtList]
def main():
    """Copy, for every class, the images named by its label files.

    The label folder of a class decides which images are copied from the
    shared source folder into the image folder of that class.
    """
    classes_num_list = [1, 2, 3, 4, 5, 6]
    className = "class"
    # Folder that currently holds the images of all classes.
    images_source_root_path = r'F:/1_work/Datasets/test_dataset'
    for num in classes_num_list:
        # Label folder of the class, e.g. E:\work\datasets\labels\class1_labels
        labels_root_path = r"E:/work/datasets/labels/" + className + str(num) + "_labels"
        # Image folder of the class, e.g. E:\work\datasets\images\class1
        images_target_root_path = r"E:/work/datasets/images/" + className + str(num)
        # Every file in the label folder names one image to copy.
        _, labels_files_name = findAllFilePathByType(labels_root_path, "")
        images_name_list = txt_to_jpg(labels_files_name)
        copy_files(images_source_root_path, images_target_root_path, images_name_list)


if __name__ == "__main__":
    main()
# 导入所需要的库
import cv2
import os
import numpy as np
def findAllFilePathByType(rootPath, fileType):
    """List the files below ``rootPath`` whose names end with ``fileType``.

    All nesting levels are visited. With ``fileType`` set to ``""`` every
    file is listed.

    Returns:
        A tuple of two parallel lists: the joined paths and the file names.
    """
    def _matching_files():
        # Yield (folder, file name) for each file with the requested suffix.
        for parent, _dirs, names in os.walk(rootPath):
            for name in names:
                if name.endswith(fileType):
                    yield parent, name

    all_files_path = []
    files_name = []
    for parent, name in _matching_files():
        files_name.append(name)
        all_files_path.append(os.path.join(parent, name))
    return all_files_path, files_name
# Image-saving helper.
def save_image(image, addr, num):
    """Write ``image`` to the file ``addr`` + ``str(num)`` + ``.jpg``.

    Args:
        image: Image data handed to ``cv2.imwrite``.
        addr: Leading part of the output path (folder plus name prefix).
        num: Value appended to ``addr`` to form the file name.
    """
    output_path = "".join((addr, str(num), '.jpg'))
    cv2.imwrite(output_path, image)
def get_video_to_img(source_video_path, target_image_path, timeF=60):
    """Save every ``timeF``-th frame of a video as a numbered JPEG image.

    Frames are counted from 1. Each frame whose count is a multiple of
    ``timeF`` is passed to ``save_image`` together with ``target_image_path``
    and a running image number that starts at 1. If no frame can be read,
    nothing is saved.

    Args:
        source_video_path: Path of the video file to read.
        target_image_path: Path prefix handed to ``save_image``.
        timeF: Sampling interval in frames; defaults to 60.
    """
    videoCapture = cv2.VideoCapture(source_video_path)
    try:
        frame_index = 0
        saved_count = 0
        success, frame = videoCapture.read()
        while success:
            frame_index += 1
            if frame_index % timeF == 0:
                saved_count += 1
                save_image(frame, target_image_path, saved_count)
                print('save image:', frame_index)
            success, frame = videoCapture.read()
    finally:
        # Release the capture even when saving a frame fails.
        videoCapture.release()
def run_video_to_image():
    """Extract frames from every file found below the source folder.

    Each file gets its own output folder below the target root, named after
    the file name without its final extension, and is then processed by
    ``get_video_to_img``.
    """
    source_image_root_path = r"/home/y/Code/new_datasets"
    target_image_root_path = r"/home/y/Code/save_datasets"
    os.makedirs(target_image_root_path, exist_ok=True)
    # An empty suffix selects every file below the source folder.
    all_images_path, images_name = findAllFilePathByType(source_image_root_path, "")
    for source_video_path in all_images_path:
        # splitext also copes with names that have no extension, where
        # indexing the parts of a "."-split would fail.
        video_stem = os.path.splitext(os.path.basename(source_video_path))[0]
        # The trailing separator is required: save_image appends the image
        # number directly to this prefix.
        target_image_path = target_image_root_path + "/" + video_stem + "/"
        os.makedirs(target_image_path, exist_ok=True)
        get_video_to_img(source_video_path, target_image_path)


# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    run_video_to_image()
import os.path
import shutil
import os
num = 0 # Counter intended for renamed files; the functions of this script do not read it.
imge_count = 7653  # Next index that rename() puts into a 'Monkey_' file name; incremented per renamed entry.
# Image-copy helper.
def moveFiles(path, disdir):
    """Copy every file found under ``path`` into the flat folder ``disdir``.

    Despite its name the function copies; the sources stay in place.
    Sub-folders of ``path`` are processed recursively. Each copy is named
    after the stem of its source with a ``.jpg`` extension, so files that
    share a stem overwrite each other.

    Args:
        path: Folder to read from.
        disdir: Destination folder; created when it does not exist.
    """
    os.makedirs(disdir, exist_ok=True)
    for entry in os.listdir(path):
        child = os.path.join(path, entry)
        if os.path.isfile(child):
            stem, _extension = os.path.splitext(entry)
            shutil.copy(child, os.path.join(disdir, stem + ".jpg"))
        else:
            moveFiles(child, disdir)
# Renaming helper.
def rename(img_folder):
    """Rename every entry of ``img_folder`` to ``Monkey_`` + index + ``.jpg``.

    The index comes from the module-level ``imge_count``, which is increased
    by one for each entry, so numbering continues across calls.
    """
    global imge_count
    for current_name in os.listdir(img_folder):
        new_name = 'Monkey_' + str(imge_count) + '.jpg'
        imge_count += 1
        os.rename(
            os.path.join(img_folder, current_name),
            os.path.join(img_folder, new_name),
        )
def get_source_img_path(rootPath):
    """Group the file paths below ``rootPath`` by the folder that holds them.

    Returns:
        A list with one inner list per folder that contains files; each inner
        list holds the joined paths of that folder's files. Folders without
        files contribute nothing.
    """
    return [
        [os.path.join(folder, file_name) for file_name in file_names]
        for folder, _subfolders, file_names in os.walk(rootPath)
        if file_names
    ]
def get_child_dirsName(rootPath):
    """Return the names of the folders located directly inside ``rootPath``.

    Only immediate children are reported. Names of deeper folders are left
    out because a bare name appended to ``rootPath`` would not point at them.

    Returns:
        A list of folder names; empty when ``rootPath`` has no sub-folders
        or cannot be walked.
    """
    for _root, dirs, _files in os.walk(rootPath):
        # The first triple produced by os.walk describes rootPath itself.
        return [str(name) for name in dirs]
    return []
def main():
    """Rename the images of every sub-folder and gather copies in one folder.

    For each folder name reported by ``get_child_dirsName`` the entries are
    first renamed by ``rename`` and then copied into the shared destination
    folder by ``moveFiles``.
    """
    rootPath = "/home/y/Code/save_datasets/"
    # Shared destination folder for the copies.
    disdir = '/home/y/Code/dataset_images'
    for dir_name in get_child_dirsName(rootPath):
        # rootPath ends with a separator, so plain concatenation is enough.
        img_folder0 = rootPath + dir_name
        rename(img_folder0)
        moveFiles(img_folder0, disdir)


if __name__ == "__main__":
    main()
将数据集按 9:1 的比例划分为训练集和验证集。
import os, random, shutil
def moveimg(fileDir, tarDir, rate=0.1):
    """Move a random share of the entries of ``fileDir`` into ``tarDir``.

    ``int(len(entries) * rate)`` entries are drawn without replacement and
    moved under their original names. The drawn names are printed.

    Args:
        fileDir: Source folder; a trailing separator is not required.
        tarDir: Destination folder; created when it does not exist.
        rate: Fraction of the entries to move; defaults to 0.1.

    Raises:
        ValueError: If ``rate`` asks for more entries than exist.
    """
    pathDir = os.listdir(fileDir)
    picknumber = int(len(pathDir) * rate)
    sample = random.sample(pathDir, picknumber)
    print(sample)
    os.makedirs(tarDir, exist_ok=True)
    for name in sample:
        shutil.move(os.path.join(fileDir, name), os.path.join(tarDir, name))
def movelabel(file_list, file_label_train, file_label_val):
    """Move the label files that belong to the given images.

    For every name in ``file_list`` ending with ``.jpg`` the label file with
    the same stem and a ``.txt`` extension is moved from ``file_label_train``
    into ``file_label_val`` if it exists. A message is printed for each
    moved label. Change ``.txt`` to ``.xml`` to handle XML labels instead.

    Args:
        file_list: Image file names to look up.
        file_label_train: Folder that currently holds the label files.
        file_label_val: Destination folder; created when it does not exist.
    """
    # Without an existing destination folder shutil.move would treat the
    # folder path as the new file name of the first label.
    os.makedirs(file_label_val, exist_ok=True)
    for image_name in file_list:
        if not image_name.endswith('.jpg'):
            continue
        filename = os.path.join(file_label_train, image_name[:-4] + '.txt')
        if os.path.exists(filename):
            shutil.move(filename, file_label_val)
            print(image_name + "处理成功!")
if __name__ == '__main__':
    DatasetsList = [1, 2, 3, 4, 5, 6]
    rootDir = r"G:/work/datasets/datasets-ALL-six-split/"
    className = "class"
    for num in DatasetsList:
        # Every class has a folder of the same name in all four locations.
        class_folder = className + str(num) + "/"
        fileDir = rootDir + r"train/images/" + class_folder  # training images
        tarDir = rootDir + r"val/images/" + class_folder  # validation images
        file_label_train = rootDir + r"train/labels/" + class_folder  # training labels
        file_label_val = rootDir + r"val/labels/" + class_folder  # validation labels
        # Move part of the images first, then the labels of exactly those images.
        moveimg(fileDir, tarDir)
        file_list = os.listdir(tarDir)
        movelabel(file_list, file_label_train, file_label_val)
import os
import random
import numpy as np
from numpy import *
import shutil
# Index given to the next renamed image/label pair; rename_and_change_classes
# increases it by one for every processed image.
images_start_num = 1790
def check_dirs(destination_folder):
    """Create ``destination_folder`` (and missing parents) if it is absent.

    Using ``exist_ok=True`` keeps repeated calls harmless and avoids the gap
    between checking for the folder and creating it.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    os.makedirs(destination_folder, exist_ok=True)
def rename_and_change_classes(imagesPath, txtFilePath, saveImagePath, saveFilePath, class_number):
    """Copy images and labels under new names and rewrite the label class id.

    Every entry of ``imagesPath`` is copied to ``saveImagePath`` as
    ``BulletHole_`` + index + ``.jpg``. Its label file (same stem, ``.txt``)
    is read from ``txtFilePath`` and written to ``saveFilePath`` as
    ``BulletHole_`` + index + ``.txt`` with the first field of every row
    replaced by ``class_number - 1``. Blank rows are dropped. The index is
    the module-level ``images_start_num``, increased by one per image.

    Args:
        imagesPath: Folder with the source images.
        txtFilePath: Folder with the source label files.
        saveImagePath: Output folder for images; created when missing.
        saveFilePath: Output folder for labels; created when missing.
        class_number: 1-based class number; the 0-based id is written.

    Raises:
        FileNotFoundError: If an image has no matching label file.
    """
    global images_start_num
    os.makedirs(saveFilePath, exist_ok=True)
    os.makedirs(saveImagePath, exist_ok=True)
    new_class_id = str(class_number - 1)
    for image_name in os.listdir(imagesPath):
        new_stem = 'BulletHole_' + str(images_start_num)
        # Copy straight to the new name instead of copying and then renaming.
        shutil.copy(
            os.path.join(imagesPath, image_name),
            os.path.join(saveImagePath, new_stem + '.jpg'),
        )
        # splitext keeps dots that belong to the stem of the image name.
        label_name = os.path.splitext(image_name)[0] + ".txt"
        with open(os.path.join(txtFilePath, label_name), 'r') as readfile:
            rows = readfile.readlines()
        with open(os.path.join(saveFilePath, new_stem + '.txt'), 'w') as savetxt:
            for row in rows:
                fields = row.split()
                if not fields:
                    continue
                print(fields)
                fields[0] = new_class_id
                savetxt.write(" ".join(fields) + "\n")
        images_start_num = images_start_num + 1
if __name__ == '__main__':
    # Use [1, 2, 3, 4, 5, 6] here to process all six classes.
    image_classes_num_list = [4]
    className = "BulletHole"
    # Folder with the source images.
    imagesPath = "E:\\data\\images"
    # Folder with the original label files.
    txtFilePath = "E:\\data\\labels"
    for num in image_classes_num_list:
        output_stem = "E:\\data\\" + className + str(num)
        # Folder for the rewritten label files.
        saveFilePath = output_stem + "_labels"
        # Folder for the renamed images.
        saveImagePath = output_stem
        rename_and_change_classes(imagesPath, txtFilePath, saveImagePath, saveFilePath, num)