import glob
import math
import os
import shutil
import time
# Image file extension used by the dataset; change the index to pick ".png".
IMAGE_FORMAT = (".jpg", ".png")[0]

# Fraction of the samples that goes to the training set (the rest is val).
split_rate = 0.8

# Source folders holding the not-yet-split label files and images.
label_from = r"D:\Dataset\Annotation_temp"
image_from = r"D:\Dataset\Images_temp"

# Output root, suffixed with the current Unix time rounded to whole seconds.
target_dir = "{}_{}".format(r"D:\Dataset\QQ_SafeVerify", round(time.time()))
def mk_directory(target_dir):
    """Create the train/val/test directory tree below ``target_dir``.

    The resulting layout is::

        target_dir/
            train/images
            train/labels
            val/images
            val/labels
            test/

    Args:
        target_dir: Root directory of the split dataset. Missing parent
            directories are created too, and directories that already
            exist are left untouched.

    Returns:
        A dict mapping ``"train_images"``, ``"train_labels"``,
        ``"val_images"`` and ``"val_labels"`` (in that order) to the
        corresponding directory paths.
    """
    print("创建目录结构")
    target_dir_info = {}
    for subset in ("train", "val"):
        for kind in ("images", "labels"):
            target_dir_info[subset + "_" + kind] = os.path.join(
                target_dir, subset, kind
            )

    # makedirs builds every intermediate level (target_dir, train, val) and,
    # with exist_ok=True, does not fail when a directory is already there.
    leaf_dirs = list(target_dir_info.values())
    leaf_dirs.append(os.path.join(target_dir, "test"))
    for path in leaf_dirs:
        os.makedirs(path, exist_ok=True)
    return target_dir_info
def split_samples(label_from, image_from, target_dir_info):
    """Move label/image pairs into the train and val folders.

    Label files (``*.txt``) are read from ``label_from`` and the matching
    images (same stem, extension ``IMAGE_FORMAT``) from ``image_from``.
    The first ``ceil(total * split_rate)`` pairs, in sorted file-name order,
    go to the train folders and the remaining pairs go to the val folders::

        train/
            images/
            labels/   (also receives classes.txt)
        val/
            images/
            labels/   (also receives classes.txt)

    Side effects:
        * A label file whose image does not exist is deleted from
          ``label_from`` and left out of the split.
        * ``classes.txt`` is copied to the train labels folder and moved to
          the val labels folder.
        * Every remaining label and its image are moved (not copied).

    Args:
        label_from: Directory containing the label ``.txt`` files.
        image_from: Directory containing the image files.
        target_dir_info: Dict with the keys ``"train_images"``,
            ``"train_labels"``, ``"val_images"`` and ``"val_labels"``
            pointing at existing directories.
    """
    print("分割数据集train_val")
    # glob order is unspecified; sort so the split is reproducible.
    label_paths = sorted(glob.glob(os.path.join(label_from, "*.txt")))

    samples = []
    for label_path in label_paths:
        label_name = os.path.basename(label_path)
        # Compare the file name itself so e.g. "subclasses.txt" is treated
        # as an ordinary label rather than as the class list.
        if label_name == "classes.txt":
            shutil.copy(label_path, target_dir_info["train_labels"])
            shutil.move(label_path, target_dir_info["val_labels"])
            continue
        print(label_name)
        image_name = os.path.splitext(label_name)[0] + IMAGE_FORMAT
        image_path = os.path.join(image_from, image_name)
        if not os.path.exists(image_path):
            os.remove(label_path)
            print("文件不存在:", image_path)
            # The label is gone now, so it must not be scheduled for moving.
            continue
        samples.append((label_name, image_name))

    total = len(samples)
    train_num = math.ceil(total * split_rate)
    valid_num = total - train_num
    print("总体样本数量total:", total)
    print("训练集样本数量train_num:", train_num)
    print("验证集样本数量valid_num:", valid_num)

    for index, (label_name, image_name) in enumerate(samples):
        subset = "train" if index < train_num else "val"
        to_image_dir = target_dir_info[subset + "_images"]
        to_label_dir = target_dir_info[subset + "_labels"]
        print("to_image_dir", to_image_dir)
        print("to_label_dir", to_label_dir)
        print(label_name)
        # File names are joined onto the directories directly, so a ".txt"
        # that happens to appear inside a directory name is never rewritten.
        shutil.move(
            os.path.join(label_from, label_name),
            os.path.join(to_label_dir, label_name),
        )
        shutil.move(
            os.path.join(image_from, image_name),
            os.path.join(to_image_dir, image_name),
        )
def mk_Annotation():
    """Generate placeholder annotation files for trying out the split.

    Reads the module-level ``label_from``, ``image_from`` and
    ``IMAGE_FORMAT`` settings. If ``label_from`` already contains any
    ``.txt`` file nothing is written. Otherwise a ``classes.txt`` holding
    the text ``1`` is created, followed by one empty ``<stem>.txt`` label
    file for each of the first ten images (sorted by path) found in
    ``image_from``.
    """
    print("模拟生成Annotation.txt文件")
    if glob.glob(os.path.join(label_from, "*.txt")):
        # Annotations are already present; leave them alone.
        return
    with open(os.path.join(label_from, "classes.txt"), "w") as f:
        f.write("1")

    # Sort before truncating so the same ten images are picked on every run.
    image_paths = sorted(glob.glob(os.path.join(image_from, "*" + IMAGE_FORMAT)))
    for image_path in image_paths[:10]:
        print(image_path)
        # splitext handles extensions of any length (".jpg", ".jpeg", ...).
        stem = os.path.splitext(os.path.basename(image_path))[0]
        print(stem)
        # Opening in "w" mode is enough to create an empty label file.
        with open(os.path.join(label_from, stem + ".txt"), "w"):
            pass
if __name__ == "__main__":
    # Build the output folder tree and show the paths that were prepared.
    dirs = mk_directory(target_dir)
    print(dirs)

    # Create placeholder annotations when the label folder has none yet.
    mk_Annotation()

    # Distribute the label/image pairs over the train and val folders.
    split_samples(label_from, image_from, dirs)
    print("分割完成:", target_dir)