将数据集(位置:D:/Code/Data/centerlinedata/tem_voc/JPEGImages/)下的621张图片按照划分比例(如 训练集(train):验证集(val):测试集(test)=6:2:2)进行拆分,复制到新的文件夹(D:/Code/Data/GREENTdata/),并在该文件夹下创建train、val、test三个文件夹
使用random.shuffle(current_data_index_list)打乱索引list的顺序
copy2()函数用来复制图片到另一个位置
import os
import random
from shutil import copy2
# Source folder that holds the images to be split.
file_path = r"D:/Code/Data/centerlinedata/tem_voc/JPEGImages/"
# Destination root; one sub-folder per subset is created inside it.
new_file_path = r"D:/Code/Data/GREENTdata/"
# Fraction of the images given to each subset (train : val : test = 6 : 2 : 2).
split_rate = [0.6, 0.2, 0.2]
# Sub-folder names, in the same order as split_rate.
split_names = ['train', 'val', 'test']


def split_dataset(src_dir, dst_dir, rates, names, seed=None):
    """Shuffle the files in ``src_dir`` and copy them into subset folders.

    Every file is copied exactly once, so the subsets never overlap. The
    boundary between two subsets is the cumulative rate times the number of
    files, rounded to the nearest integer; the last subset receives whatever
    is left.

    Args:
        src_dir: Folder containing the files to split. Sub-directories inside
            it are ignored.
        dst_dir: Folder under which one sub-folder per subset is created
            (missing parent folders are created as well).
        rates: Fraction of the files for each subset, e.g. ``[0.6, 0.2, 0.2]``.
        names: Sub-folder name for each subset, same length as ``rates``.
        seed: Optional seed for the shuffle; pass a value to get the same
            split on every run.

    Returns:
        A tuple with the number of files copied into each subset, in the
        order given by ``names``.

    Raises:
        ValueError: If ``rates`` and ``names`` differ in length.
    """
    if len(rates) != len(names):
        raise ValueError("rates and names must have the same length")

    # Sort first so that a fixed seed always yields the same split,
    # independent of the order in which the OS lists the folder.
    images = sorted(
        entry for entry in os.listdir(src_dir)
        if os.path.isfile(os.path.join(src_dir, entry))
    )
    print(images)
    random.Random(seed).shuffle(images)

    total = len(images)
    last = len(names) - 1
    counts = []
    start = 0
    cumulative = 0.0
    for position, (name, rate) in enumerate(zip(names, rates)):
        subset_dir = os.path.join(dst_dir, name)
        print(subset_dir)
        os.makedirs(subset_dir, exist_ok=True)

        cumulative += rate
        if position == last:
            # The final subset takes the remainder so no file is dropped.
            stop = total
        else:
            stop = max(start, min(total, round(total * cumulative)))

        for image in images[start:stop]:
            copy2(os.path.join(src_dir, image), subset_dir)
        counts.append(stop - start)
        start = stop

    return tuple(counts)


if __name__ == "__main__":
    subset_counts = split_dataset(file_path, new_file_path, split_rate, split_names)
    print("Done!", *subset_counts)
将图片对应的标签文件复制到相应的标签文件夹(如 train_labels)中,代码如下:
import os
import random
from shutil import copy2
# Folder that holds the label files.
file_path = r"D:/Code/Data/centerlinedata/tem_voc/SegmentationClassPNG/"
# Root folder of the split data set; the label folders are created inside it.
new_file_path = r"D:/Code/Data/GREENTdata/"
# Image sub-folders whose labels are collected.
split_names = ['train', 'val', 'test']
# Label sub-folder created for each image sub-folder, in the same order.
label_names = [name + '_labels' for name in split_names]


def copy_matching_labels(label_dir, image_dir, dst_dir):
    """Copy the labels that belong to the images of one subset.

    A label belongs to an image when both file names are equal once the
    extension is removed (e.g. ``00001.png`` matches ``00001.jpg``).

    Args:
        label_dir: Folder containing all label files.
        image_dir: Folder containing the images of one subset.
        dst_dir: Folder that receives the matching labels; created if missing.

    Returns:
        The number of label files copied.
    """
    os.makedirs(dst_dir, exist_ok=True)
    # A set gives constant-time lookups instead of rescanning the image list
    # for every label.
    image_stems = {os.path.splitext(name)[0] for name in os.listdir(image_dir)}

    copied = 0
    for label_file in sorted(os.listdir(label_dir)):
        if os.path.splitext(label_file)[0] in image_stems:
            copy2(os.path.join(label_dir, label_file), dst_dir)
            copied += 1
    return copied


if __name__ == "__main__":
    print(os.listdir(file_path))  # names of all label files
    for split_name, label_name in zip(split_names, label_names):
        label_subset_dir = os.path.join(new_file_path, label_name)
        # Every label folder is created, even when its image folder is absent.
        os.makedirs(label_subset_dir, exist_ok=True)
        image_subset_dir = os.path.join(new_file_path, split_name)
        if not os.path.isdir(image_subset_dir):
            # Nothing to match against for this subset.
            continue
        copy_matching_labels(file_path, image_subset_dir, label_subset_dir)
    print("Done!")