数据集 | 抽取

从 dataset 文件夹中随机抽取 45000 张图片放入 train 文件夹,再从剩余图片中随机抽取 5000 张放入 test 文件夹,保证 train 和 test 文件夹没有交集。dataset 文件夹中共包括 100000 张图片,文件名从 ILSVRC2012_test_00000001.JPEG 到 ILSVRC2012_test_00100000.JPEG。

import os
import random
import shutil

# Paths of the source folder and of the train / test destination folders,
# plus the number of images to copy into each destination.
dataset_folder = "/home/caobin/dongb/dataset/ImageNet/total"
train_folder = "/home/caobin/dongb/dataset/ImageNet/train"
test_folder = "/home/caobin/dongb/dataset/ImageNet/test"
num_images_to_copy_train = 45000
num_images_to_copy_test = 5000


def _copy_images(image_names, source_dir, target_dir):
    """Copy every file named in *image_names* from *source_dir* to *target_dir*."""
    for image_name in image_names:
        shutil.copy(
            os.path.join(source_dir, image_name),
            os.path.join(target_dir, image_name),
        )


def split_dataset(source_dir, train_dir, test_dir, num_train, num_test):
    """Copy two disjoint random subsets of the files in *source_dir*.

    Args:
        source_dir: Folder holding the full set of images.
        train_dir: Destination folder for the training subset (created if
            it does not exist).
        test_dir: Destination folder for the test subset (created if it
            does not exist).
        num_train: Number of files to copy into *train_dir*.
        num_test: Number of files to copy into *test_dir*.

    Returns:
        A ``(train_names, test_names)`` tuple with the file names that were
        copied into each destination folder.

    Raises:
        ValueError: If a count is negative or *source_dir* holds fewer than
            ``num_train + num_test`` regular files. This is checked before
            anything is created or copied.
    """
    if num_train < 0 or num_test < 0:
        raise ValueError("num_train and num_test must be non-negative")

    # Keep regular files only: a sub-directory returned by os.listdir would
    # make shutil.copy fail half-way through the run. Sorting gives a stable
    # population, so the split is reproducible once random.seed() is set.
    image_files = sorted(
        name
        for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )

    # Fail fast instead of discovering the shortage after the train copy.
    total_needed = num_train + num_test
    if total_needed > len(image_files):
        raise ValueError(
            f"requested {total_needed} images but {source_dir} "
            f"only contains {len(image_files)} files"
        )

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # One draw without replacement, sliced in two: the parts cannot overlap,
    # and every sub-slice of random.sample's result is itself a valid
    # random sample.
    selected = random.sample(image_files, total_needed)
    train_names = selected[:num_train]
    test_names = selected[num_train:]

    _copy_images(train_names, source_dir, train_dir)
    print(f"已从{source_dir}中随机选择并复制了{num_train}张图片到{train_dir}。")

    _copy_images(test_names, source_dir, test_dir)
    print(f"已从{source_dir}中随机选择并复制了{num_test}张图片到{test_dir}。")

    return train_names, test_names


if __name__ == "__main__":
    split_dataset(
        dataset_folder,
        train_folder,
        test_folder,
        num_images_to_copy_train,
        num_images_to_copy_test,
    )

你可能感兴趣的:(人工智能,机器学习,深度学习)