python-将数据集划分为训练集和验证集

# Split the dataset into a training set and a validation set.
# val_size is 20% of image_count (truncated to an int); the first val_size
# elements of list_ds become the validation set and the rest the training set.
# NOTE(review): list_ds is presumably a tf.data.Dataset of file paths and
# image_count its length -- both are defined outside this snippet; confirm.
val_size = int(image_count * 0.2)
val_ds = list_ds.take(val_size)
train_ds = list_ds.skip(val_size)

# Print the number of elements in each split: training first, then validation.
for subset in (train_ds, val_ds):
    print(tf.data.experimental.cardinality(subset).numpy())
# -*- coding: utf-8 -*-

import os
import random
import sys
import shutil #移动图片
# Fraction of every class folder that is moved into the validation set.
trainval_percent = 0.2
# Fraction that stays behind as training data. Not read by the code below;
# kept as configuration for readers of this script.
train_percent = 0.8

# WARNING: files are MOVED, not copied. After the script finishes, the source
# folder contains only the remaining images, i.e. it becomes the training set.
src_filepath = r'原数据集地址'
dst_val = r"保存的验证集地址"


def move_validation_split(src_root, dst_root, val_fraction):
    """Move a random share of each class sub-folder from src_root to dst_root.

    ``src_root`` is expected to contain one sub-folder per class. For every
    such sub-folder, ``int(file_count * val_fraction)`` randomly chosen
    entries are moved to ``dst_root/<class name>/``; the destination folder
    is created on demand.

    Args:
        src_root: Folder holding the per-class sub-folders.
        dst_root: Folder that receives the validation files. A trailing path
            separator is not required.
        val_fraction: Share of each class folder to move, between 0 and 1.

    Returns:
        A dict mapping each class folder name to the list of entry names that
        were moved out of it (an empty list when nothing was moved).

    Raises:
        OSError: If ``src_root`` cannot be listed or a move fails.
        ValueError: If ``val_fraction`` yields a sample size outside
            ``0..file_count`` (raised by ``random.sample``).
    """
    class_names = os.listdir(src_root)
    print(class_names)
    print(len(class_names))

    moved = {}
    for class_name in class_names:
        print(class_name)
        class_src = os.path.join(src_root, class_name)
        print(class_src)
        if not os.path.isdir(class_src):
            # A stray regular file next to the class folders cannot be
            # listed; skip it instead of crashing with NotADirectoryError.
            continue

        image_names = os.listdir(class_src)
        image_count = len(image_names)
        print(image_count)

        tv = int(image_count * val_fraction)
        print("tv=", tv)
        # Sample indices rather than names so the printed selection shows
        # positions in the directory listing.
        picked = random.sample(range(image_count), tv)
        print("trainval=", picked)

        # Join with a real path separator: plain string concatenation would
        # write into "<dst_root><class_name>/" next to the intended folder.
        class_dst = os.path.join(dst_root, class_name)
        if picked:
            # A missing destination folder is the normal first-run state,
            # not an error, so it is created silently.
            os.makedirs(class_dst, exist_ok=True)

        moved_names = []
        for index in picked:
            image_name = image_names[index]
            print("trainval_name=", image_name)
            image_src = os.path.join(class_src, image_name)
            image_dst = os.path.join(class_dst, image_name)
            print("trainval_name_path=", image_src)      # source path
            print("trainval_name_path_new=", image_dst)  # destination path
            shutil.move(image_src, image_dst)
            moved_names.append(image_name)
        moved[class_name] = moved_names
    return moved


def main():
    """Run the split using the module-level configuration values."""
    print("运行到行号:", sys._getframe().f_lineno)
    move_validation_split(src_filepath, dst_val, trainval_percent)
    print("结束 运行到行号:", sys._getframe().f_lineno)


# Only move files when executed as a script, so importing this module is safe.
if __name__ == "__main__":
    main()

​​​​​​​

你可能感兴趣的:(python)