如何将训练图片变成datasets.ImageFolder格式

如何将训练图片变成datasets.ImageFolder格式

训练用的数据往往只给出训练集的 .csv 文件(第一列是图片路径,第二列是对应的标签)和全部训练图片,如何把这些图片整理成 datasets.ImageFolder 所要求的目录格式?
其中ImageFolder格式为:
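root/
├── 类别A/
│   ├── xxx.png
│   └── xxy.png
└── 类别B/
    ├── 123.png
    └── 456.png
(即根目录下每个类别一个子文件夹,子文件夹名就是类别标签,里面存放该类别的所有图片。)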
其中训练集的图片放在一个文件夹中,测试集的图片放在另一个文件夹中。
代码如下:

# Imports required by the whole script (CSV loading, label counting,
# directory creation and file copying).
import os
import shutil

import numpy as np
import pandas as pd

# train.csv is read positionally further down: column 0 is the image path,
# column 1 is the label.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
# Fraction of each class's training images held out for validation.
val_ratio = 0.2

# Output roots; each one receives one sub-folder per label, which is the
# layout datasets.ImageFolder expects.
train_path = 'train_images/'
val_path = 'val_images/'
# NOTE: test_path is defined here but not used in the visible part of the script.
test_path = 'test_images/'

print('train data shape: ' ,train_data.shape)
print('test data shape: ' ,test_data.shape)
# Optional sanity checks on the label column:
# print(train_data.iloc[:,1].unique())
# print(train_data.iloc[:,1].count())
# Split the training set into a training part and a validation part.

# Per-class totals: maps each distinct label to the number of rows carrying it.
label_list = train_data.iloc[:,1].tolist()
unique_labels, occurrences = np.unique(label_list, return_counts=True)
label_dict = dict(zip(unique_labels, occurrences))

# Create both output roots up front if they are missing.
for out_dir in (train_path, val_path):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

# Column 0 holds the image paths, column 1 the matching labels.
image_paths = train_data.iloc[:,0]
labels = train_data.iloc[:,1]

# Running per-class counter of images already assigned; every label starts at 0.
count_dict = dict.fromkeys(label_dict, 0)

# Copy every training image into train_images/<label>/ or val_images/<label>/.
# Within each class the images are taken in CSV order: the first
# label_dict[label] * (1 - val_ratio) of them go to the training folder and
# the remainder to the validation folder.
for a, b in zip(image_paths, labels):
    # count_dict[b] is the number of images of this class already placed, so
    # the comparison must be strict; '<=' would push one extra image per class
    # into the training set (e.g. 9/1 instead of 8/2 for 10 images).
    if count_dict[b] < label_dict[b] * (1 - val_ratio):
        dst_root = train_path
    else:
        dst_root = val_path
    count_dict[b] += 1

    # str() keeps non-string (e.g. numeric) labels usable as folder names.
    dst_dir = os.path.join(dst_root, str(b))
    os.makedirs(dst_dir, exist_ok=True)
    print(dst_dir)
    shutil.copy(a, dst_dir)

你可能感兴趣的:(python,机器学习,人工智能)