ImageFolder、DataLoader加载CUB200数据集

只要给定每一类图像所在的文件夹的路径,即可完成全部的图片的加载:
例如:
ImageFolder、DataLoader加载CUB200数据集_第1张图片
但问题是:如何划分训练集和测试集,使它们能够分别被加载?代码如下。
主要思想就是根据官方提供的数据集划分方式,使用shutil.copyfile()函数把训练集、测试集分别拷贝到不同的目录下!

import os
import numpy as np 
import shutil
# Divide the dataset into train/test folders (without annotations).
img_dir = 'data/birds/'

save_dir = 'data/Birds/'


def _read_id_map(path):
    """Parse a ``<image_id> <value>`` text file into an ordered dict.

    Lines are split on arbitrary whitespace, so a missing trailing newline
    or Windows line endings do not corrupt the last field.  Blank lines are
    skipped.

    Raises:
        ValueError: if a non-blank line has fewer than two fields.
    """
    mapping = {}
    with open(path, encoding="utf-8") as handle:
        for line_no, line in enumerate(handle, start=1):
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                raise ValueError(
                    "{}:{}: expected '<id> <value>', got {!r}".format(
                        path, line_no, line))
            mapping[fields[0]] = fields[1]
    return mapping


def split_by_class_label(img_dir, save_dir):
    """Copy images into ``<save_dir>/train/<label>/`` and ``<save_dir>/test/<label>/``.

    The split follows the dataset's own metadata files found in ``img_dir``:
    ``images.txt`` (id -> relative path), ``train_test_split.txt``
    (id -> 1 for train, 0 for test) and ``image_class_labels.txt``
    (id -> class label).  Records are matched by image id, not by line
    position.  Source images are read from ``<img_dir>/images/``.

    Args:
        img_dir: Root directory of the extracted dataset.
        save_dir: Output root; created (with parents) if it does not exist.

    Returns:
        A ``(train_count, test_count)`` tuple of copied images.

    Raises:
        KeyError: if an image id has no entry in the split or label file.
        OSError: if a metadata file or a source image cannot be read, or a
            destination cannot be written.
    """
    # Read all metadata first so a missing file fails before anything is
    # created on disk.
    image_paths = _read_id_map(os.path.join(img_dir, "images.txt"))
    train_flags = _read_id_map(os.path.join(img_dir, "train_test_split.txt"))
    class_labels = _read_id_map(os.path.join(img_dir, "image_class_labels.txt"))

    save_dir_train = os.path.join(save_dir, 'train')
    save_dir_test = os.path.join(save_dir, 'test')
    os.makedirs(save_dir_train, exist_ok=True)
    os.makedirs(save_dir_test, exist_ok=True)

    train_count = 0
    test_count = 0
    for image_id, image_path in image_paths.items():
        image_name = image_path.rsplit("/", 1)[-1]
        label = class_labels[image_id]

        if int(train_flags[image_id]):
            target_root = save_dir_train
            train_count += 1
        else:
            target_root = save_dir_test
            test_count += 1

        # exist_ok makes re-runs safe while real failures (e.g. permission
        # errors) still surface instead of being reported as "already exists".
        class_dir = os.path.join(target_root, label)
        os.makedirs(class_dir, exist_ok=True)

        src_path = os.path.join(img_dir, 'images', image_path)
        dst_path = os.path.join(class_dir, image_name)
        shutil.copyfile(src_path, dst_path)
        print("src:", src_path, "dst:", dst_path)

    return train_count, test_count


if __name__ == "__main__":
    split_by_class_label(img_dir, save_dir)

结果:
在这里插入图片描述
ImageFolder、DataLoader加载CUB200数据集_第2张图片
遇到的问题:
ImageFolder、DataLoader加载CUB200数据集_第3张图片
我使用:

# Print the class-name -> integer-index mapping held by `dataset`.
# NOTE(review): `dataset` is defined outside this snippet; presumably a
# torchvision ImageFolder built over one of the split folders -- confirm.
print(dataset.class_to_idx)

输出每一类对应的标签(即类别索引,并非 One-hot 向量),发现顺序是乱的(文件夹名 1、10、100…… 按字符串排序,与官方类别编号不对应),很不方便,于是保留原来文件夹的命名方式:
修改后的代码如下:

import os
import numpy as np
import shutil
# Divide the dataset into train/test folders (without annotations),
# keeping the dataset's original class folder names.
img_dir = 'data/birds/'
save_dir = 'data/Birds_new/'


def _read_id_map(path):
    """Parse a ``<image_id> <value>`` text file into an ordered dict.

    Lines are split on arbitrary whitespace, so a missing trailing newline
    or Windows line endings do not corrupt the last field.  Blank lines are
    skipped.

    Raises:
        ValueError: if a non-blank line has fewer than two fields.
    """
    mapping = {}
    with open(path, encoding="utf-8") as handle:
        for line_no, line in enumerate(handle, start=1):
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                raise ValueError(
                    "{}:{}: expected '<id> <value>', got {!r}".format(
                        path, line_no, line))
            mapping[fields[0]] = fields[1]
    return mapping


def split_by_class_folder(img_dir, save_dir):
    """Copy images into ``<save_dir>/train/<class_folder>/`` and ``<save_dir>/test/<class_folder>/``.

    ``<class_folder>`` is the first path component of each entry in
    ``images.txt``, so the output keeps the dataset's own folder names.
    The train/test decision comes from ``train_test_split.txt`` (1 = train,
    0 = test); records are matched by image id, not by line position.
    Source images are read from ``<img_dir>/images/``.

    Args:
        img_dir: Root directory of the extracted dataset.
        save_dir: Output root; created (with parents) if it does not exist.

    Returns:
        A ``(train_count, test_count)`` tuple of copied images.

    Raises:
        KeyError: if an image id has no entry in the split file.
        OSError: if a metadata file or a source image cannot be read, or a
            destination cannot be written.
    """
    # Read all metadata first so a missing file fails before anything is
    # created on disk.
    image_paths = _read_id_map(os.path.join(img_dir, "images.txt"))
    train_flags = _read_id_map(os.path.join(img_dir, "train_test_split.txt"))

    save_dir_train = os.path.join(save_dir, 'train')
    save_dir_test = os.path.join(save_dir, 'test')
    os.makedirs(save_dir_train, exist_ok=True)
    os.makedirs(save_dir_test, exist_ok=True)

    train_count = 0
    test_count = 0
    for image_id, image_path in image_paths.items():
        class_folder = image_path.split("/")[0]
        image_name = image_path.rsplit("/", 1)[-1]

        if int(train_flags[image_id]):
            target_root = save_dir_train
            train_count += 1
        else:
            target_root = save_dir_test
            test_count += 1

        # exist_ok makes re-runs safe while real failures (e.g. permission
        # errors) still surface instead of being reported as "already exists".
        class_dir = os.path.join(target_root, class_folder)
        os.makedirs(class_dir, exist_ok=True)

        src_path = os.path.join(img_dir, 'images', image_path)
        dst_path = os.path.join(class_dir, image_name)
        shutil.copyfile(src_path, dst_path)
        print("src:", src_path, "dst:", dst_path)

    return train_count, test_count


if __name__ == "__main__":
    split_by_class_folder(img_dir, save_dir)

ImageFolder、DataLoader加载CUB200数据集_第4张图片
这样看起来更舒服一点!

你可能感兴趣的:(深度学习图像分类数据处理,深度学习)