FGVC-Aircraft 数据集划分 - python 代码

划分好的数据集下载

FGVC Aircraft dataset by classes folder

文件路径说明

src_dir 为数据集压缩包解压后未改动的data目录
运行代码会在 dst_dir 下生成 fgvc_family、fgvc_manufacturer、fgvc_variant 三个文件夹
在 fgvc_family、fgvc_variant 划分下，F/A-18 将会被替换为 F_A-18，以免破坏文件路径。

注:解压数据集命令:tar -xzvf fgvc-aircraft-2013b.tar.gz

代码


import pandas as pd
import os, shutil, time
from tqdm import tqdm


def my_mkdir(path):
    """Create directory ``path``, including missing parents, if it is absent.

    Calling this on a directory that already exists is a no-op, so it is
    safe to call repeatedly for the same path.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate "exists?" check, so there is no
    # window between checking and creating in which another process could
    # create the directory and make makedirs fail.
    os.makedirs(path, exist_ok=True)


# ===== settings =====
# Output root: one 'fgvc_<method>' folder per split method is created here.
dst_dir = '/home/gpc/disk_1/datasets'
# Directory holding the 'images_<method>_<dataset>.txt' list files.
src_dir = '/home/gpc/disk_1/datasets/fgvc-aircraft-2013b/data'
# Source images are read from the 'images' sub-folder of src_dir.
img_dir = os.path.join(src_dir, 'images')


if __name__ == '__main__':
    # Start time, used for the elapsed-time report printed at the end.
    begin = time.time()

    split_names = ['train', 'val', 'trainval', 'test']

    for method in ['family', 'manufacturer', 'variant']:
        # Each labelling method gets its own tree: <dst_dir>/fgvc_<method>
        method_dir = os.path.join(dst_dir, 'fgvc_{}'.format(method))
        my_mkdir(method_dir)

        for dataset in split_names:
            dataset_dir = os.path.join(method_dir, dataset)
            my_mkdir(dataset_dir)

            # Load the list file for this (method, dataset) pair as a flat
            # array with one entry per line.
            list_path = os.path.join(src_dir, 'images_{}_{}.txt'.format(method, dataset))
            entries = pd.read_csv(list_path, header=None).to_numpy().flatten()

            progress = tqdm(entries, desc='Copying {} {}'.format(method, dataset))
            for entry in progress:
                # A '/' (e.g. in "F/A-18") would be read as a path separator
                # when the class name becomes a folder name, so replace it.
                entry = entry.replace('/', '_')

                # Split at the first space only: the part before it names the
                # image, the remainder (which may contain spaces) the class.
                img, cls = entry.split(' ', 1)

                cls_dir = os.path.join(dataset_dir, cls)
                my_mkdir(cls_dir)

                file_name = '{}.jpg'.format(img)
                src_path = os.path.join(img_dir, file_name)
                dst_path = os.path.join(cls_dir, file_name)
                shutil.copyfile(src_path, dst_path)

    print('\nAll Done, {} s used.'.format(time.time() - begin))

你可能感兴趣的:(python,dataset)