Flowers102花分类的数据下载和处理

本来找到了一个别人准备好的网盘文件,结果一对发现少了图片,张数没对上,最终还是自己处理吧

数据下载

https://www.robots.ox.ac.uk/~vgg/data/flowers/102/

Flowers102花分类的数据下载和处理_第1张图片
下载图中的第1、4、5项(即图片压缩包 102flowers.tgz、标签文件 imagelabels.mat、数据划分文件 setid.mat);其中第1项体积较大,可以使用迅雷下载,快得不是一点!!

下载后解压,解压后可以看到一个名为102flowers的文件夹,下面有一个jpg文件夹,里面全是散的图片

Flowers102花分类的数据下载和处理_第2张图片

数据分类

# encoding:utf-8

import scipy.io

import numpy as np

import os

from PIL import Image

import shutil

# Per-image class labels from imagelabels.mat (absolute path); 1 is subtracted
# from every value, presumably to turn MATLAB's 1-based labels into 0-based ones.
labels = np.array(
    scipy.io.loadmat(r'E:\Base_code\test\Flower102\data\imagelabels.mat')['labels'][0]
) - 1
print("labels:", labels)

# Image ids of the validation / train / test splits from setid.mat (absolute
# path), shifted by -1 so they can be used as indices into flower_dir.
setid = scipy.io.loadmat(r'E:\Base_code\test\Flower102\data\setid.mat')
validation = np.array(setid['valid'][0]) - 1
train = np.array(setid['trnid'][0]) - 1
test = np.array(setid['tstid'][0]) - 1

# Shuffle each split in place, in the order validation -> train -> test.
for split_ids in (validation, train, test):
    np.random.shuffle(split_ids)

# Absolute path of the folder holding the extracted source images.
source_dir = r"E:\Base_code\test\Flower102\102flowers\jpg"

# Full path of every entry in the source folder, sorted by name so that the
# position in the list lines up with the ids read above.
flower_dir = sorted(os.path.join(source_dir, name) for name in os.listdir(source_dir))

# print(flower_dir)

# Absolute path of the (new) folder that receives the training images.
des_folder_train = r"E:\Base_code\test\Flower102\prepare_pic\train"

# Resize every training image to 256x256 and save it, under its original file
# name, into a per-class sub-folder "c<label>" of des_folder_train.
for tid in train:
    path = flower_dir[tid]

    # Open inside a context manager so the source file handle is released as
    # soon as the resized copy exists.
    with Image.open(path) as src:
        print(src)
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
        img = src.resize((256, 256), Image.LANCZOS)

    label = labels[tid]
    print("path:", path)

    base_path = os.path.basename(path)
    print("base_path:", base_path)

    classes = "c" + str(label)
    class_path = os.path.join(des_folder_train, classes)

    # Create the class folder on first use; exist_ok avoids the separate
    # existence check.
    os.makedirs(class_path, exist_ok=True)
    print("class_path:", class_path)

    despath = os.path.join(class_path, base_path)
    print("despath:", despath)

    img.save(despath)

# Absolute path of the (new) folder that receives the validation images.
des_folder_validation = r"E:\Base_code\test\Flower102\prepare_pic\val"

# Resize every validation image to 256x256 and save it, under its original
# file name, into a per-class sub-folder "c<label>" of des_folder_validation.
for tid in validation:
    path = flower_dir[tid]

    # Open inside a context manager so the source file handle is released as
    # soon as the resized copy exists.
    with Image.open(path) as src:
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
        img = src.resize((256, 256), Image.LANCZOS)

    label = labels[tid]
    print("path:", path)

    base_path = os.path.basename(path)
    print("base_path:", base_path)

    classes = "c" + str(label)
    class_path = os.path.join(des_folder_validation, classes)

    # Create the class folder on first use; exist_ok avoids the separate
    # existence check.
    os.makedirs(class_path, exist_ok=True)
    print("class_path:", class_path)

    despath = os.path.join(class_path, base_path)
    print("despath:", despath)

    img.save(despath)

# Absolute path of the (new) folder that receives the test images.
des_folder_test = r"E:\Base_code\test\Flower102\prepare_pic\test"

# Resize every test image to 256x256 and save it, under its original file
# name, into a per-class sub-folder "c<label>" of des_folder_test.
for tid in test:
    path = flower_dir[tid]

    # Open inside a context manager so the source file handle is released as
    # soon as the resized copy exists.
    with Image.open(path) as src:
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
        img = src.resize((256, 256), Image.LANCZOS)

    label = labels[tid]
    print("path:", path)

    base_path = os.path.basename(path)
    print("base_path:", base_path)

    classes = "c" + str(label)
    class_path = os.path.join(des_folder_test, classes)

    # Create the class folder on first use; exist_ok avoids the separate
    # existence check.
    os.makedirs(class_path, exist_ok=True)
    print("class_path:", class_path)

    despath = os.path.join(class_path, base_path)
    print("despath:", despath)

    img.save(despath)

按照上述代码替换自己的路径
在这里插入图片描述
ps: 下载的标签文件(.mat)是二进制格式,直接点开看可能是乱码,没有关系,直接替换路径就好。

处理好的数据如下:
Flowers102花分类的数据下载和处理_第3张图片

接下来就可以进行自己的下游任务了,因为我打算构图,所以就不在这里写CNN分类的那些训练代码了

别人写的完整代码:
https://github.com/gaoli1537/flower102

有点奇怪的就是这个GitHub的代码确实是按照官方所给的数据划分的,train1020,valid1020,test6149,训练集好少。。很快就过拟合了
但是kaggle的划分是按照8:1:1进行划分的,也就是train 6552,valid 818,test819.

kaggle的是处理了的训练集和验证集,下载链接:https://www.kaggle.com/datasets/nunenuh/pytorch-challange-flower-dataset

然后看paperwithcode的topline都没有数据处理部分,直接加载训练的。。。

你可能感兴趣的:(数据,分类,python,numpy)