Mini-ImageNet处理和制作训练集、验证集、测试集

记录深度学习过程中处理数据集的一些方法

文章目录

  • Mini-ImageNet数据集处理
  • 深度学习过程中,制作训练集和验证集、测试集


Mini-ImageNet数据集处理

数据准备:train.csv、val.csv、test.csv 三个标签文件,以及 images 图片文件夹。下面的脚本根据 CSV 中的标签,把 images 中的图片分别整理到 train、val、test 三个目录下,并按类别建立子文件夹。

import csv
import os
from PIL import Image

train_csv_path = "train.csv"
val_csv_path = "val.csv"
test_csv_path = "test.csv"


def _read_labels(csv_path):
    """Read one split CSV and return a dict mapping column 0 to column 1.

    The first row is treated as a header and skipped.  The rest of the
    script uses the keys as image file names and the values as the name
    of the class sub-folder.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A dict ``{row[0]: row[1]}`` built from every data row.
    """
    labels = {}
    # newline="" is what the csv module asks for, so that newlines inside
    # quoted fields are handled by the reader and not by the file object.
    with open(csv_path, newline="") as csvfile:
        csv_reader = csv.reader(csvfile)
        # Skip the header; the default keeps an empty file from raising.
        next(csv_reader, None)
        for row in csv_reader:
            # Blank lines come back as [] and would break row[0] / row[1].
            if len(row) < 2:
                continue
            labels[row[0]] = row[1]
    return labels


train_label = _read_labels(train_csv_path)
val_label = _read_labels(val_csv_path)
test_label = _read_labels(test_csv_path)

img_path = "mini-imagenet/images"
new_img_path = "dataset/mini-imagenet"

# Splits are checked in this order, so a file listed in more than one CSV
# ends up in the first matching split only (train, then val, then test).
split_labels = (
    ("train", train_label),
    ("val", val_label),
    ("test", test_label),
)

for png in os.listdir(img_path):
    for split_name, labels in split_labels:
        if png not in labels:
            continue
        # Destination layout: <new_img_path>/<split>/<class label>/<file>
        temp_path = new_img_path + '/' + split_name + '/' + labels[png]
        os.makedirs(temp_path, exist_ok=True)
        # Open only files that belong to a split, and close the handle as
        # soon as the image has been written out.
        with Image.open(img_path + '/' + png) as im:
            im.save(temp_path + '/' + png)
        break
print("Finish")

深度学习过程中,制作训练集和验证集、测试集

import os, random, shutil
def moveFile(fileDir, tarDir=None, rate=0.1):
    """Move a random fraction of the entries in ``fileDir`` into ``tarDir``.

    Args:
        fileDir: Source directory, with or without a trailing separator.
        tarDir: Destination directory.  This function does not create it.
            When omitted, the module-level global ``tarDir`` is used, which
            is how the original script passed the destination.
        rate: Fraction of the entries to pick, e.g. 0.1 picks 10 out of
            100.  The count is truncated with ``int()``.

    Raises:
        NameError: If ``tarDir`` is omitted and no global ``tarDir`` exists.
    """
    if tarDir is None:
        # Backward compatibility with callers that set a global instead of
        # passing the destination explicitly.
        tarDir = globals().get("tarDir")
        if tarDir is None:
            raise NameError("name 'tarDir' is not defined")

    pathDir = os.listdir(fileDir)  # names of the entries in the source folder
    picknumber = int(len(pathDir) * rate)  # how many entries to take
    sample = random.sample(pathDir, picknumber)  # random pick without repeats
    print(sample)
    for name in sample:
        # os.path.join gives the right path whether or not the directory
        # strings end with a separator.
        shutil.move(os.path.join(fileDir, name), os.path.join(tarDir, name))
    return

def copyFile(fileDir, tarDir=None, rate=0.01):
    """Copy a random fraction of the entries in ``fileDir`` into ``tarDir``.

    Args:
        fileDir: Source directory, with or without a trailing separator.
        tarDir: Destination directory.  This function does not create it.
            When omitted, the module-level global ``tarDir`` is used, which
            is how the original script passed the destination.
        rate: Fraction of the entries to pick, e.g. 0.01 picks 1 out of
            100.  The count is truncated with ``int()``.

    Raises:
        NameError: If ``tarDir`` is omitted and no global ``tarDir`` exists.
    """
    if tarDir is None:
        # Backward compatibility with callers that set a global instead of
        # passing the destination explicitly.
        tarDir = globals().get("tarDir")
        if tarDir is None:
            raise NameError("name 'tarDir' is not defined")

    pathDir = os.listdir(fileDir)  # names of the entries in the source folder
    picknumber = int(len(pathDir) * rate)  # how many entries to take
    sample = random.sample(pathDir, picknumber)  # random pick without repeats
    print(sample)
    for name in sample:
        # os.path.join gives the right path whether or not the directory
        # strings end with a separator.
        shutil.copy(os.path.join(fileDir, name), os.path.join(tarDir, name))
    return

if __name__ == '__main__':
    # Placeholders: set these to the root folder holding one sub-folder per
    # class, and to the root folder the sampled files should be moved into.
    img_file_file_path = " "
    new_img_file_path = " "
    i = 0
    for file in os.listdir(img_file_file_path):
        fileDir = img_file_file_path + '/' + file + '/'  # source class folder
        # Skip stray regular files in the root; only folders can be sampled.
        if not os.path.isdir(fileDir):
            continue
        # tarDir stays a module-level name because moveFile(fileDir) reads
        # the destination from the global of that name.
        tarDir = new_img_file_path + '/' + file + '/'  # destination class folder
        os.makedirs(tarDir, exist_ok=True)
        moveFile(fileDir)
        i += 1
        # i counts class folders handled so far, not individual files.
        print("Number of folders processed: ", i)
    print("Finish all")

你可能感兴趣的:(Python,python,机器学习,深度学习)