数据集来源:http://download.tensorflow.org/example_images/flower_photos.tgz
Windows中路径使用反斜杠,容易与转义字符混淆。所以书写路径时有两种方式防止转义:使用原始字符串(r'...'),或者把每个反斜杠写成双反斜杠(\\)。
import os
# Option 1: raw string literal -- backslashes are taken literally.
ImagePath=r'D:\Projects\DeepLearning\Dataset\flower_photos'
# Option 2: escape every backslash. This rebinds ImagePath to the same path
# as option 1; both spellings are shown only for comparison.
ImagePath='D:\\Projects\\DeepLearning\\Dataset\\flower_photos'
# The result of this bare call is discarded; it is only echoed when the line
# is typed into an interactive session.
os.listdir(ImagePath)
# Print the names of the entries directly under the dataset root.
print(os.listdir(ImagePath))
输出
['daisy', 'dandelion', 'LICENSE.txt', 'roses', 'sunflowers', 'tulips']
def mkdir_p(path):
    """Create the directory ``path`` together with any missing parents.

    Calling it on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces a separate os.path.exists() check, which could
    # race with another process creating the directory in between.
    # os.mkdir(path) is the single-level alternative: it creates only the
    # last component and fails when a parent directory is missing.
    os.makedirs(path, exist_ok=True)
这里有两个方法os.mkdir(path)和os.makedirs(path)
os.mkdir(path),它的功能是只创建最后一级目录,前提是前面的各级目录已存在,如果不存在会报异常(FileNotFoundError)。
os.makedirs(path),单从写法上就能猜出它的区别,可以一次创建多级目录,哪怕中间目录不存在也能创建。
注意:不要重复运行该脚本。每次运行都会重新随机抽样,而之前复制到train和val中的文件不会被清除,因此同一张图片可能同时出现在训练集和验证集中,导致两者混在一起。
import os
import random
import shutil
def mkdir_p(path):
    """Create the directory ``path`` together with any missing parents.

    Calling it on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent without a separate
    # os.path.exists() check, which could race with another process
    # creating the same directory.
    os.makedirs(path, exist_ok=True)
# Root folder of the extracted flower_photos dataset.
ImagePath = 'D:\\Projects\\DeepLearning\\Dataset\\flower_photos'

# Folders that receive the split. They are created inside ImagePath, so they
# must never be treated as flower classes themselves.
split_dirs = ('train', 'val')

# Every class is a sub-directory of the dataset root. The isdir() check skips
# stray files such as LICENSE.txt, and excluding split_dirs keeps an existing
# train/ or val/ folder from being picked up as a class.
flower_class = [
    flower for flower in os.listdir(ImagePath)
    if os.path.isdir(os.path.join(ImagePath, flower)) and flower not in split_dirs
]

# Create <root>/train/<class> and <root>/val/<class> up front.
for subset in split_dirs:
    mkdir_p(os.path.join(ImagePath, subset))
    for flower in flower_class:
        mkdir_p(os.path.join(ImagePath, subset, flower))

# Fraction of each class that is copied into the validation set.
split_rate = 0.2

# NOTE: every run draws a fresh random sample and earlier copies are not
# removed, so empty train/ and val/ before running the split again.
for flower in flower_class:
    flower_path = os.path.join(ImagePath, flower)
    images = os.listdir(flower_path)
    numbers = len(images)
    # A set gives O(1) membership tests instead of scanning a list per image.
    eval_index = set(random.sample(images, int(numbers * split_rate)))
    for index, image in enumerate(images):
        # Sampled images go to the validation set, everything else to train.
        subset = 'val' if image in eval_index else 'train'
        images_path = os.path.join(flower_path, image)
        new_path = os.path.join(ImagePath, subset, flower)
        shutil.copy(images_path, new_path)
        # '\r' with end='' rewrites the same console line as a progress counter.
        print('\r[{}] processing [{}/{}]'.format(flower, index+1, numbers), end='')
    # Finish the progress line for this class.
    print()
print('Split Finish')
输出
[daisy] processing [633/633]
[dandelion] processing [898/898]
[roses] processing [641/641]
[sunflowers] processing [699/699]
[tulips] processing [799/799]
Split Finish