Data augmentation must NOT be applied to the validation or test sets!!
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author:MXD
@file: 小型数据集训练卷积神经网络.py
@time: 2019/01/20 14:36
@software: PyCharm
"""
import os, shutil
### Copy the data into separate train/validation/test directories
# Path to the directory where the original dataset was uncompressed
original_dataset_dir = r'D:\PythonPractical\KerasPra\download\kaggle_original_data'
base_dir = r'D:\PythonPractical\KerasPra\download\cats_and_dogs_small'
os.mkdir(base_dir)
train_dir = os.path.join(base_dir, 'train')
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir, 'validation')
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir, 'test')
os.mkdir(test_dir)
train_cats_dir = os.path.join(train_dir, 'cats')
os.mkdir(train_cats_dir)
train_dogs_dir = os.path.join(train_dir, 'dogs')
os.mkdir(train_dogs_dir)
validation_cats_dir = os.path.join(validation_dir, 'cats')
os.mkdir(validation_cats_dir)
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
os.mkdir(validation_dogs_dir)
test_cats_dir = os.path.join(test_dir, 'cats')
os.mkdir(test_cats_dir)
test_dogs_dir = os.path.join(test_dir, 'dogs')
os.mkdir(test_dogs_dir)
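# Note (an aside, not in the original): os.mkdir raises FileExistsError if the
# script is re-run. An idempotent alternative is os.makedirs with exist_ok:
#     for d in (train_cats_dir, train_dogs_dir, validation_cats_dir,
#               validation_dogs_dir, test_cats_dir, test_dogs_dir):
#         os.makedirs(d, exist_ok=True)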
fnames = ["cat.{}.jpg".format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(train_cats_dir, fname)
    shutil.copy(src, dst)
fnames = ["cat.{}.jpg".format(i) for i in range(1000, 1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(validation_cats_dir, fname)
    shutil.copy(src, dst)
fnames = ["cat.{}.jpg".format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(test_cats_dir, fname)
    shutil.copy(src, dst)
fnames = ["dog.{}.jpg".format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(train_dogs_dir, fname)
    shutil.copy(src, dst)
fnames = ["dog.{}.jpg".format(i) for i in range(1000, 1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(validation_dogs_dir, fname)
    shutil.copy(src, dst)
fnames = ["dog.{}.jpg".format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(test_dogs_dir, fname)
    shutil.copy(src, dst)
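# Quick sanity check (an addition; the expected counts follow from the ranges above):
print('total training cat images:', len(os.listdir(train_cats_dir)))         # expect 1000
print('total training dog images:', len(os.listdir(train_dogs_dir)))         # expect 1000
print('total validation cat images:', len(os.listdir(validation_cats_dir)))  # expect 500
print('total validation dog images:', len(os.listdir(validation_dogs_dir)))  # expect 500
print('total test cat images:', len(os.listdir(test_cats_dir)))              # expect 500
print('total test dog images:', len(os.listdir(test_dogs_dir)))              # expect 500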
### Build the network
from keras import layers
from keras import models
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(layers.MaxPool2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPool2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPool2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPool2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))  # Dropout layer: further reduces overfitting
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()
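# For reference (arithmetic added for clarity, not printed by the original code):
# with 'valid' 3x3 convolutions and 2x2 max-pooling, the feature map shrinks
# 150 -> 148 -> 74 -> 72 -> 36 -> 34 -> 17 -> 15 -> 7, so Flatten outputs
# 7 * 7 * 128 = 6272 values feeding the Dense(512) layer.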
from keras import optimizers
model.compile(loss='binary_crossentropy', optimizer=optimizers.RMSprop(lr=1e-4), metrics=['acc'])
# Use ImageDataGenerator to read images from directories
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(rescale=1. / 255)  # Rescale all images by 1/255
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
    train_dir, target_size=(150, 150), batch_size=20, class_mode='binary'
)  # binary_crossentropy loss requires binary labels; target_size resizes all images
validation_generator = test_datagen.flow_from_directory(
    validation_dir, target_size=(150, 150), batch_size=20, class_mode='binary')
history = model.fit_generator(train_generator, steps_per_epoch=100,
                              epochs=30,
                              validation_data=validation_generator,
                              validation_steps=50)
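# Where the step counts come from (worked out here for clarity): 2000 training
# images / batch_size 20 = 100 steps_per_epoch; 1000 validation images / 20 = 50
# validation_steps. A sketch that derives them instead of hard-coding (the
# directory iterators expose .samples and .batch_size):
#     steps_per_epoch = train_generator.samples // train_generator.batch_size
#     validation_steps = validation_generator.samples // validation_generator.batch_size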
model.save('cats_and_dogs_small_1.h5')  # Saving the model after training is a good habit
# Plotting
import matplotlib.pyplot as plt
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
### Data augmentation
"""
Data augmentation generates more training data from existing training samples, by
augmenting the samples via a number of random transformations that yield
believable-looking images. The goal is for the model never to see the exact same
picture twice during training.
This exposes the model to more aspects of the data, allowing it to generalize better.
Note: data augmentation must NOT be applied to the validation or test sets!!!
"""
from keras.preprocessing import image  # Module with image-preprocessing utilities
from keras.preprocessing.image import ImageDataGenerator
# ImageDataGenerator: performs the data augmentation
datagen = ImageDataGenerator(
    rotation_range=40,       # Angle (in degrees, 0-180) within which to randomly rotate images
    width_shift_range=0.2,   # width_shift and height_shift are ranges (as a fraction of total
    height_shift_range=0.2,  # width or height) within which to randomly translate images
    shear_range=0.2,         # Range for randomly applying shearing transformations
    zoom_range=0.2,          # Range for randomly zooming inside images
    horizontal_flip=True,    # Randomly flip half the images horizontally; relevant when there is
                             # no assumption of horizontal asymmetry (e.g. real-world pictures)
    fill_mode='nearest'      # Strategy for filling newly created pixels, which can appear
                             # after a rotation or a width/height shift
)
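# The datagen above is demonstrated on a single image below. For actual training,
# augmentation is wired into the training generator only, with validation kept
# rescale-only (a sketch; the augmented_* names are illustrative additions, not
# part of the original script):
augmented_train_datagen = ImageDataGenerator(
    rescale=1. / 255,  # rescaling is still needed when training, unlike in the preview below
    rotation_range=40, width_shift_range=0.2, height_shift_range=0.2,
    shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
augmented_train_generator = augmented_train_datagen.flow_from_directory(
    train_dir, target_size=(150, 150), batch_size=32, class_mode='binary')
# The validation generator stays a plain ImageDataGenerator(rescale=1. / 255),
# exactly as built earlier.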
fnames = [os.path.join(train_cats_dir, fname) for fname in os.listdir(train_cats_dir)]
img_path = fnames[3]  # Pick one image to augment
img = image.load_img(img_path, target_size=(150, 150))  # Read the image and resize it
x = image.img_to_array(img)  # Convert it to a NumPy array with shape (150, 150, 3)
x = x.reshape((1,) + x.shape)  # Reshape it to (1, 150, 150, 3)
# Generate batches of randomly transformed images. The loop is infinite, so you must break it at some point.
i = 0
for batch in datagen.flow(x, batch_size=1):
    plt.figure(i)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    i += 1
    if i % 4 == 0:
        break
plt.show()
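# To inspect augmented images on disk instead of on screen, datagen.flow() also
# accepts save_to_dir (a sketch; 'preview' is an arbitrary folder that must
# already exist):
#     for i, batch in enumerate(datagen.flow(x, batch_size=1, save_to_dir='preview',
#                                            save_prefix='cat', save_format='jpeg')):
#         if i >= 3:
#             break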