import os, shutil
# Download the dataset and extract it; the source images are read from here.
original_dataset_dir = 'train/'
# Names of each level of the target directory tree.
base_dir = 'data'
data_dirs = ['train', 'validation', 'test']
class_dirs = ['cat', 'dog']
# Half-open [start, stop) image index range used for each split, per class.
images = {
    'train': [0, 1000],  # 1000 images per class for training
    'validation': [1000, 1500],  # 500 images per class for validation
    'test': [1500, 2000],  # 500 images per class for testing
}
for d in data_dirs:
    start, stop = images[d]
    for c in class_dirs:
        # Target directory for this split/class, e.g. 'data/train/dog'.
        dataset_dir = os.path.join(base_dir, d, c)
        os.makedirs(dataset_dir, exist_ok=True)
        # Image file names follow '<class>.<index>.jpg', e.g. 'cat.1.jpg'.
        fnames = [f'{c}.{i}.jpg' for i in range(start, stop)]
        # Copy the selected images into the split directory. os.path.join
        # keeps the paths valid whether or not a directory name ends with
        # a path separator.
        for fname in fnames:
            shutil.copyfile(os.path.join(original_dataset_dir, fname),
                            os.path.join(dataset_dir, fname))
from keras import models, layers, optimizers
# Convolutional network for binary classification of 150x150 RGB images:
# four Conv2D + MaxPooling2D stages followed by a dense classifier.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
# Two fully connected layers perform the classification; the single sigmoid
# unit outputs the probability used by the binary cross-entropy loss.
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()
# Compile the model.
# The learning rate must be written 1e-4; the earlier expression `1e4-4`
# is parsed as 10000.0 - 4 == 9996.0.
# NOTE(review): `lr` is kept to match the older Keras API used elsewhere in
# this file; newer releases spell it `learning_rate` — confirm the target version.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])
from keras.preprocessing.image import ImageDataGenerator
# Read images from the split directories with ImageDataGenerator.
# Both generators only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators: resize every image to 150x150,
# yield batches of 20, and produce binary labels.
_flow_options = dict(
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)

# Training-set generator over 'data/train' (1000 cats and 1000 dogs).
train_generator = train_datagen.flow_from_directory('data/train', **_flow_options)
# Test-set generator over 'data/test' (500 cats and 500 dogs).
test_generator = test_datagen.flow_from_directory('data/test', **_flow_options)

# Pull one batch from the training generator and report the array shapes.
data_batch, labels_batch = next(iter(train_generator))
print(f'data_batch.shape:{data_batch.shape}')
print(f'label_batch.shape:{labels_batch.shape}')
# Train the model.
# NOTE(review): the previous comment on `epochs` said the count had been
# lowered from 30 to 10 because the machine could not cope, but the value in
# use is 30 — confirm which one is intended.
# NOTE(review): test_generator is passed as validation_data; the
# 'data/validation' split is not read anywhere in the visible code — confirm
# that validating on the test split is intended.
_fit_options = {
    'steps_per_epoch': 50,
    'epochs': 30,
    'validation_data': test_generator,
    'validation_steps': 50,
}
history = model.fit_generator(train_generator, **_fit_options)
# Persist the trained model to 'cats_and_dogs_small_1.h5'.
model.save('cats_and_dogs_small_1.h5')
# Plot training versus validation curves for comparison.
import matplotlib.pyplot as plt

# Per-epoch metrics recorded during training; the 'acc' keys match the
# metric name passed to model.compile.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)  # 1-based epoch numbers for the x axis

# Accuracy figure: blue dots for training, solid blue line for validation.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Loss goes on a separate figure with the same marker convention.
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()