Building a CNN to Recognize Your Cats and Dogs

This is the third post in my notes on the Udacity deep learning course; it classifies cat and dog images and uses image augmentation along the way.
Course link: https://classroom.udacity.com/courses/ud187

(Image 1: the example dog photo the model should recognize as a dog)
The goal is to train a CNN that recognizes the puppy above as a dog. Along the way we will run into overfitting and input images of inconsistent size, but don't worry, this post handles both. To keep the notes consistent with the course, the code is left unmodified; feel free to tweak the model if you want higher accuracy.

import os
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
tf.logging.set_verbosity(tf.logging.ERROR)

Data loading

Downloading online is slow and the connection tends to drop, so you can download the archive yourself and extract it to C:\Users\<username>\.keras\datasets.
Data link: https://download.csdn.net/download/dr_guo/11211405

# url = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
# zip_dir = tf.keras.utils.get_file('cats_and_dogs_filterted.zip', origin=url, extract=True)
# zip_dir_base = os.path.dirname(zip_dir)
# !find $zip_dir_base -type d -print
# !for /r c:\ %i in (zip_dir_base) do echo %i
# import zipfile
# local_zip = 'C:\\Users\\guoco\\.keras\\datasets\\cats_and_dogs_filtered.zip'
# zip_ref = zipfile.ZipFile(local_zip, 'r')
# zip_ref.extractall('C:\\Users\\guoco\\.keras\\datasets')
# zip_ref.close()
base_dir = 'C:\\Users\\guoco\\.keras\\datasets\\cats_and_dogs_filtered'
# base_dir = os.path.join(os.path.dirname(zip_dir), 'cats_and_dogs_filterted')

train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'validation')

train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
val_cats_dir = os.path.join(val_dir, 'cats')
val_dogs_dir = os.path.join(val_dir, 'dogs')

Understanding the data

num_cats_tr = len(os.listdir(train_cats_dir))
num_dogs_tr = len(os.listdir(train_dogs_dir))

num_cats_val = len(os.listdir(val_cats_dir))
num_dogs_val = len(os.listdir(val_dogs_dir))

total_tr = num_cats_tr + num_dogs_tr
total_val = num_cats_val + num_dogs_val
num_cats_tr, num_dogs_tr, num_cats_val, num_dogs_val, total_tr, total_val
(1000, 1000, 500, 500, 2000, 1000)

Setting model parameters

batch_size = 100
img_shape = 150

Data preparation

train_img_generator = ImageDataGenerator(rescale=1./255)
val_img_generator = ImageDataGenerator(rescale=1./255)
train_data_gen = train_img_generator.flow_from_directory(batch_size=batch_size,
                                                        directory=train_dir,
                                                        shuffle=True,
                                                        target_size=(img_shape, img_shape),
                                                        class_mode='binary')
Found 2000 images belonging to 2 classes.
val_data_gen = val_img_generator.flow_from_directory(batch_size=batch_size,
                                                        directory=val_dir,
                                                        shuffle=False,
                                                        target_size=(img_shape, img_shape),
                                                        class_mode='binary')
Found 1000 images belonging to 2 classes.

Visualizing training images

sample_training_images, _ = next(train_data_gen)
def plot_imgs(imgs_arr):
    fig, axes = plt.subplots(1, 5, figsize=(20, 20))
    axes = axes.flatten()
    for img, ax in zip(imgs_arr, axes):
        ax.imshow(img)
    plt.tight_layout()
    plt.show()
plot_imgs(sample_training_images[:5])

(Image 2: five sample training images)

Model

# define model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax')
])
# compile model
model.compile(optimizer='adam',
             loss='sparse_categorical_crossentropy',
             metrics=['acc'])
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 128)       147584    
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 7, 7, 128)         0         
_________________________________________________________________
flatten (Flatten)            (None, 6272)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               3211776   
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 1026      
=================================================================
Total params: 3,453,634
Trainable params: 3,453,634
Non-trainable params: 0
_________________________________________________________________
# train model
epochs = 10
history = model.fit_generator(
    train_data_gen,
    # Note: the course notebook divides by `epochs` here, giving ceil(2000/10) = 200 steps of
    # 100 images (about 10 passes over the training set per reported epoch); dividing by
    # `batch_size` instead would give the conventional one pass per epoch.
    steps_per_epoch = int(np.ceil(total_tr / float(epochs))),
    epochs=epochs,
    validation_data = val_data_gen,
    validation_steps = int(np.ceil(total_val / float(epochs)))
)
Epoch 1/10
200/200 [==============================] - 53s 265ms/step - loss: 0.5873 - acc: 0.6619 - val_loss: 0.5766 - val_acc: 0.7160
Epoch 2/10
200/200 [==============================] - 42s 208ms/step - loss: 0.1329 - acc: 0.9461 - val_loss: 1.3645 - val_acc: 0.7180
Epoch 3/10
200/200 [==============================] - 41s 206ms/step - loss: 0.0261 - acc: 0.9916 - val_loss: 1.6375 - val_acc: 0.7450
Epoch 4/10
200/200 [==============================] - 42s 208ms/step - loss: 2.5784e-04 - acc: 1.0000 - val_loss: 1.8278 - val_acc: 0.7440
Epoch 5/10
200/200 [==============================] - 41s 205ms/step - loss: 5.6702e-05 - acc: 1.0000 - val_loss: 1.9028 - val_acc: 0.7450
Epoch 6/10
200/200 [==============================] - 42s 209ms/step - loss: 3.1396e-05 - acc: 1.0000 - val_loss: 1.9526 - val_acc: 0.7500
Epoch 7/10
200/200 [==============================] - 41s 207ms/step - loss: 1.9819e-05 - acc: 1.0000 - val_loss: 1.9947 - val_acc: 0.7530
Epoch 8/10
200/200 [==============================] - 41s 206ms/step - loss: 1.2867e-05 - acc: 1.0000 - val_loss: 2.0304 - val_acc: 0.7470
Epoch 9/10
200/200 [==============================] - 41s 204ms/step - loss: 8.8153e-06 - acc: 1.0000 - val_loss: 2.0579 - val_acc: 0.7490
Epoch 10/10
200/200 [==============================] - 41s 205ms/step - loss: 6.4347e-06 - acc: 1.0000 - val_loss: 2.0853 - val_acc: 0.7470
acc = history.history['acc']
val_acc = history.history['val_acc']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Train Acc')
plt.plot(epochs_range, val_acc, label='Val Acc')
plt.legend(loc='lower right')
plt.title('Train and Val Acc')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Train Loss')
plt.plot(epochs_range, val_loss, label='Val Loss')
plt.legend(loc='upper right')
plt.title('Train and Val Loss')
plt.savefig('foo.png')
plt.show()

(Image 3: training and validation accuracy/loss curves for the first model)
The overfitting is obvious here; the rest of the post addresses it.

Softmax vs. the sigmoid function

Above we used the following CNN architecture:

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),

    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),

    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),

    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax')
])  

Note that the final layer (the classifier) consists of a Dense layer with 2 output units and a softmax activation, as shown below:

 tf.keras.layers.Dense(2, activation='softmax')

When working on a binary classification problem, another common approach is a classifier consisting of a Dense layer with 1 output unit and a sigmoid activation, as shown below:

 tf.keras.layers.Dense(1, activation='sigmoid')

Both approaches work for binary classification, but note that if you decide to use a sigmoid activation in the classifier, you also need to change the loss argument in model.compile() from 'sparse_categorical_crossentropy' to 'binary_crossentropy', as shown below:

model.compile(optimizer='adam', 
              loss='binary_crossentropy',
              metrics=['accuracy'])
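
To make the difference concrete, below is a minimal sketch of the sigmoid variant of the classifier and of how its predictions turn into class labels. It is illustrative only: the truncated architecture, the dummy batch, and the 0.5 threshold are assumptions for this example (reusing the numpy and tensorflow imports from the top of the post), not part of the course code.

# Sigmoid variant: a single output unit interpreted as P(dog); P(cat) = 1 - P(dog).
sigmoid_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')    # one unit instead of two
])
sigmoid_model.compile(optimizer='adam',
                      loss='binary_crossentropy',     # pairs with the sigmoid output
                      metrics=['accuracy'])

# With softmax the model outputs two probabilities per image and the class is the argmax;
# with sigmoid it outputs a single probability and the class comes from a 0.5 threshold.
dummy_batch = np.random.rand(4, 150, 150, 3).astype('float32')  # stand-in for real images
probs = sigmoid_model.predict(dummy_batch)                      # shape (4, 1)
classes = (probs > 0.5).astype('int')                           # 0 = cats, 1 = dogs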

Addressing overfitting

Data augmentation

# Random horizontal flip
img_gen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
train_data_gen = img_gen.flow_from_directory(batch_size=batch_size,
                                            directory=train_dir,
                                            shuffle=True,
                                            target_size=(img_shape, img_shape))
Found 2000 images belonging to 2 classes.
augmented_imgs = [train_data_gen[0][0][0] for i in range(5)]
plot_imgs(augmented_imgs)

(Image 4: the same training image with five random horizontal flips applied)

# Random rotation up to 45 degrees
img_gen = ImageDataGenerator(rescale=1./255, rotation_range=45)
train_data_gen = img_gen.flow_from_directory(batch_size=batch_size,
                                            directory=train_dir,
                                            shuffle=True,
                                            target_size=(img_shape, img_shape))
augmented_imgs = [train_data_gen[0][0][0] for i in range(5)]
plot_imgs(augmented_imgs)
Found 2000 images belonging to 2 classes.

(Image 5: the same training image with five random rotations applied)

# Random zoom
img_gen = ImageDataGenerator(rescale=1./255, zoom_range=0.5)
train_data_gen = img_gen.flow_from_directory(batch_size=batch_size,
                                            directory=train_dir,
                                            shuffle=True,
                                            target_size=(img_shape, img_shape))
augmented_imgs = [train_data_gen[0][0][0] for i in range(5)]
plot_imgs(augmented_imgs)
Found 2000 images belonging to 2 classes.

(Image 6: the same training image with five random zooms applied)

# Putting it all together
img_gen_train = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')
train_data_gen = img_gen_train.flow_from_directory(batch_size=batch_size,
                                                   directory=train_dir,
                                                   shuffle=True,
                                                   target_size=(img_shape, img_shape),
                                                   class_mode='binary')
Found 2000 images belonging to 2 classes.
augmented_imgs = [train_data_gen[0][0][0] for i in range(5)]
plot_imgs(augmented_imgs)

(Image 7: five samples with the combined augmentations applied)

img_gen_val = ImageDataGenerator(rescale=1./255)
val_data_gen = img_gen_val.flow_from_directory(batch_size=batch_size,
                                               directory=val_dir,
                                               shuffle=False,
                                               target_size=(img_shape, img_shape),
                                               class_mode='binary')
Found 1000 images belonging to 2 classes.

Model

# define model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax')
])
# compile model
model.compile(optimizer='adam',
             loss='sparse_categorical_crossentropy',
             metrics=['acc'])
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 128)       147584    
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 7, 7, 128)         0         
_________________________________________________________________
dropout (Dropout)            (None, 7, 7, 128)         0         
_________________________________________________________________
flatten (Flatten)            (None, 6272)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               3211776   
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 1026      
=================================================================
Total params: 3,453,634
Trainable params: 3,453,634
Non-trainable params: 0
_________________________________________________________________
# train model
epochs = 10
history = model.fit_generator(
    train_data_gen,
    steps_per_epoch = int(np.ceil(total_tr / float(epochs))),
    epochs=epochs,
    validation_data = val_data_gen,
    validation_steps = int(np.ceil(total_val / float(epochs)))
)
Epoch 1/10
200/200 [==============================] - 109s 547ms/step - loss: 0.6411 - acc: 0.6165 - val_loss: 0.5381 - val_acc: 0.7310
Epoch 2/10
200/200 [==============================] - 104s 519ms/step - loss: 0.5442 - acc: 0.7225 - val_loss: 0.5124 - val_acc: 0.7480
Epoch 3/10
200/200 [==============================] - 104s 518ms/step - loss: 0.4881 - acc: 0.7616 - val_loss: 0.4386 - val_acc: 0.7870
Epoch 4/10
200/200 [==============================] - 104s 521ms/step - loss: 0.4263 - acc: 0.8022 - val_loss: 0.3878 - val_acc: 0.8250
Epoch 5/10
200/200 [==============================] - 105s 523ms/step - loss: 0.3634 - acc: 0.8399 - val_loss: 0.3982 - val_acc: 0.8250
Epoch 6/10
200/200 [==============================] - 106s 532ms/step - loss: 0.3212 - acc: 0.8620 - val_loss: 0.3918 - val_acc: 0.8300
Epoch 7/10
200/200 [==============================] - 104s 521ms/step - loss: 0.2794 - acc: 0.8848 - val_loss: 0.3945 - val_acc: 0.8310
Epoch 8/10
200/200 [==============================] - 106s 528ms/step - loss: 0.2520 - acc: 0.8957 - val_loss: 0.3684 - val_acc: 0.8520
Epoch 9/10
200/200 [==============================] - 105s 523ms/step - loss: 0.2209 - acc: 0.9090 - val_loss: 0.3908 - val_acc: 0.8530
Epoch 10/10
200/200 [==============================] - 104s 521ms/step - loss: 0.1968 - acc: 0.9203 - val_loss: 0.3924 - val_acc: 0.8540
acc = history.history['acc']
val_acc = history.history['val_acc']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Train Acc')
plt.plot(epochs_range, val_acc, label='Val Acc')
plt.legend(loc='lower right')
plt.title('Train and Val Acc')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Train Loss')
plt.plot(epochs_range, val_loss, label='Val Loss')
plt.legend(loc='upper right')
plt.title('Train and Val Loss')
plt.savefig('foo.png')
plt.show()

(Image 8: training and validation accuracy/loss curves for the model with augmentation and dropout)

Preventing overfitting

The course covers three different techniques for preventing overfitting (the code above applies image augmentation and dropout):

  • Early stopping: track the validation loss during training and use it to decide when to stop, so the model stays accurate without overfitting (a code sketch follows after this list).
  • Image augmentation: artificially increase the number of training images by applying random transformations to the existing images in the training set.
  • Dropout: during training, randomly select a fraction of the neurons in the network and turn them off.

Other techniques can be found with a quick web search.
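
As noted in the early-stopping bullet, the models above never actually use early stopping. A minimal sketch of how it could be added with a tf.keras callback is shown below; the monitored metric, patience, and step counts are illustrative assumptions, not course code.

# Early-stopping sketch (illustrative values, not part of the course notebook).
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',     # watch the validation loss
    patience=3)             # stop after 3 epochs without improvement

history = model.fit_generator(
    train_data_gen,
    steps_per_epoch=int(np.ceil(total_tr / float(batch_size))),    # one pass per epoch
    epochs=50,                                                     # generous cap; early stopping decides
    validation_data=val_data_gen,
    validation_steps=int(np.ceil(total_val / float(batch_size))),
    callbacks=[early_stop])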

# Save the model
tf.keras.models.save_model(
    model,
    "model/cat_dog.h5",
    overwrite=True,
    include_optimizer=True
)
# Load the model
load_model = tf.keras.models.load_model("model/cat_dog.h5")
val_images, _ = next(val_data_gen)
plot_imgs(val_images[5:10])

(Image 9: the five validation images at indices 5-9)

load_model.predict_classes(val_images[5:10])

All five are classified correctly; 0 means cat and 1 means dog.

array([0, 0, 0, 0, 0], dtype=int64)
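
predict_classes only exists on Sequential models and was removed in later TensorFlow releases. An equivalent that also maps the indices back to the folder names recorded by the generator might look like this (a sketch, assuming the generators and arrays defined above are still in scope):

# Same predictions via predict + argmax, with indices mapped back to class names.
probs = load_model.predict(val_images[5:10])                               # softmax outputs, shape (5, 2)
pred_indices = np.argmax(probs, axis=1)                                    # 0 or 1 per image
index_to_label = {v: k for k, v in train_data_gen.class_indices.items()}   # e.g. {0: 'cats', 1: 'dogs'}
print([index_to_label[i] for i in pred_indices])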
dog = plt.imread('dog.jpg')
plt.imshow(dog)
plt.axis('off')
plt.show()

(Image 10: dog.jpg, the author's dog photo)

dog.shape
(1080, 1440, 3)
val_images[:1].shape
(1, 150, 150, 3)
# load_img and img_to_array are also available from tensorflow.keras.preprocessing.image,
# which would avoid mixing standalone Keras with tf.keras.
from keras.preprocessing.image import load_img, img_to_array

img = load_img('dog.jpg',target_size=(150, 150))
img = img_to_array(img) /255.
Using TensorFlow backend.
load_model.predict_classes(img.reshape(1, 150, 150, 3))

Done! The model finally recognizes my puppy as a dog!

array([1], dtype=int64)
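
To make the single-image workflow reusable, the steps above can be wrapped in a small helper. This is a sketch built on the code already shown; the function name and the cat/dog label mapping (0 = cat, 1 = dog, matching the folder order) are assumptions for illustration.

def predict_cat_or_dog(model, img_path, target_size=(150, 150)):
    """Load an image from disk, preprocess it like the training data, and return a label."""
    img = load_img(img_path, target_size=target_size)    # resize to the training input size
    arr = img_to_array(img) / 255.                        # same rescaling as the generators
    arr = arr.reshape((1,) + arr.shape)                   # add the batch dimension
    probs = model.predict(arr)[0]                         # softmax over [cat, dog]
    return 'dog' if np.argmax(probs) == 1 else 'cat'

print(predict_cat_or_dog(load_model, 'dog.jpg'))          # expected: 'dog'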

What if we fed it a human face instead? That might remind you of those "which animal do you look like" apps that went viral a while back.
