Binary Classification: Cat and Dog
Data Source
Kaggle-Cat and Dog https://www.kaggle.com/tongpython/cat-and-dog
Tutorial
Coursera-Convolutional Neural Networks in TensorFlow-Exploring a Larger Dataset-Training with the cats vs. dogs dataset
Purpose
The dataset consists of pictures of cats and dogs, and TensorFlow is used to classify the two.
The author only wanted to practice the process shown in the tutorial, so the classification accuracy is not high.
To Test
- Apply data augmentation to the training and test sets (see the sketch below)
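A minimal sketch of what that could look like, assuming the same Kaggle directory layout used in the code below; the augmentation parameter values are illustrative, not taken from the tutorial. Note that augmentation is normally applied to the training generator only, while the test/validation generator keeps plain rescaling.
# Sketch only: an augmented training data generator (parameter values are illustrative)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
augmented_train_datagen = ImageDataGenerator(
    rescale=1/255,
    rotation_range=40,       # random rotations up to 40 degrees
    width_shift_range=0.2,   # random horizontal shifts
    height_shift_range=0.2,  # random vertical shifts
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')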
Code
- Define the image directories
import os
# let's define each of these directories
# Directory with our training cat pictures
train_cat_dir = os.path.join('/kaggle/input/training_set/training_set/cats')
# Directory with our training dog pictures
train_dog_dir = os.path.join('/kaggle/input/training_set/training_set/dogs')
- Look at the file names and the total number of images
# let's see what the filenames look like in the training directories
train_cat_names = os.listdir(train_cat_dir)
print(train_cat_names[:10])
train_dog_names = os.listdir(train_dog_dir)
print(train_dog_names[:10])
# The total number of images in the directories
print('total training cat images:', len(os.listdir(train_cat_dir)))
print('total training dog images:', len(os.listdir(train_dog_dir)))
- Look at a few sample images
# let's take a look at a few pictures to get a better sense of what they look like
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# Parameters for our graph; we'll output images in a 4x4 configuration
nrows = 4
ncols = 4
# Index for iterating over images
pic_index = 0
# Set up matplotlib fig, and size it to fit 4x4 pics
fig = plt.gcf()
fig.set_size_inches(ncols * 4, nrows * 4)
pic_index += 8
next_cat_pix = [os.path.join(train_cat_dir, fname)
for fname in train_cat_names[pic_index-8:pic_index]]
next_dog_pix = [os.path.join(train_dog_dir, fname)
for fname in train_dog_names[pic_index-8:pic_index]]
for i, img_path in enumerate(next_cat_pix+next_dog_pix):
# Set up subplot; subplot indices start at 1
sp = plt.subplot(nrows, ncols, i + 1)
sp.axis('Off') # Don't show axes (or gridlines)
img = mpimg.imread(img_path)
plt.imshow(img)
plt.show()
- Design the NN
# import tensorflow
import tensorflow as tf
# build the model: a stack of convolutions followed by densely connected layers
model = tf.keras.models.Sequential([
# Note the input shape is the desired size of the image 300x300 with 3 bytes color
# This is the first convolution
tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(300, 300, 3)),
tf.keras.layers.MaxPooling2D(2, 2),
# The second convolution
tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
tf.keras.layers.MaxPooling2D(2,2),
# The third convolution
tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
tf.keras.layers.MaxPooling2D(2,2),
# The fourth convolution
tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
tf.keras.layers.MaxPooling2D(2,2),
# The fifth convolution
tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
tf.keras.layers.MaxPooling2D(2,2),
# Flatten the results to feed into a DNN
tf.keras.layers.Flatten(),
# 512 neuron hidden layer
tf.keras.layers.Dense(512, activation='relu'),
# Only 1 output neuron.
tf.keras.layers.Dense(1, activation='sigmoid')
])
# prints a summary of the NN
model.summary()
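As a quick sanity check (not part of the original notebook), a dummy forward pass confirms the model maps a 300x300 RGB image to a single sigmoid probability; after the five convolution/pooling blocks the feature map is 7x7x64, so Flatten produces 3,136 features.
# Sanity check (assumption: not in the original code): feed one blank image through the model
import numpy as np
dummy = np.zeros((1, 300, 300, 3), dtype='float32')  # one blank 300x300 RGB image
print(model.predict(dummy).shape)  # (1, 1): a single probability per image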
- Use the binary cross-entropy loss function
# train our model with the binary_crossentropy loss
from tensorflow.keras.optimizers import RMSprop
model.compile(loss='binary_crossentropy',
optimizer=RMSprop(learning_rate=0.001),
metrics=['acc'])
- Prepare the training and test data
# set up data generators
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(rescale=1/255)
test_datagen = ImageDataGenerator(rescale=1/255)
# Flow training images in batches of 128 using train_datagen generator
train_generator = train_datagen.flow_from_directory(
'/kaggle/input/training_set/training_set/', # This is the source directory for training images
target_size=(300, 300), # All images will be resized
batch_size=128,
# Since we use binary_crossentropy loss, we need binary labels
class_mode='binary')
# Flow test images in batches of 128 using the test_datagen generator
test_generator = test_datagen.flow_from_directory(
'/kaggle/input/test_set/test_set/', # This is the source directory for test images
target_size=(300, 300), # All images will be resized
batch_size=128,
# Since we use binary_crossentropy loss, we need binary labels
class_mode='binary')
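The single sigmoid output only reports how close an image is to class 1, so it is worth checking which folder got which label. flow_from_directory assigns class indices alphabetically, which here should give cats=0 and dogs=1; a quick check (not in the original notebook):
# Confirm the label mapping produced by flow_from_directory
print(train_generator.class_indices)  # expected: {'cats': 0, 'dogs': 1}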
- Train
# train for 25 epochs
history = model.fit(
train_generator,
steps_per_epoch=8,
epochs=25,
validation_data = test_generator,
validation_steps = 8,
verbose=1)
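Once training has run, a single image can be classified by loading it at the same 300x300 size, rescaling it the same way as the generators, and thresholding the sigmoid output. A minimal sketch, assuming the cats=0/dogs=1 mapping above; the image path is a placeholder, not from the original notebook.
# Sketch: classify one image (the path below is a placeholder)
import numpy as np
from tensorflow.keras.preprocessing import image
img_path = '/kaggle/input/test_set/test_set/cats/cat.4001.jpg'  # placeholder path
img = image.load_img(img_path, target_size=(300, 300))
x = image.img_to_array(img) / 255.0   # same rescaling as the generators
x = np.expand_dims(x, axis=0)         # add the batch dimension
prob = model.predict(x)[0][0]
print('dog' if prob > 0.5 else 'cat', prob)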
- Plot the training and validation accuracy and loss
# plot
import matplotlib.pyplot as plt
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
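If the trained weights should survive the Kaggle session, the model can be saved and reloaded; a minimal sketch (the file name is arbitrary, not from the original notebook).
# Sketch: persist and reload the trained model (file name is arbitrary)
model.save('cats_vs_dogs.h5')
restored_model = tf.keras.models.load_model('cats_vs_dogs.h5')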