二元分类 Cat and Dog

二元分类 Cat and Dog

数据来源

Kaggle-Cat and Dog https://www.kaggle.com/tongpython/cat-and-dog

讲解

Coursera-Convolutional Neural Networks in TensorFlow-Exploring a Larger Dataset-Training with the cats vs. dogs dataset

目的

数据集为猫和狗的图片,使用TensorFlow将两者分类。
作者只是想实践一下讲解中的过程,因此分类的准确率不高。

待测试

  • 对训练和测试集使用数据增强

代码

  • 定义图片目录
import os

# Training picture directories, one per class (cats first, then dogs).
train_cat_dir, train_dog_dir = map(os.path.join, (
    '/kaggle/input/training_set/training_set/cats',
    '/kaggle/input/training_set/training_set/dogs',
))
  • 看看图片名称和图片总数
# Peek at the first few filenames in each training directory.
# Each directory is listed once; the listing is reused for the counts below
# instead of scanning the directory a second time.
train_cat_names = os.listdir(train_cat_dir)
print(train_cat_names[:10])

train_dog_names = os.listdir(train_dog_dir)
print(train_dog_names[:10])

# Total number of images per class, taken from the listings above.
print('total training cat images:', len(train_cat_names))
print('total training dog images:', len(train_dog_names))
  • 看看图片示例
# Preview a few training pictures to get a feel for the data.
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Grid layout for the preview: 4 rows by 4 columns of images.
nrows = 4
ncols = 4

# Offset into the filename lists; it is bumped by 8 before slicing so the
# slice below covers the 8 names that end at pic_index.
pic_index = 0

# Size the current figure so that every grid cell gets 4x4 inches.
fig = plt.gcf()
fig.set_size_inches(ncols * 4, nrows * 4)

# Turn the selected cat and dog filenames into full paths.
pic_index += 8
next_cat_pix = [
    os.path.join(train_cat_dir, cat_file)
    for cat_file in train_cat_names[pic_index - 8:pic_index]
]
next_dog_pix = [
    os.path.join(train_dog_dir, dog_file)
    for dog_file in train_dog_names[pic_index - 8:pic_index]
]

# Cat pictures are drawn first, then dogs; subplot slots are 1-based.
for slot, picture_path in enumerate(next_cat_pix + next_dog_pix, start=1):
    axes = plt.subplot(nrows, ncols, slot)
    axes.axis('Off')  # hide axes and gridlines

    plt.imshow(mpimg.imread(picture_path))

plt.show()
  • 设计NN
# Build the convolutional network with TensorFlow's Keras API.
import tensorflow as tf

model = tf.keras.models.Sequential(
    # First convolution + pooling stage; input_shape declares 300x300 images
    # with 3 colour channels.
    [
        tf.keras.layers.Conv2D(16, (3, 3), activation='relu',
                               input_shape=(300, 300, 3)),
        tf.keras.layers.MaxPooling2D(2, 2),
    ]
    # Second to fifth convolution stages: 32, 64, 64 and 64 filters, each
    # followed by 2x2 max pooling. Layers are created in network order.
    + [
        stage_layer
        for filters in (32, 64, 64, 64)
        for stage_layer in (
            tf.keras.layers.Conv2D(filters, (3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D(2, 2),
        )
    ]
    + [
        # Flatten the feature maps so they can feed the dense layers.
        tf.keras.layers.Flatten(),
        # Hidden layer with 512 units.
        tf.keras.layers.Dense(512, activation='relu'),
        # Single sigmoid output unit for the binary decision.
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ]
)

# Print a summary of the network.
model.summary()
  • 使用二元交叉熵损失函数
# Configure training: binary cross-entropy loss with the RMSprop optimizer.
from tensorflow.keras.optimizers import RMSprop

model.compile(loss='binary_crossentropy',
              # `learning_rate` is the supported keyword; the old `lr` alias
              # is deprecated and rejected by newer Keras releases.
              optimizer=RMSprop(learning_rate=0.001),
              # Keep the 'acc' name: the plotting code reads the 'acc' and
              # 'val_acc' keys from the training history.
              metrics=['acc'])
  • 准备训练和测试数据
# Set up the data generators that feed images to the model.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Both generators rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1 / 255)
test_datagen = ImageDataGenerator(rescale=1 / 255)

# Training images: read from the training directory in batches of 128 and
# resized to 300x300. Labels are binary to match the binary_crossentropy loss.
train_generator = train_datagen.flow_from_directory(
    directory='/kaggle/input/training_set/training_set/',
    class_mode='binary',
    batch_size=128,
    target_size=(300, 300),
)

# Test images: same settings, read from the test directory via test_datagen.
test_generator = test_datagen.flow_from_directory(
    directory='/kaggle/input/test_set/test_set/',
    class_mode='binary',
    batch_size=128,
    target_size=(300, 300),
)
  • 训练
# Train for 25 epochs, validating on the test generator.
# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from recent TensorFlow/Keras releases.
history = model.fit(
    train_generator,
    steps_per_epoch=8,   # batches drawn from train_generator per epoch
    epochs=25,
    validation_data=test_generator,
    validation_steps=8,  # batches drawn from test_generator per validation run
    verbose=1,
)
  • 打印一下训练和测试的acc和loss
# Plot training vs. validation accuracy and loss for every epoch.
import matplotlib.pyplot as plt

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

# Accuracy curves: dots for training, solid line for validation.
plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()

# Start a second figure so the loss curves do not overlay the accuracy plot.
plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
# This curve is the validation loss; it was previously mislabelled as
# 'Training loss', which made the legend show the same label twice.
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

你可能感兴趣的:(二元分类 Cat and Dog)