现在有很多的图片,里面分别有猫与狗,识别这些图片,区分猫与狗
设计解决这个问题的思路
1、下载与放置训练图片
2、下载对应的依赖,tensorflow、numpy等等
3、解析文件名,识别dog还是cat
4、建模
5、对模型进行训练
6、用测试数据对模型进行验证
7、输出结果
8、优化模型,回到第4步重新建模
数据可以从 Kaggle 的 Dogs vs. Cats 页面下载;如果下载速度比较慢,可以使用网盘。
网盘地址(提取码:lhrr)
import os

import pandas as pd

# List every file in the training directory.
filenames = os.listdir('./dogs-vs-cats/train')

# Animal type for each file, in the same order as `filenames`.
# The label is the text before the first dot of the file name
# (presumably names look like "cat.0.jpg" / "dog.0.jpg" -- confirm against the data set).
categories = [filename.split('.')[0] for filename in filenames]

# Structured view of the data set: one row per image file.
df = pd.DataFrame({
    'filename': filenames,
    'category': categories,
})
# Imports used to display the data.
import random
from keras.preprocessing import image
import matplotlib.pyplot as plt

# Inspect the structured data: first rows, last rows and the number of
# images per category, then draw the category counts as a bar chart.
print(df.head())
print(df.tail())
category_counts = df['category'].value_counts()
print(category_counts)
category_counts.plot(kind='bar')
plt.show()

# Display one randomly chosen training image as a sanity check.
sample = random.choice(filenames)
# Keep the loaded picture under its own name: assigning it to `image`
# would overwrite the imported keras `image` module.
sample_image = image.load_img('./dogs-vs-cats/train/' + sample)
plt.imshow(sample_image)
plt.show()
# Split the labelled data into a training set and a validation set.
# train_test_split comes from scikit-learn and has to be imported before use.
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split reproducible between runs.
train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42)

# Renumber the rows of each subset from 0.
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)
print(train_df.head())
print(validate_df.head())

# Number of rows (images) in each subset.
total_train = train_df.shape[0]
total_validate = validate_df.shape[0]
print(f"Total number of example in training dataset : {total_train}")
print(f"Total number of example in validation dataset : {total_validate}")
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, BatchNormalization, Flatten,Dropout
from tensorflow.keras import optimizers
# First model: a small convolutional network for binary classification.
class Model:
    """Builder for the first cat-vs-dog convolutional network.

    Args:
        IMG_WIDTH: Width of the input images, in pixels.
        IMG_HEIGHT: Height of the input images, in pixels.
        IMG_CHANNELS: Number of channels of the input images.
    """

    def __init__(self, IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS):
        self.IMG_WIDTH = IMG_WIDTH
        self.IMG_HEIGHT = IMG_HEIGHT
        self.IMG_CHANNELS = IMG_CHANNELS

    def create_model(self):
        """Build and compile the network.

        Returns:
            A compiled ``Sequential`` model with four convolution/pooling
            stages, a 512-unit dense layer and a single sigmoid output.
        """
        # Use the dimensions given to the constructor instead of a
        # hard-coded 150x150x3; Keras orders the shape as
        # (height, width, channels).
        input_shape = (self.IMG_HEIGHT, self.IMG_WIDTH, self.IMG_CHANNELS)

        model = Sequential()

        # Stage 1: 2D convolution with 32 output filters and a 3x3 window.
        model.add(Conv2D(32, (3, 3), activation='relu',
                         kernel_initializer='he_uniform',
                         padding='same', input_shape=input_shape))
        # Max pooling over a 2x2 window.
        model.add(MaxPooling2D((2, 2)))

        # Stage 2
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))

        # Stage 3
        model.add(Conv2D(128, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))

        # Stage 4
        model.add(Conv2D(128, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))

        # Flatten the multi-dimensional feature maps into one dimension.
        model.add(Flatten())

        # Fully connected layers; the single sigmoid unit is the output.
        model.add(Dense(512, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))

        # Loss function and optimizer. `learning_rate` is the current name
        # of the argument formerly spelled `lr`. The metric is named 'acc'
        # because the training history is read back under the keys
        # 'acc' / 'val_acc'.
        model.compile(
            loss='binary_crossentropy',
            optimizer=optimizers.RMSprop(learning_rate=1e-4),
            metrics=['acc'],
        )
        return model
# Instantiate the builder for 150x150 inputs with 3 channels, create the
# network and print its layer summary.
IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS = 150, 150, 3
model = Model(
    IMG_WIDTH=IMG_WIDTH,
    IMG_HEIGHT=IMG_HEIGHT,
    IMG_CHANNELS=IMG_CHANNELS,
)
model_1 = model.create_model()
model_1.summary()
from keras.preprocessing.image import ImageDataGenerator
# Multiply every pixel value by 1/255 so that values originally in 0..255
# are fed to the network in the range 0..1.
train_imgdatagen = ImageDataGenerator(rescale=1./255)
valid_imgdatagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the training and validation generators. Both read from
# the same directory because the validation rows were split off the
# training data frame.
flow_options = dict(
    directory="./dogs-vs-cats/train",
    x_col='filename',
    y_col='category',
    target_size=(150, 150),  # resize image to 150x150
    batch_size=64,
    class_mode='binary',
)

train_generator_m1 = train_imgdatagen.flow_from_dataframe(
    train_df, **flow_options)
validation_generator_m1 = valid_imgdatagen.flow_from_dataframe(
    validate_df, **flow_options)
import numpy as np
# Train model 1.
history_1 = model_1.fit(
    train_generator_m1,
    epochs=30,
    steps_per_epoch=100,
    validation_data=validation_generator_m1,
    validation_steps=50,
)

# Save the trained model so it can be reloaded later for prediction.
model_1.save('model_1.h5')

# Mean training / validation accuracy over all epochs.
print(np.mean(history_1.history['acc']))
print(np.mean(history_1.history['val_acc']))

# Accuracy per epoch: training in black, validation in blue.
plt.plot(history_1.history['acc'], color='black', label='Training')
plt.plot(history_1.history['val_acc'], color='blue', label='Validation')
plt.title('Training and validation accuracy of model 1')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Loss per epoch: training in black, validation in blue.
plt.plot(history_1.history['loss'], color='black', label='Training')
plt.plot(history_1.history['val_loss'], color='blue', label='Validation')
plt.title('Training and validation loss of model 1')
plt.xlabel('Epochs')
# This figure shows the loss, so the axis is labelled accordingly.
plt.ylabel('Loss')
plt.legend()
plt.show()
【8】写个测试脚本
from numpy.core.fromnumeric import ptp
import pandas as pd
import os
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
# Load the model file written by the training step.
model = load_model('model_1.h5')

# Put the test image file names into a one-column data frame.
test_filenames = os.listdir('./dogs-vs-cats/test3')
test_df = pd.DataFrame({'id': test_filenames})
print(test_df)

# Apply the same 1/255 pixel rescaling that was used for training.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    test_df,
    "./dogs-vs-cats/test3",
    x_col='id',
    y_col=None,
    # Must equal the 150x150 size the model was trained on; a different
    # size does not match the model's input shape.
    target_size=(150, 150),
    batch_size=20,
    class_mode=None,   # no labels: the generator yields images only
    shuffle=False,     # keep predictions in the same order as test_df rows
    validate_filenames=False,
)

predictions = model.predict(test_generator)
print(predictions)

# Threshold each sigmoid output at 0.5 to get a 0/1 class index.
# NOTE(review): presumably 0 = cat and 1 = dog, since Keras assigns class
# indices in alphanumeric label order -- confirm with
# train_generator_m1.class_indices.
pred = [1 if p > 0.5 else 0 for p in predictions.ravel()]
test_df['category'] = pred

# Bar chart of how many test images fell into each predicted class.
test_df['category'].value_counts().plot.bar()
plt.show()
得出的结果
模型上还是有所偏差,后续可以优化模型提升正确率