cat_12数据集包含3个部分:训练集cat_12_train、测试集cat_12_test,以及存储图片名称及标签的train_list.txt。
首先,在dataset.py中定义一个prepare_image函数,逐行读取文本文件中的图片路径与标签(两者以制表符分隔),并打乱样本顺序:
def prepare_image(file_path):
    """Load image paths and labels from a tab-separated list file, shuffled.

    Each non-blank line of the file is expected to hold a relative image
    path and a label separated by a single TAB character. The relative
    path is prefixed with ``./cat_12/``.

    Args:
        file_path: Path of the list file (e.g. ``./cat_12/train_list.txt``).

    Returns:
        A ``(X_train, y_train)`` tuple of two equally long lists: the
        prefixed image paths and the matching labels (kept as strings),
        in random order.

    Raises:
        IndexError: If a non-blank line contains no TAB separator.
    """
    import random  # local import keeps the function self-contained

    with open(file_path) as f:
        # Blank lines (typically a trailing newline at the end of the file)
        # carry no sample and would otherwise crash the field lookup below.
        records = [line for line in f if line.strip()]

    # Shuffle whole lines so every path stays paired with its own label.
    random.shuffle(records)

    X_train = []
    y_train = []
    for record in records:
        fields = record.strip('\n').split('\t')
        X_train.append('./cat_12/' + fields[0])
        y_train.append(fields[1])
    return X_train, y_train
再定义一个preprocess_image函数完成图片预处理:读取图片文件,解码为3通道图像,按比例缩放并填充到227×227,最后除以255进行归一化,将像素值限制在0-1之间。
# Image loading and normalization
def preprocess_image(image):
    """Load one JPEG file and turn it into a scaled 227x227 3-channel tensor.

    Args:
        image: Path of the image file to read.

    Returns:
        The decoded image, resized with padding to 227x227 and divided
        by 255.0.
    """
    raw_bytes = tf.io.read_file(image)
    # Always decode to three channels so every sample has the same depth.
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Resize to the 227x227 input size expected by the first Conv2D layer.
    resized = tf.image.resize_with_pad(decoded, 227, 227)
    return resized / 255.0
导包:tqdm包用于打印进度条
from dataset import prepare_image, preprocess_image
import tensorflow as tf
from tqdm import tqdm
import numpy as np
from matplotlib import pyplot as plt
数据预处理:
# Read the shuffled image paths and labels from the training list file.
X_train, y_train = prepare_image('./cat_12/train_list.txt')

# Load and normalize every image; tqdm displays a progress bar meanwhile.
train_images = [preprocess_image(path) for path in tqdm(X_train)]

from tensorflow import keras

# Pack the per-image tensors into a single array and report its shape.
train_images = np.array(train_images)
print(train_images.shape)

# One-hot encode the labels over the 12 classes.
y_train = keras.utils.to_categorical(y_train, 12)
定义LRN(Local Response Normalization,局部响应归一化)层,内部封装tf.nn.lrn:
class LRN(keras.layers.Layer):
    """Keras layer that applies ``tf.nn.lrn`` to its input.

    The constructor arguments are stored on the instance, forwarded to
    ``tf.nn.lrn`` on every call, and exported by ``get_config``.

    Args:
        depth_radius: Forwarded to ``tf.nn.lrn`` as ``depth_radius``.
        bias: Forwarded to ``tf.nn.lrn`` as ``bias``.
        alpha: Forwarded to ``tf.nn.lrn`` as ``alpha``.
        beta: Forwarded to ``tf.nn.lrn`` as ``beta``.
        **kwargs: Passed through to ``keras.layers.Layer``.
    """

    def __init__(self, depth_radius=5, bias=1, alpha=1, beta=0.5, **kwargs):
        super().__init__(**kwargs)
        self.depth_radius, self.bias = depth_radius, bias
        self.alpha, self.beta = alpha, beta

    def call(self, input):
        """Return ``input`` normalized with the stored LRN parameters."""
        return tf.nn.lrn(
            input,
            depth_radius=self.depth_radius,
            bias=self.bias,
            alpha=self.alpha,
            beta=self.beta,
        )

    def get_config(self):
        """Return the base layer config extended with the LRN parameters."""
        config = super().get_config()
        config.update(
            depth_radius=self.depth_radius,
            bias=self.bias,
            alpha=self.alpha,
            beta=self.beta,
        )
        return config
构建模型:
# Layers listed in forward order; they are added to the model one by one below.
# NOTE(review): layers 3-5 use 5x5 kernels here, whereas the published AlexNet
# is usually described with 3x3 kernels for those layers - confirm the intent.
alexnet_layers = [
    # Layer 1: convolution on the 227x227x3 input, then LRN and max pooling
    keras.layers.Conv2D(
        filters=96,
        kernel_size=(11, 11),
        strides=(4, 4),
        input_shape=(227, 227, 3),
        padding="VALID",
        activation="relu",
    ),
    LRN(),
    keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same"),
    # Layer 2: convolution, then LRN and max pooling
    keras.layers.Conv2D(
        filters=256, kernel_size=(5, 5), strides=(1, 1), padding="SAME", activation="relu"
    ),
    LRN(),
    keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same"),
    # Layer 3
    keras.layers.Conv2D(
        filters=384, kernel_size=(5, 5), strides=(1, 1), padding="SAME", activation="relu"
    ),
    # Layer 4
    keras.layers.Conv2D(
        filters=384, kernel_size=(5, 5), strides=(1, 1), padding="SAME", activation="relu"
    ),
    # Layer 5: convolution followed by max pooling
    keras.layers.Conv2D(
        filters=256, kernel_size=(5, 5), strides=(1, 1), padding="SAME", activation="relu"
    ),
    keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same"),
    # Layer 6: flatten, fully connected, dropout
    keras.layers.Flatten(),
    keras.layers.Dense(4096, activation="relu"),
    keras.layers.Dropout(0.5),
    # Layer 7: fully connected, dropout
    keras.layers.Dense(4096, activation="relu"),
    keras.layers.Dropout(0.5),
    # Layer 8: softmax output over the 12 classes
    keras.layers.Dense(12, activation="softmax"),
]

model = keras.Sequential()
for layer in alexnet_layers:
    model.add(layer)

# Optional architecture diagram:
# keras.utils.plot_model(model=model, to_file='AlexNet.png', show_shapes=True)

model.compile(loss="categorical_crossentropy", optimizer="sgd", metrics=["accuracy"])

# Hold out 20% of the samples for validation while training.
history = model.fit(
    train_images,
    y_train,
    epochs=50,
    batch_size=16,
    validation_split=0.2,
)
model.save('the_AlexNet_model.h5')
绘制训练集与验证集的损失曲线和准确率曲线:
def show_training_history(train_history, train, val):
    """Plot a training series and its validation counterpart per epoch.

    Args:
        train_history: Mapping from metric name to its per-epoch values
            (e.g. ``history.history``).
        train: Key of the training series in ``train_history``.
        val: Key of the validation series in ``train_history``.

    Raises:
        KeyError: If ``train`` or ``val`` is not present in ``train_history``.
    """
    plt.plot(train_history[train], linestyle='-', color='b')
    plt.plot(train_history[val], linestyle='--', color='r')
    plt.xlabel('Epoch', fontsize=12)
    # Label the y-axis with the plotted metric instead of the fixed text
    # 'train', so the loss and accuracy figures can be told apart.
    plt.ylabel(train, fontsize=12)
    plt.legend(['train', 'validation'], loc='lower right')
    plt.show()
# Loss curves for the training and validation splits.
show_training_history(history.history, 'loss', 'val_loss')

# The model is compiled with metrics=["accuracy"]; depending on the Keras
# version the history stores that metric as 'accuracy' or as 'acc', so pick
# whichever key is actually present instead of hard-coding one.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
show_training_history(history.history, acc_key, 'val_' + acc_key)