A cat-vs-dog classifier trained on a self-made dataset, implemented with TensorFlow 2. The network is a ResNet built from 4 groups of residual blocks, 4 blocks per group, each block stacking two 3×3 convolutions. On the plain (non-shortcut) path the blocks use 3×3 kernels with stride 1 for feature extraction, starting at 128 kernels in the first group and doubling in each later group; where the feature-map size changes, the shortcut is a projection of 1×1 kernels with stride 2, so that x and F(x) have matching shapes and can be added. ReLU is the activation function, (sparse) categorical cross-entropy is the loss, and softmax maps the final layer to class probabilities.
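The training script below reads the image paths and integer labels from a file.csv with a path column and a label column (0 = cat, 1 = dog). As an illustration only, such a file could be generated like this, assuming the raw images sit in a ./train folder and their filenames contain "cat" or "dog" (the folder name and the naming convention are assumptions, not part of the original project):

import os
import pandas as pd

# collect every .jpg under ./train and derive its label from the filename (assumed convention)
rows = [{"path": os.path.join("./train", name),
         "label": 0 if "cat" in name.lower() else 1}
        for name in os.listdir("./train") if name.lower().endswith(".jpg")]
pd.DataFrame(rows).to_csv("./file.csv", index=False)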
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os
import sys
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
import pandas as pd
import cv2
# x: list of image file paths, y: list of integer class codes for the images
def get_tensor(x, y):
    '''
    Build the training and test samples. There are 25,000 training samples in total; the test set
    is split off from the tail of the list, and only simple preprocessing is applied to the images.
    '''
    ims = []
    for i in x:
        # read the image file at this path
        p = tf.io.read_file(i)
        # decode the JPEG, forcing 3 channels (RGB)
        p = tf.image.decode_jpeg(p, channels=3)
        # resize the image to 64x64
        p = tf.image.resize(p, [64, 64])
        p = tf.cast(p, dtype=tf.float32) / 255.
        # collect the preprocessed image
        ims.append(p)
    # the last 5,000 samples become the test set
    x_test = ims[20000:]
    y_test = y[20000:]
    # convert the Python lists to tensors and return them
    y = tf.convert_to_tensor(y[:20000])
    ims = tf.convert_to_tensor(ims[:20000])
    x_test = tf.convert_to_tensor(x_test)
    y_test = tf.convert_to_tensor(y_test)
    return ims, y, x_test, y_test
# CSV file holding the training image paths and labels
file = pd.read_csv("./file.csv")
x_train = file["path"]
y_train = file["label"]
x_train, y_train, x_test, y_test = get_tensor(x_train, y_train)
# plt.imshow(x_train[0])
# plt.show()
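get_tensor decodes all 25,000 images up front and keeps them in memory as one tensor (roughly 1.2 GB at 64×64×3 float32). A minimal alternative sketch, not used by the script below and reusing the file DataFrame loaded above, that streams the same file.csv with tf.data in case memory becomes a problem:

# Alternative input pipeline (sketch): stream images from disk instead of preloading them.
def load_image(path, label):
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [64, 64])
    return tf.cast(img, tf.float32) / 255., label

ds = tf.data.Dataset.from_tensor_slices((file["path"].values, file["label"].values))
train_ds = ds.take(20000).map(load_image).shuffle(1000).batch(32)   # same 20000/5000 split as above
test_ds = ds.skip(20000).map(load_image).batch(32)
# model.fit(train_ds, validation_data=test_ds, ...) would then replace the in-memory tensors.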
# ResNet building block
class ResnetBlock(Model):
    def __init__(self, filters, strides=1, residual_path=False):
        super(ResnetBlock, self).__init__()
        self.filters = filters
        self.strides = strides
        self.residual_path = residual_path
        self.c1 = Conv2D(filters, (3, 3), strides=strides, padding='same', use_bias=False)
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.c2 = Conv2D(filters, (3, 3), strides=1, padding='same', use_bias=False)
        self.b2 = BatchNormalization()
        # when residual_path is True the input is downsampled with a 1x1 convolution,
        # so that x matches the shape of F(x) and the two can be added
        if residual_path:
            self.down_c1 = Conv2D(filters, (1, 1), strides=strides, padding='same', use_bias=False)
            self.down_b1 = BatchNormalization()
        self.a2 = Activation('relu')
    def call(self, inputs):
        residual = inputs  # the shortcut starts out as the input itself, residual = x
        # pass the input through conv, BN and activation layers to compute F(x)
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)
        x = self.c2(x)
        y = self.b2(x)
        # projection shortcut when residual_path is True
        if self.residual_path:
            residual = self.down_c1(inputs)
            residual = self.down_b1(residual)
        out = self.a2(y + residual)
        return out
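A quick way to see the two shortcut paths is to run a block on a dummy tensor; a commented-out sanity check (not part of the training run), with shapes that follow from padding='same' and the strides above:

# x = tf.random.normal([1, 64, 64, 128])
# ResnetBlock(128)(x).shape                                  # identity shortcut -> (1, 64, 64, 128)
# ResnetBlock(256, strides=2, residual_path=True)(x).shape   # projection shortcut -> (1, 32, 32, 256)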
class ResNet18(Model):
    def __init__(self, block_list, initial_filters=128):  # block_list gives the number of ResnetBlocks in each group
        super(ResNet18, self).__init__()
        self.num_blocks = len(block_list)  # number of block groups
        self.block_list = block_list
        self.out_filters = initial_filters
        self.c1 = Conv2D(self.out_filters, (3, 3), strides=1, padding='same', use_bias=False)
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.blocks = tf.keras.models.Sequential()
        # build the stack of ResNet blocks
        for block_id in range(len(block_list)):  # which block group
            for layer_id in range(block_list[block_id]):  # which block within the group
                if block_id != 0 and layer_id == 0:  # downsample the input of every group except the first
                    block = ResnetBlock(self.out_filters, strides=2, residual_path=True)
                else:
                    block = ResnetBlock(self.out_filters, residual_path=False)
                self.blocks.add(block)  # append the finished block to the network
            self.out_filters *= 2  # the next group uses twice as many filters
        self.p1 = tf.keras.layers.GlobalAveragePooling2D()
        self.d1 = Dropout(0.2)
        self.f1 = tf.keras.layers.Dense(2, activation='softmax', kernel_regularizer=tf.keras.regularizers.l2())
    def call(self, inputs):
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)
        x = self.blocks(x)
        x = self.p1(x)
        x = self.d1(x)
        y = self.f1(x)
        return y
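For the 64×64×3 inputs produced by get_tensor and ResNet18([4, 4, 4, 4]) with initial_filters=128, the stage layout works out as follows (a hand calculation from the code above, not script output). Note that with two convolutions per ResnetBlock this configuration stacks 1 + 32 convolution layers plus the final Dense layer, so it is closer in depth to ResNet-34 than to ResNet-18 despite the class name.

# stem:    3x3 conv, 128 filters, stride 1              -> 64 x 64 x 128
# group 0: 4 blocks, 128 filters, no downsampling       -> 64 x 64 x 128
# group 1: 4 blocks, 256 filters, first block stride 2  -> 32 x 32 x 256
# group 2: 4 blocks, 512 filters, first block stride 2  -> 16 x 16 x 512
# group 3: 4 blocks, 1024 filters, first block stride 2 -> 8 x 8 x 1024
# GlobalAveragePooling2D -> 1024, Dropout(0.2), Dense(2, softmax) -> class probabilities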
model = ResNet18([4, 4, 4, 4])
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])
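Because get_tensor returns integer class codes (0 = cat, 1 = dog) rather than one-hot vectors, SparseCategoricalCrossentropy is the matching loss, and from_logits=False is correct here because the final Dense layer already applies softmax. A minimal numeric sketch of what the loss computes for a single sample:

# y_true = 1 (dog), model output = [0.2, 0.8]
# loss = -log(p[y_true]) = -log(0.8) ≈ 0.223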
checkpoint_save_path = "./save/CatDog.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('------------- loading saved model weights -----------------')
    model.load_weights(checkpoint_save_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path, save_weights_only=True,
                                                 save_best_only=True)
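With save_best_only=True, ModelCheckpoint monitors val_loss by default, so the saved weights come from the epoch with the lowest validation loss. A commented-out alternative (not what the run below used) that tracks validation accuracy instead, using the metric name defined in the compile step:

# cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
#                                                  save_weights_only=True, save_best_only=True,
#                                                  monitor='val_sparse_categorical_accuracy', mode='max')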
#
history = model.fit(x_train, y_train, validation_data=(x_test, y_test), validation_freq=1, batch_size=32, epochs=10,
                    callbacks=[cp_callback])
model.summary()
file = open('weights.txt', 'w')
for v in model.trainable_variables:
    file.write(str(v.name) + '\n')
    file.write(str(v.shape) + '\n')
    file.write(str(v.numpy()) + '\n')
file.close()
# plot the accuracy and loss curves for the training and validation sets
acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
Prediction with this model: the best recognition accuracy on 100 local images was 97%. Accuracy on the training samples was 97%, and accuracy on the test split was 92%.
import os
import cv2
import tensorflow as tf
import numpy as np
import sys
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras.models import Model
'''
The model is built from 4 groups of ResNet blocks; the first block of every group after the first applies a projection shortcut to its input. The skip connections reduce the degradation caused by stacking many convolutions, improving the model's representational power and its recognition accuracy. ReLU activation, categorical cross-entropy loss and L2 regularization are used. Trained on the self-made dataset, accuracy is 97% on the training samples, 92% on the test split, and 91% on images outside the training set.
'''
path = sys.path[0] + "\\save\\CatDog_1.ckpt"
class ResnetBlock(Model):
    def __init__(self, filters, strides=1, residual_path=False):
        super(ResnetBlock, self).__init__()
        self.filters = filters
        self.strides = strides
        self.residual_path = residual_path
        self.c1 = Conv2D(filters, (3, 3), strides=strides, padding='same', use_bias=False)
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.c2 = Conv2D(filters, (3, 3), strides=1, padding='same', use_bias=False)
        self.b2 = BatchNormalization()
        # when residual_path is True the input is downsampled with a 1x1 convolution,
        # so that x matches the shape of F(x) and the two can be added
        if residual_path:
            self.down_c1 = Conv2D(filters, (1, 1), strides=strides, padding='same', use_bias=False)
            self.down_b1 = BatchNormalization()
        self.a2 = Activation('relu')
    def call(self, inputs):
        residual = inputs  # the shortcut starts out as the input itself, residual = x
        # pass the input through conv, BN and activation layers to compute F(x)
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)
        x = self.c2(x)
        y = self.b2(x)
        # projection shortcut when residual_path is True
        if self.residual_path:
            residual = self.down_c1(inputs)
            residual = self.down_b1(residual)
        out = self.a2(y + residual)
        return out
class ResNet18(Model):
    def __init__(self, block_list, initial_filters=128):  # block_list gives the number of ResnetBlocks in each group
        super(ResNet18, self).__init__()
        self.num_blocks = len(block_list)  # number of block groups
        self.block_list = block_list
        self.out_filters = initial_filters
        self.c1 = Conv2D(self.out_filters, (3, 3), strides=1, padding='same', use_bias=False)
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.blocks = tf.keras.models.Sequential()
        # build the stack of ResNet blocks
        for block_id in range(len(block_list)):  # which block group
            for layer_id in range(block_list[block_id]):  # which block within the group
                if block_id != 0 and layer_id == 0:  # downsample the input of every group except the first
                    block = ResnetBlock(self.out_filters, strides=2, residual_path=True)
                else:
                    block = ResnetBlock(self.out_filters, residual_path=False)
                self.blocks.add(block)  # append the finished block to the network
            self.out_filters *= 2  # the next group uses twice as many filters
        self.p1 = tf.keras.layers.GlobalAveragePooling2D()
        self.d1 = Dropout(0.2)
        self.f1 = tf.keras.layers.Dense(2, activation='softmax', kernel_regularizer=tf.keras.regularizers.l2())
    def call(self, inputs):
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)
        x = self.blocks(x)
        x = self.p1(x)
        x = self.d1(x)
        y = self.f1(x)
        return y
model = ResNet18([4, 4, 4, 4])
# load the trained weights for prediction
model.load_weights(filepath=path)
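One subtlety when restoring weights into a subclassed Keras model: if the variables have not been created yet, load_weights on a TF checkpoint defers the restore until the model is first called. An optional sketch (assuming the 64×64×3 input shape used during training) that builds the variables first so any mismatch is reported immediately:

# model(tf.zeros([1, 64, 64, 3]))    # dummy forward pass creates the variables
# model.load_weights(path)           # restore now instead of deferring to the first predict()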
# load and preprocess the local test images
def predict():
    path = sys.path[0] + "\\test"
    images = []
    for image in [i for i in os.listdir(path) if "jpg" in i]:
        # OpenCV reads images as BGR; convert to RGB to match the training pipeline
        image = cv2.imread(path + "\\" + image)
        image = cv2.resize(image, (64, 64))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.astype(np.float32) / 255.
        images.append(image)
    images = np.array(images)
    return images
images = predict()
# run the whole batch through the network once, then report each image
preds = model.predict(images)
for i in range(len(images)):
    res = np.argmax(preds[i])
    if int(res) == 0:
        tf.print(f"Image {i + 1} predicted: cat")
    else:
        tf.print(f"Image {i + 1} predicted: dog")
    # plt.imshow(images[i])
    # plt.show()
    image = images[i]
    # the images were converted to RGB for the model, so convert back to BGR for OpenCV display
    cv2.imshow(f"{i}", cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)
cv2.destroyAllWindows()