In real applications the parameters to be optimized can be numerous, which easily leads to overfitting. A common remedy is to first pass the raw image through several layers of feature extraction, and convolution is an effective way to perform that extraction.
[Animated figure: a convolution kernel sliding over the input of a convolutional neural network]
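To make "convolution as feature extraction" concrete, here is a minimal numpy sketch of my own (conv2d_valid is an illustrative helper, not a library function; what deep learning calls "convolution" is technically cross-correlation, which is what this computes):

import numpy as np

def conv2d_valid(image, kernel):
    # Slide `kernel` over `image` with stride 1 and 'valid' padding;
    # each output pixel is the sum of an element-wise product.
    kh, kw = kernel.shape
    oh, ow = image.shape[0] - kh + 1, image.shape[1] - kw + 1
    out = np.zeros((oh, ow))
    for i in range(oh):
        for j in range(ow):
            out[i, j] = np.sum(image[i:i + kh, j:j + kw] * kernel)
    return out

image = np.arange(25, dtype=float).reshape(5, 5)  # a toy 5x5 "image"
kernel = np.array([[1., 0., -1.],
                   [1., 0., -1.],
                   [1., 0., -1.]])                # a vertical-edge detector
print(conv2d_valid(image, kernel).shape)          # (3, 3)

Each output value responds strongly where the image patch resembles the kernel, which is exactly the feature extraction the lecture refers to.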
The two small squares that the red arrows point to (in the course figure) both have a receptive field of 5: whether the feature is produced by a single 5×5 convolution or by two stacked 3×3 convolutions, it summarizes the same 5×5 patch of the original image. So which of the two options is more efficient?
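My notes skipped the actual comparison, so here is the usual parameter-count argument (a back-of-the-envelope check of my own; biases ignored, one input and one output channel assumed):

one_5x5 = 5 * 5          # 25 weights for a single 5x5 kernel, receptive field 5
two_3x3 = 3 * 3 + 3 * 3  # 18 weights for two stacked 3x3 kernels, same receptive field
print(one_5x5, two_3x3)  # 25 18

With C input and C output channels both counts scale by C squared, so the stacked 3×3 layers always need fewer parameters, and they add an extra nonlinearity between the two convolutions as well.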
The next part of the lecture is mainly about normalization (the BN layer). I don't plan to dig into it for now; I'll come back and fill this in after the current project is done…
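For when I come back to it: what a BN layer computes at training time is short enough to note now. A minimal numpy sketch, assuming channels-last input (batch_norm_train is my own helper; the real Keras BatchNormalization layer additionally keeps moving averages of mean and variance for inference):

import numpy as np

def batch_norm_train(x, gamma, beta, eps=1e-3):
    # x: (batch, height, width, channels); gamma and beta are the learned
    # per-channel scale and shift. Each channel is normalized to zero mean
    # and unit variance over the batch and spatial dimensions.
    mean = x.mean(axis=(0, 1, 2), keepdims=True)
    var = x.var(axis=(0, 1, 2), keepdims=True)
    x_hat = (x - mean) / np.sqrt(var + eps)
    return gamma * x_hat + beta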
Pooling reduces the amount of feature data: max pooling tends to extract texture, while average pooling tends to preserve background features.
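A tiny example of my own makes the difference concrete: with 2×2 windows and stride 2, max pooling keeps the strongest response in each window, while average pooling smooths everything toward the local mean:

import numpy as np

x = np.array([[1., 9., 2., 4.],
              [5., 6., 2., 1.],
              [3., 2., 8., 5.],
              [1., 4., 6., 7.]])

# Split the 4x4 map into non-overlapping 2x2 windows, then reduce each window.
windows = x.reshape(2, 2, 2, 2).transpose(0, 2, 1, 3)
print(windows.max(axis=(2, 3)))   # [[9. 4.] [4. 8.]]  -- max pooling
print(windows.mean(axis=(2, 3)))  # [[5.25 2.25] [2.5 6.5]]  -- average pooling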
In Keras, one such convolution block (convolution, BN, activation, pooling, dropout) can be written as a Sequential model:

import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout

model = tf.keras.models.Sequential([
    Conv2D(filters=6, kernel_size=(5, 5), padding='valid'),  # convolution layer
    BatchNormalization(),                                     # BN layer
    Activation('relu'),                                       # activation layer
    MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),   # pooling layer
    Dropout(0.2),                                             # dropout layer
])
CIFAR-10 is a small dataset for general object recognition compiled by Hinton's students Alex Krizhevsky and Ilya Sutskever. It contains RGB color images in 10 classes: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck. Each image is 32×32 pixels, and the dataset holds 50,000 training images and 10,000 test images.
example1 performs 10-class classification on cifar10.
Download the data:
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np
cifar10 = tf.keras.datasets.cifar10
(x_train,y_train),(x_test,y_test) = cifar10.load_data()
plt.imshow(x_train[0])
plt.show()
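Before building anything, it is worth confirming the shapes load_data returns:

print(x_train.shape)  # (50000, 32, 32, 3) -- 50,000 RGB images, 32x32 each
print(y_train.shape)  # (50000, 1)         -- integer class labels 0..9
print(x_test.shape)   # (10000, 32, 32, 3)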
Next, build the network; since this is a 10-class problem, the output layer needs ten neurons. Full implementation:
import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras import Model
np.set_printoptions(threshold=np.inf)  # print arrays in full when dumping weights later
cifar10 = tf.keras.datasets.cifar10
(x_train,y_train),(x_test,y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # scale pixel values to [0, 1]
class Baseline(Model):
def __init__(self):
super(Baseline, self).__init__()
        self.c1 = Conv2D(filters=6, kernel_size=(5, 5), padding='same')   # C: convolution
        self.b1 = BatchNormalization()                                     # B: batch normalization
        self.a1 = Activation('relu')                                       # A: activation
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')   # P: pooling
        self.d1 = Dropout(0.2)                                             # D: dropout
        self.flatten = Flatten()
        self.f1 = Dense(128, activation='relu')
        self.d2 = Dropout(0.2)
        self.f2 = Dense(10, activation='softmax')  # ten outputs, one per class
def call(self,x):
x = self.c1(x)
x = self.b1(x)
x = self.a1(x)
x = self.p1(x)
x = self.d1(x)
x = self.flatten(x)
x = self.f1(x)
x = self.d2(x)
y = self.f2(x)
return y
model = Baseline()
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),  # integer labels; softmax already applied by f2
              metrics=['sparse_categorical_accuracy'])
checkpoint_save_path = "./checkpoint/Baseline.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
print('-------------load the model-----------------')
model.load_weights(checkpoint_save_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,  # save weights only, not the architecture
                                                 save_best_only=True)     # keep only the best epoch
history = model.fit(x_train, y_train, batch_size=32, epochs=5, validation_data=(x_test, y_test), validation_freq=1,
callbacks=[cp_callback])
model.summary()
# print(model.trainable_variables)
# Dump every trainable parameter (name, shape, values) to a text file.
with open('./weights.txt', 'w') as file:
    for v in model.trainable_variables:
        file.write(str(v.name) + '\n')
        file.write(str(v.shape) + '\n')
        file.write(str(v.numpy()) + '\n')
############################################### show ###############################################
# Plot the accuracy and loss curves for the training and validation sets
acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
The example above used an ad-hoc convolutional network for the 10-class task. Next I reproduce several classic architectures and compare how they perform on the same problem.
class LeNet5(Model):
def __init__(self):
super(LeNet5, self).__init__()
self.c1 = Conv2D(filters=6,kernel_size=(5,5))
        # self.b1 = BatchNormalization()  # the original LeNet-5 has no BN layer
self.a1 = Activation('sigmoid')
self.p1 = MaxPool2D(pool_size=(2,2),strides=2)
self.c2 = Conv2D(filters=16,kernel_size=(5,5),
activation='sigmoid')
self.p2 = MaxPool2D(pool_size=(2,2),strides=2)
self.flatten = Flatten()
self.f1 = Dense(120,activation='sigmoid')
self.f2 = Dense(84,activation='sigmoid')
self.f3 = Dense(10,activation='softmax')
def call(self,x):
x = self.c1(x)
x = self.a1(x)
x = self.p1(x)
x = self.c2(x)
x = self.p2(x)
x = self.flatten(x)
x = self.f1(x)
x = self.f2(x)
y = self.f3(x)
return y
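Because the convolutions here use 'valid' padding, the feature maps shrink layer by layer. A dummy forward pass (my own sanity check, not part of the course code) confirms the arithmetic for a 32×32 input:

lenet = LeNet5()
_ = lenet(tf.zeros((1, 32, 32, 3)))  # 32 -> conv5: 28 -> pool: 14 -> conv5: 10 -> pool: 5
lenet.summary()                      # Flatten therefore sees 16*5*5 = 400 features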
class AlexNet8(Model):
def __init__(self):
super(AlexNet8, self).__init__()
        self.c1 = Conv2D(filters=96, kernel_size=(3, 3))  # original AlexNet uses 11x11 here; 3x3 suits 32x32 CIFAR images
self.b1 = BatchNormalization()
self.a1 = Activation('relu')
self.p1 = MaxPool2D(pool_size=(3, 3), strides=2)
self.c2 = Conv2D(filters=256, kernel_size=(3, 3))
self.b2 = BatchNormalization()
self.a2 = Activation('relu')
self.p2 = MaxPool2D(pool_size=(3, 3), strides=2)
self.c3 = Conv2D(filters=384, kernel_size=(3, 3), padding='same',
activation='relu')
self.c4 = Conv2D(filters=384, kernel_size=(3, 3), padding='same',
activation='relu')
self.c5 = Conv2D(filters=256, kernel_size=(3, 3), padding='same',
activation='relu')
self.p3 = MaxPool2D(pool_size=(3, 3), strides=2)
self.flatten = Flatten()
self.f1 = Dense(2048, activation='relu')
self.d1 = Dropout(0.5)
self.f2 = Dense(2048, activation='relu')
self.d2 = Dropout(0.5)
self.f3 = Dense(10, activation='softmax')
def call(self, x):
x = self.c1(x)
x = self.b1(x)
x = self.a1(x)
x = self.p1(x)
x = self.c2(x)
x = self.b2(x)
x = self.a2(x)
x = self.p2(x)
x = self.c3(x)
x = self.c4(x)
x = self.c5(x)
x = self.p3(x)
x = self.flatten(x)
x = self.f1(x)
x = self.d1(x)
x = self.f2(x)
x = self.d2(x)
y = self.f3(x)
return y
model = AlexNet8()  # swap in AlexNet8; the compile/checkpoint/fit code above stays the same
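The same dummy-batch check (again my own addition) shows how quickly the 'valid' convolutions and poolings shrink a 32×32 input here:

_ = model(tf.zeros((1, 32, 32, 3)))  # 32 -> 30 -> 14 -> 12 -> 5 -> 5 -> 5 -> 5 -> 2
model.summary()                      # Flatten sees 256*2*2 = 1024 features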
Note: when switching models, delete the previously generated checkpoint files before training again, or loading will fail: the tensor shapes saved by the old model no longer match the variables the new model expects.
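One quick way to clear the stale files (my addition; the path matches the script above):

import shutil
shutil.rmtree('./checkpoint', ignore_errors=True)  # remove the old checkpoint directory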
VGGNet, InceptionNet, and ResNet are all considerably more complex; on a low-end machine they take a long time to train, so I will study them in detail later. For now I am only skimming TensorFlow.
Designing networks and tuning their hyperparameters still feels like black magic; presumably it takes a great deal of practice before tuning becomes systematic.
Reference: https://www.bilibili.com/video/BV1B7411L7Qt?p=42&spm_id_from=pageDriver