在这里,使用 Mnist 数据集进行演示。
# Load MNIST: images are (N, 28, 28) uint8 grayscale, labels are ints 0-9.
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
因为输入卷积神经网络的数据形状为 (batch_size, height, width, channels),但输入进来的数据集的形状为 (batch_size, height, width),所以在这里要增加一个维度,并把数据类型从整型转换成浮点型。
# A conv net expects NHWC input, but MNIST arrives as (batch, height, width):
# append a trailing channel axis, then promote the uint8 pixels to float32.
x_train = tf.cast(tf.expand_dims(x_train, axis=-1), tf.float32)
x_test = tf.cast(tf.expand_dims(x_test, axis=-1), tf.float32)

# Full-dataset shuffle buffer; train batches of 64, test batches of 200.
dataset_train = (tf.data.Dataset
                 .from_tensor_slices((x_train, y_train))
                 .shuffle(y_train.shape[0])
                 .batch(64))
dataset_test = (tf.data.Dataset
                .from_tensor_slices((x_test, y_test))
                .shuffle(y_test.shape[0])
                .batch(200))
class Conv2D(tf.keras.layers.Layer):
    """2-D convolution layer whose kernel is initialised from an externally
    supplied tensor (the PSO particle) instead of a built-in initializer.

    The kernel shape is taken entirely from ``kernel_initial``; ``build``
    therefore ignores ``input_shape``.
    """

    def __init__(self, output_dim, kernel_initial, kernel_size=(3, 3), strides=(1, 1, 1, 1)):
        super().__init__()
        self.output_dim = output_dim          # number of output channels (bias length)
        self.kernel_size = kernel_size        # NOTE(review): stored but never used below —
                                              # the kernel shape comes from kernel_initial
        self.strides = strides                # NHWC strides for tf.nn.conv2d
        self.kernel_initial = kernel_initial  # initial kernel values, shape (kh, kw, c_in, c_out)

    def build(self, input_shape):
        # One bias scalar per output channel; kernel is seeded from the particle.
        bias_shape = tf.TensorShape((self.output_dim, ))
        self.kernel = tf.Variable(self.kernel_initial)
        self.bias = tf.Variable(tf.zeros(shape=bias_shape))

    def call(self, inputs):
        # conv -> bias add -> ReLU; 'SAME' padding preserves spatial dimensions.
        output = tf.nn.bias_add(tf.nn.conv2d(inputs, filters=self.kernel, strides=self.strides, padding='SAME'), self.bias)
        return tf.nn.relu(output)
每个 CNN 模块都包含一层卷积层,一层池化层和一层批归一化层。
class CnnSection(tf.keras.Model):
    """One CNN stage: convolution -> 2x2 max-pool -> batch normalisation."""

    def __init__(self, num_channels, kernel_initial):
        super().__init__()
        self.conv = Conv2D(num_channels, kernel_initial)
        self.pool = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding='same')
        self.bn = tf.keras.layers.BatchNormalization()

    def call(self, inputs):
        # Run the three sub-layers in sequence.
        return self.bn(self.pool(self.conv(inputs)))
构建方法与 CNN 模块类似,每个 Dense 模块都包含一层全连接层和一层批归一化层。
class DenseSection(tf.keras.Model):
    """One dense stage: fully-connected (ReLU) layer followed by batch norm."""

    def __init__(self, units):
        super().__init__()
        self.dense = tf.keras.layers.Dense(units, activation='relu')
        self.bn = tf.keras.layers.BatchNormalization()

    def call(self, inputs):
        return self.bn(self.dense(inputs))
在这里使用2个卷积层,每个卷积层有32个卷积核;1个全连接层,含32个神经元。
class Classifier(tf.keras.Model):
    """MNIST classifier: two conv sections (32 kernels each), one dense
    section (32 units), and a 10-way softmax head.

    ``kernel_initial`` is a list of one initial kernel tensor per conv
    section, shapes (3, 3, 1, 32) and (3, 3, 32, 32).

    Fixes vs. the original:
    * iterating ``self.CNN.layers`` / ``self.DENSE.layers`` raised
      AttributeError — these are plain Python lists, so iterate them directly;
    * the surrounding text and the two-element kernel list call for TWO conv
      sections, but ``num_cnn`` was 1, silently discarding the second kernel.
    """

    def __init__(self, kernel_initial):
        super().__init__()
        # Two conv sections of 32 channels each — one per entry in kernel_initial.
        self.num_cnn = 2
        self.num_channels = [32, 32]
        self.num_dense = 1
        self.dense_units = [32]

        self.CNN = []
        for i in range(self.num_cnn):
            self.CNN.append(CnnSection(self.num_channels[i], kernel_initial[i]))

        self.flatten = tf.keras.layers.Flatten()

        self.DENSE = []
        for i in range(self.num_dense):
            self.DENSE.append(DenseSection(self.dense_units[i]))
        # Final 10-way softmax output layer.
        self.DENSE.append(tf.keras.layers.Dense(10, activation='softmax'))

    def call(self, inputs):
        x = inputs
        for layer in self.CNN:      # plain list — no `.layers` attribute
            x = layer(x)
        x = self.flatten(x)
        for layer in self.DENSE:
            x = layer(x)
        return x
该参数即粒子群算法中的粒子,
# Build the classifier from the initial kernel weights; `kernel` is the
# two-element list of conv-kernel tensors constructed below — it plays the
# role of one particle in the particle-swarm search.
classifier = Classifier(kernel)
在这里,kernel 是一个列表,包含着两个卷积层中每个卷积核的初始权重,分别记为 kernel1 和 kernel2,由于每个卷积层中权重的形状为:
$size = (kernel_{width},\ kernel_{height},\ channels_{in},\ channels_{out})$
所以在这个示例中:
$size_{kernel1} = (3,\ 3,\ 1,\ 32)$
$size_{kernel2} = (3,\ 3,\ 32,\ 32)$
比如,我们设:
# Glorot-uniform limits: sqrt(6 / (fan_in + fan_out)) for each conv kernel.
limit1 = np.sqrt(6/(3*3*33))   # fan_in = 3*3*1 = 9,  fan_out = 3*3*32 = 288
limit2 = np.sqrt(6/(3*3*64))   # fan_in = fan_out = 3*3*32 = 288

# Draw both kernels and collect them as one "particle".
kernel1 = tf.cast(np.random.uniform(-limit1, limit1, size=(3, 3, 1, 32)), tf.float32)
kernel2 = tf.cast(np.random.uniform(-limit2, limit2, size=(3, 3, 32, 32)), tf.float32)
kernel = [kernel1, kernel2]
这里使用的初始化方法就是 tf.keras.layers.Conv2D 函数的默认卷积核初始化方法 ‘glorot_uniform’,其基本原理可以参考:Tensorflow2.0 中 tf.keras.layers.Conv2D 里的初始化方法 ‘glorot_uniform’ 到底是个啥?。
# Labels are one-hot encoded before the loss call, hence the categorical form.
loss_obj_classifier = tf.keras.losses.CategoricalCrossentropy()

def loss_classifier(real, pred):
    """Cross-entropy between one-hot labels `real` and softmax output `pred`."""
    return loss_obj_classifier(real, pred)
opt_classifier = tf.keras.optimizers.Adam()

def train_step_classifier(x, y):
    """Run one Adam optimisation step on the global `classifier`.

    Returns a tuple (loss, predicted int32 labels, true labels `y`).
    """
    with tf.GradientTape() as tape:
        pred = classifier(x)
        real = tf.one_hot(y, depth=10)  # integer labels -> one-hot for the categorical loss
        l = loss_classifier(real, pred)
    grad = tape.gradient(l, classifier.trainable_variables)
    opt_classifier.apply_gradients(zip(grad, classifier.trainable_variables))
    # argmax over the softmax output gives the predicted class id.
    return l, tf.cast(tf.argmax(pred, axis=1), dtype=tf.int32), y
epochs_classifier = 100

# Train for a fixed number of epochs, logging every 100th batch.
for epoch in range(1, epochs_classifier + 1):
    for batch, (feature, label) in enumerate(dataset_train, start=1):
        loss, pred_label, real_label = train_step_classifier(feature, label)
        if batch % 100 == 0:
            print('第{}次训练中第{}批的误差为{}'.format(epoch, batch, loss))
    print('第{}次训练后的误差为{}'.format(epoch, loss))
# Evaluate accuracy over the whole test set, batch by batch.
total_correct = 0
total_num = 0
for feature, label in dataset_test:
    predicted = tf.cast(tf.argmax(classifier(feature), axis=1), tf.int32)
    hits = tf.reduce_sum(tf.cast(tf.equal(predicted, label), dtype=tf.int32))
    total_correct += int(hits)
    total_num += feature.shape[0]
acc = total_correct / total_num
print('测试集的准确率为{}'.format(acc))
在这里,测试集的准确率就代表了粒子群算法中每个粒子的适应度。
至此,模型已经构建完毕,我们将上面的模型写入 project.py 文件,并将数据导入过程以及训练过程分别封装成函数 dataset_train, dataset_test = load() 和 acc = classify(dataset_train, dataset_test, kernel)。
常规的粒子群算法介绍可以参考我的另一篇文章粒子群算法求解最大值问题详解(附python代码)。
import numpy as np
import project

# PSO hyper-parameters.
w = 0.8         # inertia weight
c1 = 2          # cognitive (self) acceleration coefficient
c2 = 2          # social acceleration coefficient
r1 = 0.6        # cognitive learning rate
r2 = 0.3        # social learning rate
pN = 3          # number of particles
dim = 3         # search-space dimension  # NOTE(review): never used below — verify
max_iter = 300  # maximum number of iterations

# NOTE(review): the initialisation loop below calls tf.cast, but tensorflow is
# never imported in this script — presumably `import tensorflow as tf` belongs
# here; confirm against the original project.
dataset_train, dataset_test = project.load()
适应度也就是测试集分类的准确率。
def get_fitness(params):
    """Fitness of one PSO particle: the test-set accuracy of a classifier
    trained from the kernel tensors in `params`.

    Bug fix: the original ignored `params` and always evaluated the global
    `kernel`, so every particle received the same fitness value and the
    swarm could not actually explore the search space.
    """
    return project.classify(dataset_train, dataset_test, params)
def _random_particle():
    """Draw one particle-shaped pair of float32 tensors ~ N(0, 1)."""
    first = tf.cast(np.random.normal(0, 1, size=(3, 3, 1, 32)), tf.float32)
    second = tf.cast(np.random.normal(0, 1, size=(3, 3, 32, 32)), tf.float32)
    return [first, second]

X = []            # particle positions (each a [kernel1, kernel2] list)
V = []            # particle velocities, same structure as positions
p_best = []       # per-particle best position found so far
p_bestfit = []    # fitness at each p_best
g_bestfit = -1e15 # global best fitness; any real fitness beats this sentinel

for i in range(pN):
    # Initialise this particle's position and velocity at random.
    kernel = _random_particle()
    X.append(kernel)
    V.append(_random_particle())

    # Its starting position is also its personal best so far.
    p_best.append(kernel)
    p_bestfit.append(get_fitness(kernel))
    if p_bestfit[i] > g_bestfit:
        g_bestfit = p_bestfit[i]
        g_best = X[i]
实现两个列表对应元素相加。
def sum_(v1, v2):
    """Element-wise sum of two equal-length sequences, as a new list."""
    return [a + b for a, b in zip(v1, v2)]
实现两个列表对应元素相减。
def subtract(v1, v2):
    """Element-wise difference v1 - v2 of two equal-length sequences."""
    return [a - b for a, b in zip(v1, v2)]
实现一个列表中的所有元素同乘一个实数。
def multiply(x, w):
    """Scale every element of sequence `x` by the scalar `w` (new list)."""
    return [item * w for item in x]
fitness = []  # best-so-far fitness, one value recorded per iteration
for _ in range(max_iter):
    # --- Evaluation phase: refresh personal and global bests. ---
    for i in range(pN):
        temp = get_fitness(X[i])  # fitness (test accuracy) at the current position
        print('第{}个粒子的适应度为{}'.format(i+1, temp))
        if temp > p_bestfit[i]:  # new personal best for particle i
            p_bestfit[i] = temp
            p_best[i] = X[i]
            if p_bestfit[i] > g_bestfit:  # ...which may also be a new global best
                g_best = X[i]
                g_bestfit = p_bestfit[i]
    # --- Update phase: standard PSO velocity / position rules. ---
    for i in range(pN):
        # v <- w*v + c1*r1*(p_best - x) + c2*r2*(g_best - x); then x <- x + v.
        dist1 = subtract(p_best[i], X[i])
        dist2 = subtract(g_best, X[i])
        dist = sum_(multiply(dist1, c1*r1), multiply(dist2, c2*r2))
        V[i] = sum_(multiply(V[i], w), dist)
        X[i] = sum_(X[i], V[i])
    fitness.append(g_bestfit)

print('fitness: ', fitness)
print(g_best, g_bestfit)