在这里,我们使用 MNIST 数据集进行演示。
import tensorflow as tf
# Load the MNIST train/test split through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
因为输入卷积神经网络的数据形状为 (batch_size, height, width, channels),但输入进来的数据集的形状为 (batch_size, height, width),所以在这里要增加一个维度,并把数据类型从整型转换成浮点型。
# The convolutional layers expect (batch, height, width, channels) while the
# loaded images are (batch, height, width): append a channel axis and convert
# the pixel values to float32.
x_train = tf.cast(tf.expand_dims(x_train, axis=3), tf.float32)
x_test = tf.cast(tf.expand_dims(x_test, axis=3), tf.float32)

# Training data: shuffle over the whole set, then feed mini-batches of 64.
dataset_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(y_train.shape[0])
    .batch(64)
)

# Test data: accuracy does not depend on sample order, so no shuffle is
# needed. Use moderate batches instead of pushing the whole test set through
# the network at once, which can exhaust memory for wide convolutional
# layers; the evaluation loop accumulates its counts across batches.
dataset_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(256)
每个 CNN 模块都包含一层卷积层,一层池化层和一层批归一化层。
class CnnSection(tf.keras.Model):
    """One convolutional section: Conv2D -> MaxPool2D -> BatchNormalization.

    Args:
        num_channels: number of filters of the convolution layer.
    """

    def __init__(self, num_channels):
        super().__init__()
        conv_options = dict(kernel_size=3,
                            strides=1,
                            padding='same',
                            activation='relu')
        pool_options = dict(pool_size=2, strides=2, padding='same')
        self.conv = tf.keras.layers.Conv2D(num_channels, **conv_options)
        self.pool = tf.keras.layers.MaxPool2D(**pool_options)
        self.bn = tf.keras.layers.BatchNormalization()

    def call(self, inputs):
        """Apply convolution, pooling and batch normalisation in that order."""
        return self.bn(self.pool(self.conv(inputs)))
构建方法与 CNN 模块类似,每个 Dense 模块都包含一层全连接层和一层批归一化层。
class DenseSection(tf.keras.Model):
    """One fully connected section: Dense (ReLU) -> BatchNormalization.

    Args:
        units: number of neurons of the dense layer.
    """

    def __init__(self, units):
        super().__init__()
        self.dense = tf.keras.layers.Dense(units, activation='relu')
        self.bn = tf.keras.layers.BatchNormalization()

    def call(self, inputs):
        """Apply the dense layer followed by batch normalisation."""
        return self.bn(self.dense(inputs))
class Classifier(tf.keras.Model):
    """Image classifier made of a variable number of CNN and dense sections.

    The network is: ``num_cnn`` CnnSection blocks, a Flatten layer,
    ``num_dense`` DenseSection blocks and a final 10-way softmax Dense layer.

    Args:
        num_cnn: number of convolutional sections.
        num_channels: sequence whose first ``num_cnn`` entries give the
            filter count of each convolutional section.
        num_dense: number of dense sections.
        dense_units: sequence whose first ``num_dense`` entries give the
            unit count of each dense section.
    """

    def __init__(self,
                 num_cnn, num_channels,
                 num_dense, dense_units):
        super().__init__()
        self.CNN = [CnnSection(num_channels[i]) for i in range(num_cnn)]
        self.flatten = tf.keras.layers.Flatten()
        # Build the complete dense stack before assigning it so the attribute
        # already holds every layer when Keras starts tracking it.
        dense_stack = [DenseSection(dense_units[i]) for i in range(num_dense)]
        dense_stack.append(tf.keras.layers.Dense(10, activation='softmax'))
        self.DENSE = dense_stack

    def call(self, inputs):
        """Run ``inputs`` through every section and return class probabilities."""
        x = inputs
        # Iterate the lists themselves: a `.layers` attribute is only present
        # on the list wrapper of some Keras versions, not on a list in general.
        for layer in self.CNN:
            x = layer(x)
        x = self.flatten(x)
        for layer in self.DENSE:
            x = layer(x)
        return x
分类器的网络结构由参数 params 决定,该参数即遗传算法中的染色体。
# Build the classifier from a chromosome: number of conv sections, filters per
# conv section, number of dense sections, units per dense section.
classifier = Classifier(params[0], params[1], params[2], params[3])
在这里,params 是一个列表,params[0] 是卷积层的层数;params[1] 是一个列表,列表中元素的数量等于 params[0],它表示每层卷积层中的卷积核的个数;params[2] 是全连接层的层数;params[3] 是一个列表,列表中元素的数量等于 params[2],它表示每层全连接层中的神经元的个数。比如,我们设:
# Example chromosome: 3 conv sections with 256/256/512 filters and
# 4 dense sections with 256/256/128/32 units.
params = [3, [256, 256, 512], 4, [256, 256, 128, 32]]
# Cross-entropy between one-hot targets and the predicted class probabilities.
loss_obj_classifier = tf.keras.losses.CategoricalCrossentropy()


def loss_classifier(real, pred):
    """Return the categorical cross-entropy of ``pred`` against ``real``."""
    return loss_obj_classifier(real, pred)


# Adam optimizer with its default settings.
opt_classifier = tf.keras.optimizers.Adam()
def train_step_classifier(x, y):
    """Run one optimisation step of ``classifier`` on a single batch.

    Args:
        x: batch of input images.
        y: batch of integer class labels.

    Returns:
        A tuple ``(loss, predicted_labels, y)`` where ``predicted_labels`` is
        the int32 arg-max of the predicted probabilities and ``y`` is returned
        unchanged.
    """
    with tf.GradientTape() as tape:
        # training=True so the BatchNormalization layers normalise with batch
        # statistics and update their moving averages during training.
        pred = classifier(x, training=True)
        real = tf.one_hot(y, depth=10)
        l = loss_classifier(real, pred)
    grad = tape.gradient(l, classifier.trainable_variables)
    opt_classifier.apply_gradients(zip(grad, classifier.trainable_variables))
    return l, tf.cast(tf.argmax(pred, axis=1), dtype=tf.int32), y
# Number of passes over the training set.
epochs_classifier = 1
for epoch in range(epochs_classifier):
    # Count batches from 1 so the progress message needs no offset.
    for step, (feature, label) in enumerate(dataset_train, start=1):
        loss, pred_label, real_label = train_step_classifier(feature, label)
        if step % 100 == 0:
            print('第{}次训练中第{}批的误差为{}'.format(epoch+1, step, loss))
    # Report the loss of the last batch once the epoch is finished.
    print('第{}次训练后的误差为{}'.format(epoch+1, loss))
# Count correct arg-max predictions over the whole test set.
total_correct = 0
total_num = 0
for feature, label in dataset_test:
    # Inference mode: BatchNormalization uses its moving statistics.
    prob = classifier(feature, training=False)
    pred = tf.cast(tf.argmax(prob, axis=1), tf.int32)
    # tf.equal needs both operands in the same dtype; the labels keep the
    # dtype they were loaded with (uint8 for the Keras MNIST arrays), so
    # cast them to int32 to match the predictions.
    correct = tf.equal(pred, tf.cast(label, tf.int32))
    correct = tf.reduce_sum(tf.cast(correct, dtype=tf.int32))
    total_correct += int(correct)
    total_num += feature.shape[0]
acc = total_correct / total_num
print('测试集的准确率为{}'.format(acc))
在这里,测试集的准确率就代表了遗传算法中每条染色体的适应度。
至此,模型已经构建完毕,我们将上面的模型写入 project.py 文件,并将数据导入过程以及训练过程分别封装成函数 dataset_train, dataset_test = load() 和 acc = classify(dataset_train, dataset_test, params)。
常规的遗传算法介绍可以参考我的另一篇文章遗传算法求解最大值问题详解(附python代码)。
import numpy as np
import project
import copy
# Genes per chromosome: [conv count, conv widths, dense count, dense widths].
DNA_SIZE = 4
# Number of chromosomes in each generation.
POP_SIZE = 20
# Probability that a chromosome takes part in crossover.
CROSS_RATE = 0.2
# Probability that an individual gene is mutated.
MUTATION_RATE = 0.1
# Number of generations to evolve.
N_GENERATIONS = 40
# Load the datasets once; they are shared by every fitness evaluation.
dataset_train, dataset_test = project.load()
适应度也就是测试集分类的准确率。
def get_fitness(params):
    """Return the fitness of chromosome ``params``.

    The fitness is whatever ``project.classify`` returns for a model built
    from ``params`` on the module-level train and test datasets.
    """
    accuracy = project.classify(dataset_train, dataset_test, params)
    return accuracy
每遍历完一次种群后,都要根据适应度来从这一代种群中选择染色体构成下一代种群。
def select(pop, fitness):
    """Draw the next population by fitness-proportional sampling.

    Args:
        pop: current population, a list of ``POP_SIZE`` chromosomes.
        fitness: NumPy array holding the fitness of each chromosome.

    Returns:
        A list of ``POP_SIZE`` chromosomes sampled with replacement, each with
        probability proportional to its fitness.
    """
    probabilities = fitness / fitness.sum()
    chosen = np.random.choice(np.arange(POP_SIZE),
                              size=POP_SIZE,
                              replace=True,
                              p=probabilities)
    return [pop[k] for k in chosen]
进化过程中,每条染色体都有机会和其他染色体互换某一部分相同位置上的基因。在这个项目中,由于前两个基因之间和后两个基因之间都存在关联性,即第二(四)个基因列表中的元素个数等于第一(三)个基因所表示的数字,所以在这里,如果要交换第一(二)个基因,那么第二(一)个基因也必须被交换,对于后两个基因也做如此规定。
def crossover(parent, pop):
    """Possibly exchange gene pairs of ``parent`` with a random chromosome.

    With probability ``CROSS_RATE`` a donor is drawn from ``pop`` and, for
    each gene pair (layer count + layer widths), a coin flip decides whether
    the parent's pair is replaced by the donor's. Pairs are swapped as a unit
    so the width list always matches its layer count.

    Args:
        parent: chromosome ``[n_conv, conv_widths, n_dense, dense_widths]``.
        pop: population the donor is drawn from.

    Returns:
        A new chromosome when crossover takes place, otherwise ``parent``
        itself. Neither ``parent`` nor ``pop`` is modified.
    """
    if np.random.rand() >= CROSS_RATE:
        return parent

    donor = pop[np.random.randint(0, POP_SIZE)]
    # The builtin `bool` replaces the `np.bool` alias removed from NumPy.
    swap_mask = np.random.randint(0, 2, size=DNA_SIZE // 2).astype(bool)

    child = copy.deepcopy(parent)
    for i, swap in enumerate(swap_mask):
        if swap:
            # Copy only the donated slice so the child never shares its
            # width lists with the donor.
            child[2 * i: 2 * (i + 1)] = copy.deepcopy(donor[2 * i: 2 * (i + 1)])
    return child
同样地,对于变异函数也需要作出修改:
如果变异的是第二个或第四个基因(和神经元个数有关的基因),那么就对它们随机赋值;如果变异的是第一个或第三个基因(和层数有关的基因),那么就先判断新的随机生成的层数是否和原来的层数相等,如果相等,则不做改变;如果大于原来的层数,那么在神经元基因中要随机添加数字来表示新的层数上的神经元个数;如果小于原来的层数,那么在神经元基因中要从末尾去掉多余的数字来适应现在的层数。
def _resize_layer_gene(child, point, low, high):
    """Redraw the layer count at ``child[point]`` from ``[low, high)``.

    The width list at ``child[point + 1]`` is trimmed from the end, or padded
    with new random widths in ``[32, 257)``, so that it keeps one entry per
    layer. ``child`` is modified in place.
    """
    new_num = np.random.randint(low, high)
    old_num = child[point]
    widths = child[point + 1]
    # At most one of the two loops runs; a negative range is empty.
    for _ in range(old_num - new_num):
        widths.pop()
    for _ in range(new_num - old_num):
        widths.append(np.random.randint(32, 257))
    child[point] = new_num


def mutate(child_ori):
    """Return a mutated copy of chromosome ``child_ori``.

    Each of the ``DNA_SIZE`` genes mutates independently with probability
    ``MUTATION_RATE``:

    * width genes (index 1 and 3) get entirely new random widths;
    * count genes (index 0 and 2) get a new random layer count, and the
      following width gene is resized to match.

    Layer counts are drawn from the same ranges used when the initial
    population is created (2-5 convolutional sections, 1-5 dense sections),
    so mutation can reach every architecture in the search space.

    Args:
        child_ori: chromosome ``[n_conv, conv_widths, n_dense, dense_widths]``.

    Returns:
        A new chromosome; ``child_ori`` is left untouched.
    """
    child = copy.deepcopy(child_ori)
    for point in range(DNA_SIZE):
        if np.random.rand() >= MUTATION_RATE:
            continue
        if point in (1, 3):
            # tolist() yields plain Python ints, like the appended widths.
            child[point] = np.random.randint(
                32, 257, size=len(child[point])).tolist()
        elif point == 0:
            _resize_layer_gene(child, point, 2, 6)
        elif point == 2:
            _resize_layer_gene(child, point, 1, 6)
    return child
假设卷积层的层数被限制在2到5之间,全连接层的层数被限制在1到5之间,每个层上的神经元个数的范围都是 [32, 257)。
# Initial population: POP_SIZE random chromosomes of the form
# [conv count, conv widths, dense count, dense widths].
pop = []
for i in range(POP_SIZE):
    # 2-5 convolutional sections, each with a width in [32, 257).
    number_c = np.random.randint(2, 6)
    units_c = list(np.random.randint(32, 257, size=(number_c, )))
    # 1-5 dense sections, each with a width in [32, 257).
    number_d = np.random.randint(1, 6)
    units_d = list(np.random.randint(32, 257, size=(number_d, )))
    each_pop = [number_c, units_c, number_d, units_d]
    pop.append(each_pop)
# Main evolutionary loop: evaluate, report, select, then vary the population.
for each_generation in range(N_GENERATIONS):
    # Evaluate every chromosome of the current generation.
    fitness = np.zeros([POP_SIZE, ])
    for i in range(POP_SIZE):
        fitness[i] = get_fitness(pop[i])
        print('第%d代第%d个染色体的适应度为%f' % (each_generation+1, i+1, fitness[i]))
        print('此染色体为:', pop[i])
    best = np.argmax(fitness)
    print("Generation:", each_generation+1, "Most fitted DNA: ", pop[best], "适应度为:", fitness[best])

    # Fitness-proportional selection of the next generation.
    pop = select(pop, fitness)

    # Donors come from a snapshot so that children written back into `pop`
    # during this pass do not influence later crossovers.
    pop_copy = copy.deepcopy(pop)
    for i, child in enumerate(pop):
        child_cross = crossover(child, pop_copy)
        # Mutate the crossover result; mutating `child` here would silently
        # discard the crossover.
        child_mutate = mutate(child_cross)
        pop[i] = child_mutate
【注】在以上代码中多处使用了 copy.deepcopy() 函数,其具体作用可以参考:python 复制列表的六种方法。