由于要将所有需要优化的参数写到一个列表中,所以在此文件中需要定义两个函数,分别是创建卷积层(包括池化层、BN层和dropout层)的函数 create_cnn(inputs, filters, is_pool=False) 和创建全连接层(包括BN层和dropout层)的函数 create_dense(inputs, units)。
输入:
输出:
# CNN block factory
def create_cnn(inputs, filters, is_pool=False):
    """Stack Conv2D -> BatchNormalization -> Dropout (-> MaxPool2D) on ``inputs``.

    Args:
        inputs: tensor fed to the 3x3, stride-1, 'same'-padded ReLU convolution.
        filters: number of convolution kernels.
        is_pool: when truthy, a 2x2 max-pooling layer is appended.

    Returns:
        ``(conv, batch_norm, dropout)`` or, when pooling is requested,
        ``(conv, batch_norm, dropout, pool)``. The last element is always the
        tensor that should feed the next layer.
    """
    conv_out = layers.Conv2D(
        filters,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        activation='relu',
    )(inputs)
    print('Cnn', conv_out.shape)
    normed = layers.BatchNormalization()(conv_out)
    dropped = layers.Dropout(0.2)(normed)
    if not is_pool:
        return conv_out, normed, dropped
    pooled = layers.MaxPool2D(pool_size=(2, 2))(dropped)
    print('Pool', pooled.shape)
    return conv_out, normed, dropped, pooled
输入:
输出:
# Dense block factory
def create_dense(inputs, units):
    """Stack Dense -> Dropout -> BatchNormalization on ``inputs``.

    Args:
        inputs: tensor fed to the fully connected layer.
        units: number of neurons of the ReLU dense layer (L2-regularised, 0.001).

    Returns:
        ``(dense, dropout, batch_norm)``; the last element is the tensor that
        should feed the next layer.
    """
    l2_penalty = keras.regularizers.l2(0.001)
    fully_connected = layers.Dense(
        units, kernel_regularizer=l2_penalty, activation='relu'
    )
    dense_out = fully_connected(inputs)
    print('Dense', dense_out.shape)
    dropped = layers.Dropout(0.2)(dense_out)
    normed = layers.BatchNormalization()(dropped)
    return dense_out, dropped, normed
设置卷积层参数的时候,由于样本经过池化层之后的shape的高和宽会减少一半,所以这里只要最后两层卷积层有池化层,其他的没有池化层。
# Convolutional part: number of layers and kernels per layer.
cnn_num_layers = 2
filters = [32, 64]

# Pooling flags, one per conv layer. Each pooling halves height and width, so
# only the last two conv layers are pooled (or the single layer when there is
# just one). Plain ints are used instead of NumPy float scalars.
pool = [0] * max(cnn_num_layers - 2, 0) + [1] * min(cnn_num_layers, 2)

# Per-layer slots that the model-building code fills with the created tensors.
# ``None`` marks "not created yet" (e.g. no pooling output for that layer).
cnn_name = [None] * cnn_num_layers
batch_layer_name = [None] * cnn_num_layers
cnn_dropout_name = [None] * cnn_num_layers
pool_name = [None] * cnn_num_layers

# Fully connected part that follows the conv stack: layer count and units.
cnn_dense_num_layers = 2
cnn_dense_units = [128, 64]
cnn_dense_name = [None] * cnn_dense_num_layers
cnn_dense_dropout_name = [None] * cnn_dense_num_layers
cnn_dense_batch_name = [None] * cnn_dense_num_layers
按照介绍函数时的解释构建网络模型。
# Model input: one sample has the shape of x_train without the batch axis.
inputs_cnn = layers.Input(
    shape=(x_train.shape[1], x_train.shape[2], x_train.shape[3]),
    name='inputs',
)
print('Inputs:', inputs_cnn.shape)

# Convolutional stack. create_cnn returns the tensor to feed forward as the
# last element of its result (pool output if pooled, dropout output otherwise).
previous = inputs_cnn
for i in range(cnn_num_layers):
    created = create_cnn(previous, filters[i], is_pool=pool[i])
    if pool[i]:
        cnn_name[i], batch_layer_name[i], cnn_dropout_name[i], pool_name[i] = created
    else:
        cnn_name[i], batch_layer_name[i], cnn_dropout_name[i] = created
    previous = created[-1]

# The last conv layer is always pooled, so its pool output is flattened.
flatten = layers.Flatten()(pool_name[cnn_num_layers-1])
print('Flatten:', flatten.shape)

# Fully connected stack, chained through each block's batch-norm output.
previous = flatten
for i in range(cnn_dense_num_layers):
    created = create_dense(previous, cnn_dense_units[i])
    cnn_dense_name[i], cnn_dense_dropout_name[i], cnn_dense_batch_name[i] = created
    previous = cnn_dense_batch_name[i]

# 10-way softmax classifier head.
outputs_cnn = layers.Dense(10, activation='softmax')(cnn_dense_batch_name[cnn_dense_num_layers-1])
print('Outputs:', outputs_cnn.shape)
以上没有用到列表num,而是直接将层数设为2,卷积核数量也直接给出,目的是为了方便讲解,下面给出完整代码:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import models, layers, optimizers
import matplotlib.pyplot as plt
# Builder for one convolutional block
def create_cnn(inputs, filters, is_pool=False):
    """Apply Conv2D, BatchNormalization and Dropout, optionally followed by MaxPool2D.

    Args:
        inputs: input tensor of the block.
        filters: number of 3x3 convolution kernels (stride 1, 'same' padding, ReLU).
        is_pool: truthy to append a 2x2 max-pooling layer after the dropout.

    Returns:
        A 3-tuple ``(conv, batch_norm, dropout)`` without pooling, or a 4-tuple
        ``(conv, batch_norm, dropout, pool)`` with pooling.
    """
    conv_layer = layers.Conv2D(filters, kernel_size=(3, 3), strides=(1, 1),
                               padding='same', activation='relu')
    feature_map = conv_layer(inputs)
    print('Cnn', feature_map.shape)
    normalized = layers.BatchNormalization()(feature_map)
    regularized = layers.Dropout(0.2)(normalized)
    outputs = (feature_map, normalized, regularized)
    if is_pool:
        downsampled = layers.MaxPool2D(pool_size=(2, 2))(regularized)
        print('Pool', downsampled.shape)
        outputs = outputs + (downsampled,)
    return outputs
# Builder for one fully connected block
def create_dense(inputs, units):
    """Apply Dense (ReLU, L2 0.001), then Dropout(0.2), then BatchNormalization.

    Args:
        inputs: input tensor of the block.
        units: number of neurons in the dense layer.

    Returns:
        A 3-tuple ``(dense, dropout, batch_norm)`` of the tensors created.
    """
    weight_decay = keras.regularizers.l2(0.001)
    hidden = layers.Dense(units, kernel_regularizer=weight_decay, activation='relu')(inputs)
    print('Dense', hidden.shape)
    after_dropout = layers.Dropout(0.2)(hidden)
    after_norm = layers.BatchNormalization()(after_dropout)
    return hidden, after_dropout, after_norm
def load():
    """Load MNIST through Keras and prepare it for a 2-D CNN.

    Pixel values are divided by 255.0 and both image arrays are reshaped to
    ``(-1, 28, 28, 1)`` so that they carry an explicit channel axis.

    Returns:
        ``(x_train, y_train, x_test, y_test)``.
    """
    train_split, test_split = keras.datasets.mnist.load_data()
    x_train, y_train = train_split
    x_test, y_test = test_split
    # Simple normalisation of the MNIST pixel values, then add the channel axis.
    image_shape = (-1, 28, 28, 1)
    x_train = (x_train / 255.0).reshape(image_shape)
    x_test = (x_test / 255.0).reshape(image_shape)
    return x_train, y_train, x_test, y_test
def classify(x_train, y_train, x_test, y_test, num):
    """Build, train and evaluate the CNN encoded by ``num``.

    Args:
        x_train: training images, indexed as (sample, height, width, channel).
        y_train: integer class labels for ``x_train``.
        x_test: test images with the same layout as ``x_train``.
        y_test: integer class labels for ``x_test``.
        num: sequence laid out as
            ``[n_conv, n_dense, conv_filters..., dense_units...]`` where the
            first two entries are the layer counts and the rest are the
            kernel / neuron counts of each layer, in order.

    Returns:
        The accuracy reported by ``evaluate`` on the test set.

    Raises:
        ValueError: if a layer count is below 1 or ``num`` does not hold one
            size for every layer.
    """
    # int() keeps the list arithmetic below safe even for NumPy integer genes.
    cnn_num_layers = int(num[0])
    cnn_dense_num_layers = int(num[1])
    if cnn_num_layers < 1 or cnn_dense_num_layers < 1:
        raise ValueError(
            'num must request at least one conv and one dense layer, got %r'
            % (list(num[:2]),)
        )
    dense_start = 2 + cnn_num_layers
    filters = num[2:dense_start]
    cnn_dense_units = num[dense_start:dense_start + cnn_dense_num_layers]
    if len(filters) != cnn_num_layers or len(cnn_dense_units) != cnn_dense_num_layers:
        raise ValueError(
            'num has %d entries but %d are required'
            % (len(num), dense_start + cnn_dense_num_layers)
        )

    # This function is called once per chromosome by the genetic algorithm.
    # Dropping the state left behind by previously built models keeps memory
    # from growing over hundreds of evaluations.
    keras.backend.clear_session()

    # Pooling halves height and width, so only the last two conv layers (or
    # the only one) are followed by a pooling layer.
    pool = [False] * max(cnn_num_layers - 2, 0) + [True] * min(cnn_num_layers, 2)

    inputs_cnn = layers.Input(
        shape=(x_train.shape[1], x_train.shape[2], x_train.shape[3]),
        name='inputs',
    )

    # Both helpers return the tensor to feed forward as their last element.
    tensor = inputs_cnn
    for n_filters, is_pool in zip(filters, pool):
        tensor = create_cnn(tensor, n_filters, is_pool=is_pool)[-1]
    tensor = layers.Flatten()(tensor)
    for n_units in cnn_dense_units:
        tensor = create_dense(tensor, n_units)[-1]
    outputs_cnn = layers.Dense(10, activation='softmax')(tensor)

    CNN_model = keras.Model(inputs_cnn, outputs_cnn)
    # Labels are integer class ids, hence the sparse loss;
    # CategoricalCrossentropy would require one-hot labels (to_categorical).
    CNN_model.compile(optimizer=keras.optimizers.Adam(),
                      loss=keras.losses.SparseCategoricalCrossentropy(),
                      metrics=['accuracy'])
    CNN_model.fit(x_train, y_train, batch_size=64, epochs=5,
                  validation_split=0.1, verbose=0)
    print('CNN finished!')
    # evaluate returns [loss, accuracy] for the metrics compiled above.
    results = CNN_model.evaluate(x_test, y_test, verbose=0)
    return results[1]
列表num中的前两个元素分别表示卷积层的层数和全连接层的层数,后面的元素表示每层的卷积核/神经元个数。
返回的值为测试集的准确率。
常规的遗传算法介绍可以参考我的另一篇文章《遗传算法求解最大值问题详解(附python代码)》。
在优化卷积神经网络这个问题上,用常规的遗传算法不易实现,原因如下:
import os

# TF_CPP_MIN_LOG_LEVEL only has an effect if it is set before TensorFlow is
# first imported, so it must precede the import of deep_learning (which is
# presumably the module that imports TensorFlow -- confirm).
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'

import numpy as np

import deep_learning as project
# Number of leading genes that store layer counts (conv layers, dense layers).
DNA_SIZE = 2
# Fixed chromosome width; shorter chromosomes are zero-padded up to this length.
DNA_SIZE_MAX = 9
# Number of chromosomes in the population.
POP_SIZE = 20
# Probability that a parent is crossed with a randomly chosen mate.
CROSS_RATE = 0.5
# Per-gene probability of attempting a mutation.
MUTATION_RATE = 0.01
# Number of generations the genetic algorithm runs.
N_GENERATIONS = 40
# Data is loaded once and shared by every fitness evaluation.
train_x, train_y, test_x, test_y = project.load()
def get_fitness(x):
    """Return the fitness of chromosome ``x``.

    The fitness is whatever ``project.classify`` returns when it is given the
    module-level train/test data and ``x`` as its ``num`` argument.
    """
    score = project.classify(train_x, train_y, test_x, test_y, num=x)
    return score
def select(pop, fitness):
    """Roulette-wheel selection: resample the population by fitness.

    Args:
        pop: 2-D array with one chromosome per row.
        fitness: 1-D array with one fitness value per row of ``pop``.

    Returns:
        A new array with as many rows as ``pop``, drawn with replacement and
        with probability proportional to ``fitness``. If the fitness values do
        not sum to a positive finite number (e.g. every model scored 0), rows
        are drawn uniformly instead of dividing by zero.
    """
    fitness = np.asarray(fitness, dtype=float)
    size = len(pop)
    total = fitness.sum()
    # p=None makes np.random.choice sample uniformly.
    probabilities = fitness / total if np.isfinite(total) and total > 0 else None
    idx = np.random.choice(np.arange(size), size=size, replace=True, p=probabilities)
    return pop[idx]
def crossover(parent, pop):
    """Cross ``parent`` in place with a random mate taken from ``pop``.

    With probability ``CROSS_RATE`` a mate row and a random gene mask are
    drawn; masked genes are copied from the mate into ``parent``. A gene is
    only exchanged when it is non-zero in both chromosomes, and the first two
    genes (the layer counts) are never exchanged, so the zero padding and the
    layer counts of ``parent`` are preserved.

    Args:
        parent: 1-D chromosome of length ``DNA_SIZE_MAX``; modified in place.
        pop: 2-D array of candidate mates, ``POP_SIZE`` rows.

    Returns:
        ``parent`` (the same array object).
    """
    if np.random.rand() < CROSS_RATE:
        mate_index = np.random.randint(0, POP_SIZE, size=1)
        # The builtin bool is used because the np.bool alias was removed in
        # NumPy 1.24.
        cross_points = np.random.randint(0, 2, size=DNA_SIZE_MAX).astype(bool)
        mate = pop[mate_index[0]]
        # Keep only positions where both chromosomes carry a real gene ...
        cross_points &= (mate * parent) != 0
        # ... and never touch the two layer-count genes.
        cross_points[:2] = False
        parent[cross_points] = mate[cross_points]
    return parent
def mutate(child):
    """Randomly re-draw size genes of ``child`` in place.

    Every gene position is visited and, with probability ``MUTATION_RATE``,
    becomes a mutation candidate. Only size genes are changed: the first two
    genes hold the layer counts and zero genes are padding, so both are left
    untouched. A mutated gene gets a new integer in ``[32, 256]``.

    The size genes start at index 2 (the same boundary ``crossover`` uses);
    the previous ``point >= 3`` check never mutated the first filter count.

    Args:
        child: 1-D chromosome of length ``DNA_SIZE_MAX``; modified in place.

    Returns:
        ``child`` (the same array object).
    """
    for point in range(DNA_SIZE_MAX):
        if np.random.rand() >= MUTATION_RATE:
            continue
        if point >= 2 and child[point] != 0:
            child[point] = np.random.randint(32, 257)
    return child
# Layer-count genes: column 0 is drawn from [1, 4], column 1 from [1, 3].
pop_layers = np.zeros((POP_SIZE, DNA_SIZE), np.int32)
pop_layers[:, 0] = np.random.randint(1, 5, size=(POP_SIZE,))
pop_layers[:, 1] = np.random.randint(1, 4, size=(POP_SIZE,))

# Full chromosomes: layer counts followed by one size gene in [32, 256] per
# layer; the unused tail of each row stays zero.
pop = np.zeros((POP_SIZE, DNA_SIZE_MAX))
for i in range(POP_SIZE):
    layer_counts = pop_layers[i]
    pop_neurons = np.random.randint(32, 257, size=(layer_counts.sum(),))
    pop_stack = np.hstack((layer_counts, pop_neurons))
    pop[i, :pop_stack.size] = pop_stack
for each_generation in range(N_GENERATIONS):
    # Evaluate every chromosome of the current population.
    fitness = np.zeros([POP_SIZE, ])
    for i in range(POP_SIZE):
        # Chromosomes are zero-padded to DNA_SIZE_MAX: keep the genes in front
        # of the first zero and convert them to plain ints for the model.
        pop_list = []
        for gene in pop[i]:
            if gene == 0.0:
                break
            pop_list.append(int(gene))
        fitness[i] = get_fitness(pop_list)
        print('第%d代第%d个染色体的适应度为%f' % (each_generation+1, i+1, fitness[i]))
        print('此染色体为:', pop_list)
    best = np.argmax(fitness)
    print("Generation:", each_generation+1, "Most fitted DNA: ", pop[best, :], "适应度为:", fitness[best])

    # Breed the next generation: selection, then crossover and mutation.
    pop = select(pop, fitness)
    # Mates are taken from a snapshot so earlier crossovers in this generation
    # do not influence later ones.
    pop_copy = pop.copy()
    for row in range(POP_SIZE):
        child = crossover(pop[row], pop_copy)
        # Write the child back explicitly instead of relying on crossover and
        # mutate editing a row view in place.
        pop[row] = mutate(child)
其中,如下代码的作用是将染色体数组中用于补位的0元素删除掉(即从第一个0开始截断),具体实现过程可以参考我的另一篇文章《删掉ndarray数组中的所有零元素》。
for each_generation in range(N_GENERATIONS):
    fitness = np.zeros([POP_SIZE, ])
    for i in range(POP_SIZE):
        # Cut the chromosome at its first zero gene (zeros are padding) ...
        genes = list(pop[i])
        if 0.0 in genes:
            genes = genes[:genes.index(0.0)]
        # ... and turn the remaining float genes into plain ints.
        pop_list = [int(gene) for gene in genes]