Study hard, blog at my own pace.
This chapter walks through the implementation of Sequential and ends by assembling a small deep learning model.
The previous chapters covered data loading, the fully connected layer, optimizers, and activation functions, so the groundwork is in place. The Sequential we want to implement should be usable like this:
# Two ways to build a model
# 1. pass a list of layers
model1 = Sequential([
Dense(kernel_size=64, activation="sigmoid", input_shape=(784, )),
Dense(kernel_size=10, activation="sigmoid")
])
# 2. add layers one by one with add()
model2 = Sequential()
model2.add(Dense(kernel_size=64, activation="sigmoid", input_shape=(784, )))
model2.add(Dense(kernel_size=10, activation="sigmoid"))
As in Keras, the idea is to keep a layer_list variable in the constructor and to provide an add member function that appends a layer to layer_list:
def __init__(self, layer_list=None, name=None):
"""
初始化网络模型
:param layer_list: 网络层列表
:param name: 模型名称
"""
self.layer_list = layer_list if layer_list else []
self.name = name if name else "sequential"
self.loss = None
self.optimizer = None
self.lr = None
def add(self, layer):
"""
添加网络层
:param layer: 神经网络层
:return: 无返回
"""
self.layer_list.append(layer)
Next we implement the compile function, which is slightly more involved. The weight shape of each layer depends on the number of units in the previous layer. We also build a new list, new_layer_list, to hold the updated layers: since our Dense implementation delegates the activation to a separate activation layer, whenever a layer's activation is not empty an extra activation layer has to be appended. The full code is as follows:
def compile(self, loss="mse", lr=0.01, **k_args):
"""
编译模型
:param loss: 损失函数
:param lr: 学习率
:param k_args: 其他参数, 比如momentum
:return:
"""
# only two loss functions are implemented here
assert loss in {"mse", "cross_entropy"}
# self.loss becomes Mse() or CrossEntropy()
# self.optimizer becomes SGD, Momentum, ...
self.loss = loss_dict[loss]()
self.lr = lr
input_shape = None
# start compiling the model
# build a new list so activation layers can be inserted
new_layer_list = []
layer_index = 1
layer_name_set = set()
for index, layer in enumerate(self.layer_list):
if index == 0:
input_shape = layer.get_input_shape()
layer.build(input_shape)
layer.set_name("layer_{}".format(layer_index))
layer_index += 1
if layer.get_name() in layer_name_set:
raise NameError("duplicate layer name")
layer_name_set.add(layer.get_name())
new_layer_list.append(layer)
# the next layer's number of input units equals this layer's number of units
input_shape = layer.get_output_shape()
if layer.get_activation_layer():
new_layer = activation_dict[layer.get_activation_layer()]()
new_layer.build(input_shape)
new_layer.set_name("layer_{}".format(layer_index))
layer_index += 1
new_layer_list.append(new_layer)
self.layer_list = new_layer_list
return self
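The loss objects looked up from loss_dict are expected to provide calculate_loss and derivative, both of which fit calls later. As a point of reference, here is a minimal sketch of what the mse loss might look like; this is an illustration only, and the exact scaling in the repository code may differ:

import numpy as np


class Mse(object):
    """Sketch of an mse loss: calculate_loss caches its inputs so that
    derivative can return the gradient with respect to the prediction."""

    def __init__(self):
        self.y_predict = None
        self.y_label = None

    def calculate_loss(self, y_predict, y_label):
        # mean squared error over all elements of the batch
        self.y_predict, self.y_label = y_predict, y_label
        return np.mean(np.square(y_predict - y_label))

    def derivative(self):
        # d(mean((y - t)^2)) / dy = 2 * (y - t) / number_of_elements
        return 2 * (self.y_predict - self.y_label) / self.y_predict.size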
The actual loss implementations are similarly straightforward and are not repeated here; see the code. Next come the model's forward and backward passes:
def forward(self, input_data=None, train=True):
"""
前向运算
:param train: 是否为训练模式
:param input_data: 输入数据
:return: 返回输出
"""
output_signal = input_data
for index, layer in enumerate(self.layer_list):
output_signal = layer.forward(output_signal, train=train)
return output_signal
def backward(self, delta, lr):
"""
反向传播
:param lr: 学习率
:param delta: 梯度
:return:
"""
current_delta = delta
for layer in self.layer_list[::-1]:
current_delta = layer.backward(current_delta)
layer.update(lr)
Finally, the training code:
def fit(self, train_data=None, train_label=None, val_ratio=0.2, epoch=10, batch=32, acc=True):
"""
训练模型
:param acc: 是否输出准确率
:param train_data: 训练数据
:param train_label: 训练标签
:param val_ratio: 验证集比例
:param epoch: 迭代代数
:param batch: 批处理大小
:return:
"""
# split into training and validation sets
train_data, train_label, val_data, val_label = train_test_split(train_data, train_label, val_ratio)
for i in range(epoch):
# shuffle the training data before running the gradient updates
train_data, train_label = shuffle_data_label(train_data, train_label)
for index in range(0, len(train_data), batch):
batch_data, batch_label = train_data[index: index + batch], train_label[index: index + batch]
y_predict = self.forward(batch_data, train=True)
# compute the current loss
loss = self.loss.calculate_loss(y_predict, batch_label)
delta = self.loss.derivative()
self.backward(delta, self.lr)
process_percent = index / len(train_data)
if acc:
y_predict_class = np.argmax(y_predict, axis=-1)
y_label_class = np.argmax(batch_label, axis=-1)
accuracy = np.sum(y_predict_class == y_label_class) / len(batch_label)
print("\repoch_{} {} loss:{}\tacc:{}".format(i + 1,
("-" * int(100 * process_percent)).ljust(100, " "),
loss,
accuracy), end="", flush=True)
else:
print("\repoch_{} {} loss:{}".format(i + 1,
("-" * int(100 * process_percent)).ljust(100, " "),
loss), end="", flush=True)
# print a newline
print()
val_loss, val_acc = self.evaluate(val_data, val_label)
if acc:
print(
"validation data size: {}, loss: {}, accuracy: {}".format(len(val_data),
val_loss,
val_acc))
else:
print("validation data size: {}, loss: {}".format(len(val_data), val_loss))
Within each epoch the training data is shuffled before the gradient updates; the full helper code is on GitHub: https://github.com/darkwhale/neural_network . A rough sketch of the data helpers follows below.
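The helpers shuffle_data_label and train_test_split used in fit could be sketched roughly like this, assuming the data and labels are NumPy arrays (the actual versions are in the repository):

import numpy as np


def shuffle_data_label(data, label):
    # shuffle data and labels with one shared random permutation
    permutation = np.random.permutation(len(data))
    return data[permutation], label[permutation]


def train_test_split(data, label, ratio=0.2):
    # hold out the last `ratio` fraction of the samples for validation
    split = int(len(data) * (1 - ratio))
    return data[:split], label[:split], data[split:], label[split:]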
At this point we have everything a simple neural network model needs. Let's build a model and try it out:
from enet.model import Sequential
from enet.data import ImageHandler
from enet.layers import Dense, Sigmoid, Dropout, Softmax, Relu, BatchNormalization, Conv2D, Flatten, MaxPool2D
if __name__ == '__main__':
data_handler = ImageHandler("dataset", gray=True, flatten=True, use_scale=True)
train_data, train_label, test_data, test_label = data_handler.get_data(ratio=0.2, read_cache=False)
model = Sequential()
model.add(Dense(kernel_size=64, activation="sigmoid", input_shape=(784, ), optimizer="adam"))
model.add(Dense(kernel_size=32, optimizer="momentum"))
model.add(Sigmoid())
model.add(Dense(kernel_size=10, activation="sigmoid"))
model.compile(loss="mse", lr=0.01)
model.summary()
model.fit(train_data, train_label, epoch=10)
The first run may take a while because parsing the images is slow; later runs read the data straight from the cache.
To keep the output readable, only 5 epochs were run here, and the accuracy already reached 0.82, which shows that the model we implemented works.
Of course, the accuracy can be pushed further; the next chapter implements the famous convolutional neural network.