This section shows how to customize the model training loop in TensorFlow 2.0. We first implement a standard training loop, then modify it so that training converges faster. The two parts are explained separately below.
By overriding the model's train_step(self, data) method you can customize the training loop; in the same way, overriding test_step(self, data) customizes the evaluation loop (a test_step sketch follows the normal training code below).
Normal training loop:
The code is as follows:
def train_step(self, data):
    '''
    Normal training loop
    '''
    # Unpack the data. Its structure depends on your dataset.
    x, y = data
    with tf.GradientTape() as tape:
        y_pred = self(x, training=True)  # Forward pass
        # Compute the loss
        # (compiled_loss is configured in the call to `compile()`)
        loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
    # Get the trainable variables and compute the gradients
    trainable_vars = self.trainable_variables
    gradients = tape.gradient(loss, trainable_vars)
    # Update all weight variables with the gradients
    self.optimizer.apply_gradients(zip(gradients, trainable_vars))
    # Update metrics (including the metric that tracks the loss)
    self.compiled_metrics.update_state(y, y_pred)
    # Return a dict mapping metric names to their current values
    return {m.name: m.result() for m in self.metrics}
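For completeness, evaluation can be customized in the same way. The following is a minimal test_step sketch, assuming the same compiled_loss / compiled_metrics configured via compile(); it is an illustration, not part of the model built later in this section:

def test_step(self, data):
    '''
    Custom evaluation loop (sketch)
    '''
    x, y = data
    # Forward pass in inference mode
    y_pred = self(x, training=False)
    # Update the loss and the compiled metrics; fit()/evaluate() report them (with a "val_" prefix during fit)
    self.compiled_loss(y, y_pred, regularization_losses=self.losses)
    self.compiled_metrics.update_state(y, y_pred)
    return {m.name: m.result() for m in self.metrics}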
When training a model, it usually takes a long time to converge. To speed up convergence you can increase the learning rate, but if the rate is too large the model fails to converge, and if it is too small it can get stuck in a local minimum. So why not start each step with a relatively large learning rate, apply the weight update, and check whether the loss actually decreased; if it did not, roll back and retry with a progressively smaller rate. This both speeds up convergence and helps the model escape local minima. (Note that random operations such as dropout make the loss differ between passes, which interferes with this check and slows convergence.) A toy NumPy sketch of the idea is given below; the full TensorFlow implementation follows it.
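Here is the idea in isolation, on a one-dimensional quadratic loss. Everything in this sketch (loss_fn, grad_fn, adaptive_step) is a hypothetical stand-in for the real model, used only to show the "try a large rate, shrink until the loss drops" loop:

import numpy as np

# Toy stand-ins for the real model: a 1-D quadratic loss and its gradient (illustration only).
def loss_fn(w):
    return float(np.sum((w - 3.0) ** 2))

def grad_fn(w):
    return 2.0 * (w - 3.0)

def adaptive_step(w, lr=0.1, decay=0.3, min_lr=1e-6):
    # One training step: try a large learning rate first and shrink it until the loss drops.
    old_loss = loss_fn(w)
    while lr >= min_lr:
        w_new = w - lr * grad_fn(w)    # candidate update with the current learning rate
        if loss_fn(w_new) < old_loss:  # accept it only if the loss actually decreased
            return w_new, lr
        lr *= decay                    # otherwise shrink the rate and retry from the old weights
    return w, lr                       # rate hit the floor: keep the old weights

w = np.array([0.0])
for step in range(5):
    w, used_lr = adaptive_step(w)
    print(step, used_lr, loss_fn(w))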
Dynamic learning rate training loop:
import tensorflow as tf
import numpy as np
import os
import sys
# Project root directory
ROOT_DIR = os.path.abspath("./")
np.random.seed(1)
tf.random.set_seed(1)
class MyModel(tf.keras.Model):
    def __init__(self, min_lr=1e-6, *args, **kwargs):
        super(MyModel, self).__init__(*args, **kwargs)
        self.all_layers = [
            tf.keras.layers.Conv2D(32, (3, 3), padding='same', use_bias=False),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation(activation=tf.keras.activations.relu),
            tf.keras.layers.AveragePooling2D((2, 2)),
            tf.keras.layers.Conv2D(128, (3, 3), padding='same', use_bias=False),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation(activation=tf.keras.activations.relu),
            tf.keras.layers.AveragePooling2D((2, 2)),
            tf.keras.layers.Conv2D(10, (7, 7), padding='valid', use_bias=False),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation(activation=tf.keras.activations.softmax),
            # tf.keras.layers.Dropout(0.3),  # Dropout makes the loss fluctuate, so it differs between passes
            tf.keras.layers.Flatten(),
        ]
        self.min_lr = min_lr
    def build(self, input_shape):
        result = super().build(input_shape)
        trainable_vars = self.trainable_variables
        # Non-trainable copies used to snapshot the weights before an update
        self.bak_trainable_vars = []
        for var in trainable_vars:
            self.bak_trainable_vars.append(tf.Variable(var, trainable=False))
        # Non-trainable copies used to hold the weights after the last attempted update
        self.bak_trainable_last_vars = []
        for var in trainable_vars:
            self.bak_trainable_last_vars.append(tf.Variable(var, trainable=False))
        return result
    def call(self, inputs, training):
        x = inputs
        for layer in self.all_layers:
            if isinstance(layer, tf.keras.layers.BatchNormalization):
                x = layer(x, training=training)
            else:
                x = layer(x)
        return x
    def train_step(self, data):
        # Normal training loop
        # return self.train_step_normal(data)
        # Dynamic learning rate training loop
        return self.train_step_fast(data)
    def train_step_fast(self, data):
        '''
        Dynamic learning rate
        '''
        # Unpack the data. Its structure depends on your dataset.
        x, y = data
        loss = 0.0
        new_loss = 1.0
        # Start from a learning rate of 0.1 and multiply it by 0.3 on each retry, until the loss
        # drops below the pre-update loss or the rate falls below self.min_lr; then stop looping
        # and keep the updated weights.
        self.optimizer.learning_rate.assign(0.1)
        y_pred = self(x, training=True)  # Initialization
        # Loop body: search for a suitable learning rate
        def loop_fun(loss, new_loss, y_pred, lr):
            with tf.GradientTape() as tape:
                y_pred = self(x, training=True)  # Forward pass
                # Compute the loss
                # (compiled_loss is configured in the call to `compile()`)
                loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
            # Get the trainable variables and compute the gradients
            trainable_vars = self.trainable_variables
            # Snapshot the weights before the update
            for i in range(len(trainable_vars)):
                self.bak_trainable_vars[i].assign(trainable_vars[i])
            gradients = tape.gradient(loss, trainable_vars)
            # Update all weight variables with the gradients
            self.optimizer.apply_gradients(zip(gradients, trainable_vars))
            # Check whether the loss decreased after the gradient step
            y_pred = self(x, training=True)
            new_loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
            self.optimizer.learning_rate.assign(self.optimizer.learning_rate * 0.3)
            # Snapshot the weights after the update
            for i in range(len(trainable_vars)):
                self.bak_trainable_last_vars[i].assign(trainable_vars[i])
            # Restore the weights from before the update (in case another retry is needed)
            for i in range(len(trainable_vars)):
                trainable_vars[i].assign(self.bak_trainable_vars[i])
            y_pred = self(x, training=True)  # Forward pass
            old_loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
            return (loss, new_loss, y_pred, self.optimizer.learning_rate)
        loss, new_loss, y_pred, _ = tf.while_loop(
            lambda loss, new_loss, y_pred, lr: tf.math.logical_and(loss <= new_loss, lr >= self.min_lr),
            loop_fun,
            (loss, new_loss, y_pred, self.optimizer.learning_rate))
        # Restore the weights from the last attempted update
        trainable_vars = self.trainable_variables
        for i in range(len(trainable_vars)):
            trainable_vars[i].assign(self.bak_trainable_last_vars[i])
        # Update metrics (including the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to their current values
        return {m.name: m.result() for m in self.metrics}
    def train_step_normal(self, data):
        '''
        Normal training loop
        '''
        # Unpack the data. Its structure depends on your dataset.
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss
            # (compiled_loss is configured in the call to `compile()`)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        # Get the trainable variables and compute the gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update all weight variables with the gradients
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (including the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to their current values
        return {m.name: m.result() for m in self.metrics}
def main():
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    x_train, x_test = tf.expand_dims(x_train, axis=-1), tf.expand_dims(x_test, axis=-1)
    y_train, y_test = tf.one_hot(y_train, 10), tf.one_hot(y_test, 10)
    print('x_train, y_train', x_train.shape, y_train.shape)
    print('x_train, y_train', type(x_train), type(y_train))
    print('x_test, y_test', x_test.shape, y_test.shape)
    # Image classification with a small convolutional network
    model = MyModel()
    model.compile(optimizer=tf.keras.optimizers.SGD(),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'],
                  )
    x = model(x_train[:1], training=False)
    print('x:', x)
    model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), shuffle=False)
    x = model(x_train[:1], training=False)
    print('x:', x)
    trainable_vars = model.trainable_variables
    tmp_trainable_vars = []
    for i in range(len(trainable_vars)):
        tmp_trainable_vars.append(tf.Variable(trainable_vars[i], dtype=trainable_vars[i].dtype))
if __name__ == '__main__':
    main()
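One practical note: fit() compiles the custom train_step into a graph, which makes print-style debugging awkward. If you need to step through train_step_fast, Keras provides the standard run_eagerly argument to compile(), which runs the step as plain Python (slower, but easier to inspect). A possible variant of the compile call above:

model.compile(optimizer=tf.keras.optimizers.SGD(),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'],
              run_eagerly=True)  # execute train_step eagerly for easier debugging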
This concludes the section. Likes and bookmarks are what keep me writing, so feel free to bookmark; donations are of course also welcome, haha.