In the earlier model-training examples, the optimizer argument passed to model.compile specifies the algorithm used to compute gradients and update the model; usually we simply pass a string such as "sgd" or "adam". Instead of relying entirely on TensorFlow's built-in training machinery, we can compute the gradients ourselves and write a custom training loop, which makes it possible to satisfy more specialized requirements.
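As a reminder of what that optimizer argument does, here is a minimal sketch (the tiny placeholder model and its input_shape=(8,) are illustrative only, not part of the original example); the string "sgd" is simply shorthand for a keras.optimizers.SGD instance with default parameters:

import tensorflow as tf
from tensorflow import keras

#placeholder model: one hidden layer, 8 input features (just for illustration)
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=(8,)),
    keras.layers.Dense(1),
])
#string shorthand: keras looks up the optimizer (and loss) by name
model.compile(loss="mean_squared_error", optimizer="sgd")
#equivalent, but with an explicit optimizer object whose parameters we control
model.compile(loss="mean_squared_error",
              optimizer=keras.optimizers.SGD(learning_rate=0.01))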
import matplotlib as mpl #plotting library
import matplotlib.pyplot as plt
#the following line lets plots render inline in the notebook
%matplotlib inline
import numpy as np
import sklearn #machine learning library
import pandas as pd #data-processing library
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras #use the keras bundled with tensorflow
#import keras #standalone keras
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, sklearn, pd, tf, keras:
    print(module.__name__, module.__version__)
############ Differentiating a function of one variable ############
def f(x):
    return 3. * x ** 2 + 2. * x - 1
#numerical (finite-difference) approximation of the derivative
def approximate_derivative(f, x, eps=1e-3): #f: the function, x: where to differentiate, eps: step size
    return (f(x + eps) - f(x - eps)) / (2. * eps)
print(approximate_derivative(f, 1.)) #analytic derivative is 6x + 2 = 8 at x = 1
7.999999999999119
############ Differentiating a function of two variables ############
def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)
def approximate_gradient(g, x1, x2, eps=1e-3):
    dg_x1 = approximate_derivative(lambda x: g(x, x2), x1, eps) #lambda fixes x2 and varies x1
    dg_x2 = approximate_derivative(lambda x: g(x1, x), x2, eps) #lambda fixes x1 and varies x2
    return dg_x1, dg_x2
print(approximate_gradient(g, 2., 3.))
(8.999999999993236, 41.999999999994486)
##### Differentiating variables, example 1 #####
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)
dz_x1 = tape.gradient(z, x1) #partial derivative of z with respect to x1
print(dz_x1)
#a non-persistent tape can only be used once, so the second call raises an exception
try:
    dz_x2 = tape.gradient(z, x2)
except RuntimeError as ex:
    print(ex)
with tf.GradientTape() as tape:
    z = g(x1, x2)
dz_x2 = tape.gradient(z, x2)
print(dz_x2)
tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.
tf.Tensor(42.0, shape=(), dtype=float32)
##### Differentiating variables, example 2 #####
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
#persistent=True keeps the tape alive after the first gradient call, so it must be deleted manually; it is not released automatically
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)
dz_x1 = tape.gradient(z, x1) #partial derivative with respect to x1
dz_x2 = tape.gradient(z, x2) #partial derivative with respect to x2
print(dz_x1, dz_x2)
del tape #delete the tape manually
tf.Tensor(9.0, shape=(), dtype=float32) tf.Tensor(42.0, shape=(), dtype=float32)
##### Differentiating variables, example 3 #####
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)
dz_x1x2 = tape.gradient(z, [x1, x2]) #partial derivatives with respect to both x1 and x2, so the result is a list of two tensors
print(dz_x1x2)
[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
#Differentiating constants
x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
#constants are not tracked automatically, so we must tell the tape to watch them
with tf.GradientTape() as tape:
    tape.watch(x1)
    tape.watch(x2)
    z = g(x1, x2)
dz_x1x2 = tape.gradient(z, [x1, x2]) #partial derivatives with respect to x1 and x2, so there are two results
print(dz_x1x2)
[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
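Without the tape.watch calls, the constants are never recorded and the gradients come back as None. A small sketch (not in the original notebook) to illustrate:

c1 = tf.constant(2.0)
c2 = tf.constant(3.0)
with tf.GradientTape() as tape:
    z = g(c1, c2) #the constants are not watched
print(tape.gradient(z, [c1, c2])) #prints [None, None]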
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
tape.gradient([z1, z2], x) #derivatives of z1 and z2 with respect to x, added together: 3 + 2*x = 13
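When a list of targets is passed, tape.gradient returns the sum of the individual gradients. If the separate gradients are needed, one option (a small sketch, not part of the original example) is a persistent tape:

x = tf.Variable(5.0)
with tf.GradientTape(persistent=True) as tape:
    z1 = 3 * x
    z2 = x ** 2
print(tape.gradient(z1, x)) #tf.Tensor(3.0, ...)
print(tape.gradient(z2, x)) #tf.Tensor(10.0, ...)
del tape #persistent tapes must be released manually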
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape: #outer tape
    with tf.GradientTape(persistent=True) as inter_tape: #inner tape
        z = g(x1, x2)
    inter_grads = inter_tape.gradient(z, [x1, x2]) #first-order partials with respect to x1 and x2, so there are two results
outer_grads = [outer_tape.gradient(inter_grad, [x1, x2])
               for inter_grad in inter_grads] #differentiate each first-order result again to get the second-order partials
print(outer_grads)
del inter_tape #release the persistent tapes manually
del outer_tape
[[None, <tf.Tensor: shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: shape=(), dtype=float32, numpy=14.0>]]
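These four numbers are exactly the Hessian of g at (2, 3); a quick analytic check (not in the original notebook):

a1, a2 = 2.0, 3.0
#z = (a1 + 5) * a2 ** 2, so the second-order partials are:
hessian = [[0.0, 2 * a2],          #[d2z/dx1dx1, d2z/dx1dx2]
           [2 * a2, 2 * (a1 + 5)]] #[d2z/dx2dx1, d2z/dx2dx2]
print(hessian) #[[0.0, 6.0], [6.0, 14.0]]; the zero entry is reported as None by the tape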
#simulating gradient descent
learning_rate = 0.1
x = tf.Variable(0.0)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx) #update x in place: x = x - learning_rate * dz_dx
print(x)
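After 100 updates x should sit at the minimum of f(x) = 3x^2 + 2x - 1, i.e. at x = -b/(2a) = -1/3; a one-line sanity check (not in the original notebook):

print(-2. / (2. * 3.)) #analytic minimizer is about -0.3333, which the printed Variable should approach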
#simulating gradient descent combined with a keras optimizer
learning_rate = 0.1
x = tf.Variable(0.0)
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    optimizer.apply_gradients([(dz_dx, x)]) #the optimizer applies the update instead of assign_sub
print(x)
Building on the earlier house-price regression problem, we now train the model using this custom differentiation. Not all of the code is repeated here; only the model-related code follows:
#using a metric
#MeanSquaredError() is a streaming mean-squared-error metric
metric = keras.metrics.MeanSquaredError()
print(metric([5.], [2.])) #squared error of this call is 9, so the running result is 9
print(metric([0.], [1.])) #this call alone would give 1, but the metric accumulates, so it prints the running mean (9 + 1) / 2 = 5
print(metric.result()) #accumulated result, still 5
#call reset_states if you do not want results to accumulate
metric.reset_states()
metric([1.], [3.])
print(metric.result()) #after the reset only this call counts, so the result is 4
tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)
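The streaming behaviour is just a running mean over everything the metric has seen since the last reset; a simplified sketch (an illustration, not the actual keras implementation):

import numpy as np

total, count = 0.0, 0
for y_true, y_pred in [([5.], [2.]), ([0.], [1.])]:
    total += np.mean((np.array(y_true) - np.array(y_pred)) ** 2)
    count += 1
    print(total / count) #prints 9.0, then 5.0, matching the metric calls above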
#1. for each batch of the training set: forward pass, update the metric, take gradients automatically, apply them
#2. at the end of each epoch: evaluate the metric on the validation set
epochs = 100
batch_size = 32 #number of samples per training step
steps_per_epoch = len(x_train_scaler) // batch_size #integer division: number of steps per epoch
optimizer = keras.optimizers.SGD() #use sgd as the optimizer
metric = keras.metrics.MeanSquaredError() #metric: mean squared error
#helper that draws a random batch of batch_size samples (a tf.data alternative is sketched right after this helper)
def random_batch(x, y, batch_size=32):
    idx = np.random.randint(0, len(x), size=batch_size) #batch_size random indices in [0, len(x))
    return x[idx], y[idx]
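random_batch samples with replacement, so within one epoch some rows may appear twice and others not at all. If exact epoch coverage matters, a tf.data pipeline is a common alternative (a sketch, assuming the same x_train_scaler / y_train arrays; not part of the original code):

train_ds = tf.data.Dataset.from_tensor_slices((x_train_scaler, y_train))
train_ds = train_ds.shuffle(buffer_size=len(x_train_scaler)).batch(batch_size)
#the inner loop then becomes: for x_batch, y_batch in train_ds: ...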
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
for epoch in range(epochs):
    metric.reset_states() #reset so the metric does not accumulate across epochs
    for step in range(steps_per_epoch):
        x_batch, y_batch = random_batch(x_train_scaler, y_train, batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(x_batch)
            loss = tf.reduce_mean(keras.losses.mean_squared_error(y_batch, y_pred))
            metric(y_batch, y_pred)
        grads = tape.gradient(loss, model.variables)
        grads_and_vars = zip(grads, model.variables)
        optimizer.apply_gradients(grads_and_vars)
        print("\rEpoch", epoch, " train mse:", metric.result().numpy(), end="")
    y_valid_pred = model(x_valid_scaler)
    valid_loss = tf.reduce_mean(keras.losses.mean_squared_error(y_valid, y_valid_pred))
    print("\t", "valid mse: ", valid_loss.numpy())
'''
#build the model with tf.keras.models.Sequential()
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
#compile the model. The loss is mean squared error; the string is mapped internally to the corresponding loss function, and a custom one could be passed instead
model.compile(loss="mean_squared_error", optimizer="adam")
#callbacks for monitoring the training process
logdir = './callbacks_regression'
if not os.path.exists(logdir):
    os.mkdir(logdir)
output_model_file = os.path.join(logdir, "regression_california_housing.h5")
#first define a list of callbacks
callbacks = [
    keras.callbacks.TensorBoard(logdir),
    keras.callbacks.ModelCheckpoint(output_model_file, save_best_only=True),
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3)
]
#inspect the model architecture
model.summary()
history = model.fit(x_train_scaler, y_train, epochs=100,
                    validation_data=(x_valid_scaler, y_valid),
                    callbacks=callbacks)
'''
Epoch 0 train mse: 1.5926356 valid mse: 1.6048584374266572
Epoch 1 train mse: 1.5171691 valid mse: 1.4116388996038987
Epoch 2 train mse: 1.2351215 valid mse: 1.3974837469756645
Epoch 3 train mse: 1.2922947 valid mse: 1.3935513074235155
Epoch 4 train mse: 1.2717088 valid mse: 1.3931517914831186
...