TensorFlow 2 ------ Custom Differentiation

In the earlier model-training examples, the optimizer passed to model.compile determines how gradients are computed and applied; we usually just name an algorithm such as sgd or adam. Instead of relying entirely on this built-in machinery, we can compute the gradients ourselves with TensorFlow's automatic differentiation and drive the parameter updates manually, which makes it possible to satisfy more specialized requirements.
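Before replacing that machinery, it helps to see what the string shortcut actually does. The short sketch below is an illustration and is not part of the original code: the string "sgd" is simply resolved by keras to an SGD optimizer object, and passing a configured object is equivalent. The rest of this post replaces the fit()-driven loop that would normally consume such an optimizer with a hand-written one.

from tensorflow import keras

# "sgd" is resolved to a keras.optimizers.SGD instance behind the scenes;
# passing the object directly is equivalent and lets us set its parameters
opt_from_string = keras.optimizers.get("sgd")
opt_explicit = keras.optimizers.SGD(learning_rate=0.01)
print(type(opt_from_string), type(opt_explicit))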

import matplotlib as mpl   # plotting library
import matplotlib.pyplot as plt
# the next line allows plots to be drawn inline in a notebook
%matplotlib inline
import numpy as np
import sklearn   # machine-learning library
import pandas as pd   # data-handling library
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras   # use the keras bundled with tensorflow
#import keras   # standalone keras

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, sklearn, pd, tf, keras:
    print(module.__name__, module.__version__)
############ Differentiating a function of one variable ############
def f(x):
    return 3. * x ** 2 + 2. * x - 1

# numerically approximate the derivative of f at x
def approximate_derivative(f, x, eps=1e-3):   # f: the function, x: the point, eps: the finite-difference step
    return (f(x+eps) - f(x-eps)) / (2. * eps)

print(approximate_derivative(f, 1.))



7.999999999999119
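This agrees with the analytic derivative: f'(x) = 6x + 2, so f'(1) = 8, and the finite-difference approximation above equals 8 up to rounding error.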
############ Differentiating a function of two variables ############
def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)
def approximate_gradient(g, x1, x2, eps=1e-3):
    dg_x1 = approximate_derivative(lambda x: g(x, x2), x1, eps)   # the lambda fixes x2 and varies x1
    dg_x2 = approximate_derivative(lambda x: g(x1, x), x2, eps)   # the lambda fixes x1 and varies x2
    return dg_x1, dg_x2

print(approximate_gradient(g, 2., 3.))




(8.999999999993236, 41.999999999994486)
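Again this matches the analytic gradient: for g(x1, x2) = (x1 + 5) * x2 ** 2 we have dg/dx1 = x2 ** 2 = 9 and dg/dx2 = 2 * x2 * (x1 + 5) = 42 at (x1, x2) = (2, 3).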
##### Differentiating with respect to variables, 1 #####

x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1 = tape.gradient(z, x1)   # partial derivative of z with respect to x1
print(dz_x1)


# a non-persistent tape can only be used once, so this second call raises an exception
try :
    dz_x2 = tape.gradient(z, x2)
except RuntimeError as ex:
    print(ex)


with tf.GradientTape() as tape:
    z = g(x1, x2)
dz_x2 = tape.gradient(z, x2)
print(dz_x2)




tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.
tf.Tensor(42.0, shape=(), dtype=float32)
##### Differentiating with respect to variables, 2 #####

x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# persistent=True keeps the tape alive after gradient() is called; it is not released automatically, so we must delete it ourselves
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)

dz_x1 = tape.gradient(z, x1)   # partial derivative of z with respect to x1
dz_x2 = tape.gradient(z, x2)
print(dz_x1, dz_x2)

del tape   # release the persistent tape manually



tf.Tensor(9.0, shape=(), dtype=float32) tf.Tensor(42.0, shape=(), dtype=float32)
##### Differentiating with respect to variables, 3 #####

x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# a single gradient() call can return the gradients with respect to several variables at once
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])   # gradients with respect to both x1 and x2
print(dz_x1x2)



[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
# Differentiating with respect to constants
x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
# constants are not watched automatically, so we must tell the tape to watch them
with tf.GradientTape() as tape:
    tape.watch(x1)
    tape.watch(x2)
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])   # gradients with respect to x1 and x2, so there are two results
print(dz_x1x2)



[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
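For comparison, here is a small sketch (not part of the original code, reusing g and tf from above) of the same computation without tape.watch: gradients with respect to unwatched constants come back as None.

x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)   # the constants are not watched, so nothing is recorded for them
print(tape.gradient(z, [x1, x2]))   # prints [None, None]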
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
tape.gradient([z1, z2], x)   # gradient of z1 and of z2 with respect to x, summed
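When several targets are passed, the tape returns the sum of their gradients: d(3x)/dx + d(x**2)/dx = 3 + 2x, which equals 13 at x = 5.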




x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:   # outer tape
    with tf.GradientTape(persistent=True) as inter_tape:   # inner tape
        z = g(x1, x2)
    inter_grads = inter_tape.gradient(z, [x1, x2])   # first-order gradients with respect to x1 and x2
outer_grads = [outer_tape.gradient(inter_grad, [x1, x2]) for inter_grad in inter_grads]   # differentiate each first-order gradient again to get second-order derivatives

print(outer_grads)
del inter_grads
del outer_grads



[[None, <tf.Tensor: shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: shape=(), dtype=float32, numpy=14.0>]]
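These are the second-order derivatives of g: ∂²z/∂x1² = 0 (reported as None), ∂²z/∂x1∂x2 = ∂²z/∂x2∂x1 = 2 * x2 = 6, and ∂²z/∂x2² = 2 * (x1 + 5) = 14.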
# Simulating gradient descent
learning_rate = 0.1
x = tf.Variable(0.0)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx)   # update x: x = x - learning_rate * dz_dx
print(x)
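Since f(x) = 3x² + 2x - 1 is minimized where f'(x) = 6x + 2 = 0, the loop should drive x towards -1/3 ≈ -0.333.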




# Simulating gradient descent with an optimizer
learning_rate = 0.1
x = tf.Variable(0.0)

optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    optimizer.apply_gradients([(dz_dx, x)])

print(x)
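For plain SGD without momentum, optimizer.apply_gradients([(dz_dx, x)]) performs exactly the same update as the manual x.assign_sub(learning_rate * dz_dx) above, so x again converges to about -1/3.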




Building on the earlier housing-price regression problem, we now train the model using custom differentiation. Not all of the code is repeated here; only the model-related code follows:

# Using metrics

# use the built-in mean squared error metric MeanSquaredError()
metric = keras.metrics.MeanSquaredError()
print(metric([5.], [2.]))   # this call alone gives 9
print(metric([0.], [1.]))   # this call alone would give 1
print(metric.result())      # the metric accumulates across calls: (9 + 1) / 2 = 5

# call reset_states() if you do not want to keep accumulating
metric.reset_states()
metric([1.], [3.])
print(metric.result())      # (3 - 1) ** 2 = 4



tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)
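Note that keras metrics are streaming: each call updates internal state and result() returns the running mean over all samples seen since the last reset_states(), which is why the second print already shows 5.0 instead of 1.0.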
# 1. For each batch in an epoch: sample a batch from the training set,
#    update the metric, and use automatic differentiation to update the weights
# 2. At the end of each epoch: evaluate the metric on the validation set
epochs = 100
batch_size = 32   # number of samples per training step
steps_per_epoch = len(x_train_scaler) // batch_size   # integer division: number of steps per epoch
optimizer = keras.optimizers.SGD()   # use SGD as the optimizer
metric = keras.metrics.MeanSquaredError()   # metric: mean squared error

# draw one random batch of batch_size samples
def random_batch(x, y, batch_size=32):
    idx = np.random.randint(0, len(x), size=batch_size)   # batch_size random indices in [0, len(x))
    return x[idx], y[idx]

model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu",input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])

for epoch in range(epochs):
    metric.reset_states()   # reset the metric so errors do not accumulate across epochs
    for step in range(steps_per_epoch):
        x_batch, y_batch = random_batch(x_train_scaler, y_train, batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(x_batch)
            loss = tf.reduce_mean(keras.losses.mean_squared_error(y_batch, y_pred))
            metric(y_batch, y_pred)
        grads = tape.gradient(loss, model.variables)   # gradients of the loss with respect to all model variables
        grads_and_vars = zip(grads, model.variables)
        optimizer.apply_gradients(grads_and_vars)      # apply the gradients manually
        print("\rEpoch", epoch, " train mse:", metric.result().numpy(), end="")
    y_valid_pred = model(x_valid_scaler)
    valid_loss = tf.reduce_mean(keras.losses.mean_squared_error(y_valid, y_valid_pred))
    print("\t", "valid mse: ", valid_loss.numpy())

'''
#build the model with tf.keras.models.Sequential()

model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu",input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
#compile the model. The loss is mean squared error; although it looks like just a string, tensorflow maps it to the corresponding function internally, and we can also supply our own
model.compile(loss="mean_squared_error", optimizer="adam")

#callbacks for monitoring the training process
logdir='./callbacks_regression'
if not os.path.exists(logdir):
    os.mkdir(logdir)
output_model_file = os.path.join(logdir,"regression_california_housing.h5")

#first define a list of callbacks
callbacks = [
    keras.callbacks.TensorBoard(logdir),
    keras.callbacks.ModelCheckpoint(output_model_file,save_best_only=True),
    keras.callbacks.EarlyStopping(patience=5,min_delta=1e-3)
]

#print the model architecture
model.summary()

history=model.fit(x_train_scaler,y_train,epochs=100,
                 validation_data=(x_valid_scaler,y_valid),
                 callbacks=callbacks)
'''



Epoch 0  train mse: 1.5926356	 valid mse:  1.6048584374266572
Epoch 1  train mse: 1.5171691	 valid mse:  1.4116388996038987
Epoch 2  train mse: 1.2351215	 valid mse:  1.3974837469756645
Epoch 3  train mse: 1.2922947	 valid mse:  1.3935513074235155
Epoch 4  train mse: 1.2717088	 valid mse:  1.3931517914831186
...

 
