This article covers the following: numerical approximation of derivatives, automatic differentiation with tf.GradientTape, gradient descent, and a custom training loop that combines tf.GradientTape with tf.keras.
In numerical (approximate) differentiation, the idea is to evaluate the function at two points offset slightly on either side of the point of interest and use the difference to approximate the derivative. The smaller the offset, the closer the approximation is to the true derivative.
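The central-difference formula used below is f'(x) ≈ (f(x + ε) − f(x − ε)) / (2ε); its error shrinks as ε shrinks.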
# Approximate derivative of a single-variable function
def f(x):
    return 3. * x ** 2 + 2. * x - 1

def approximate_derivative(f, x, eps=1e-3):
    # Central difference: evaluate f at x + eps and x - eps
    return (f(x + eps) - f(x - eps)) / (2. * eps)

print(approximate_derivative(f, 1.))
# ---output------
7.999999999999119
# Approximate gradient of a two-variable function
def approximate_derivative(f, x, eps=1e-3):
    return (f(x + eps) - f(x - eps)) / (2. * eps)

def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)

def approximate_gradient(g, x1, x2, eps=1e-3):
    # Fix one variable and differentiate with respect to the other
    dg_x1 = approximate_derivative(lambda x: g(x, x2), x1, eps)
    dg_x2 = approximate_derivative(lambda x: g(x1, x), x2, eps)
    return dg_x1, dg_x2

print(approximate_gradient(g, 2., 3.))
# ---output------
(8.999999999993236, 41.999999999994486)
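For comparison, the analytic gradient of g(x1, x2) = (x1 + 5) * x2 ** 2 at (2, 3) is ∂g/∂x1 = x2² = 9 and ∂g/∂x2 = 2 * x2 * (x1 + 5) = 42, so the approximation above is accurate to many decimal places.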
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
# 1. Print the versions of the Python libraries in use
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)
# 2. Approximate derivative of a single-variable function
def f(x):
    return 3. * x ** 2 + 2. * x - 1

def approximate_derivative(f, x, eps=1e-3):
    return (f(x + eps) - f(x - eps)) / (2. * eps)

print(approximate_derivative(f, 1.))
# 3. Approximate gradient of a two-variable function
def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)

def approximate_gradient(g, x1, x2, eps=1e-3):
    dg_x1 = approximate_derivative(lambda x: g(x, x2), x1, eps)
    dg_x2 = approximate_derivative(lambda x: g(x1, x), x2, eps)
    return dg_x1, dg_x2

print(approximate_gradient(g, 2., 3.))
#---output-----
7.999999999999119
(8.999999999993236, 41.999999999994486)
GradientTape is one of the most commonly used features of recent TensorFlow versions: whenever gradients need to be computed, this API is the tool to reach for.
Note: when the persistent parameter of tf.GradientTape is False, the tape object can only call its gradient() method once.
def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)

x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=False, watch_accessed_variables=True) as tape:
    z = g(x1, x2)

dz_x1 = tape.gradient(target=z, sources=x1)
print(dz_x1)

# A second call to gradient() on a non-persistent tape raises a RuntimeError
try:
    dz_x2 = tape.gradient(target=z, sources=x2)
except RuntimeError as ex:
    print(ex)
#---output--------------
tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.
When the persistent parameter of GradientTape is True, the tape object can call gradient() multiple times.
Note: because the tape is persistent, it is not released automatically; delete the tape object when you are done with it to free its resources.
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)

dz_x1 = tape.gradient(target=z, sources=x1)
dz_x2 = tape.gradient(target=z, sources=x2)
print(dz_x1, dz_x2)
del tape
# ---output-----
tf.Tensor(9.0, shape=(), dtype=float32) tf.Tensor(42.0, shape=(), dtype=float32)
Note: pass in a list of variables and a list of gradients is returned.
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)
#---output------
[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
By default, a GradientTape only watches tf.Variable objects, so asking for gradients with respect to tf.constant values returns None:

x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)
#---output-----
[None, None]
# Call watch() on the tape object to track constants
x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
with tf.GradientTape() as tape:
    tape.watch(x1)
    tape.watch(x2)
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)
#---output---------
[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
Note: when a list of targets is passed to gradient(), the derivatives of the individual targets with respect to x are summed.
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2

print(tape.gradient([z1, z2], x))
#---output-------
tf.Tensor(13.0, shape=(), dtype=float32)
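This matches the analytic result: dz1/dx + dz2/dx = 3 + 2x = 3 + 10 = 13 at x = 5.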
Note: nesting two GradientTapes yields second-order derivatives. The four results below form the Hessian, in the order ∂²z/∂x1², ∂²z/∂x1∂x2, ∂²z/∂x2∂x1, ∂²z/∂x2², so the pure second derivatives with respect to x1 and x2 are the first and last entries.
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    inner_grads = inner_tape.gradient(z, [x1, x2])
outer_grads = [outer_tape.gradient(inner_grad, [x1, x2])
               for inner_grad in inner_grads]
print(outer_grads)
del inner_tape
del outer_tape
# ---output-----
[[None, <tf.Tensor: shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: shape=(), dtype=float32, numpy=14.0>]]
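These values are the Hessian of g: ∂²z/∂x1² = 0 (returned as None), ∂²z/∂x1∂x2 = ∂²z/∂x2∂x1 = 2 * x2 = 6, and ∂²z/∂x2² = 2 * (x1 + 5) = 14, matching the output above.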
The gradient descent algorithm consists of two steps: (1) compute the derivative of the objective at the current parameter value, and (2) move the parameter a learning-rate-sized step against that derivative.
def f(x):
    return 3. * x ** 2 + 2. * x - 1

# Plot the function with matplotlib
import numpy as np
import matplotlib.pyplot as plt

x = np.arange(-5., 5., 0.2)
y = f(x)
plt.grid(True)
plt.plot(x, y)
plt.show()
The update rule

x -= learning_rate * derivatives

moves the parameter x a learning-rate-sized step against the derivative. The same idea, implemented with tf.GradientTape:

def f(x):
    return 3. * x ** 2 + 2. * x - 1
learning_rate = 0.1
x = tf.Variable(0.0)

for _ in range(1000):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx)

print(x)
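The loop converges to the analytic minimum of f: setting f'(x) = 6x + 2 = 0 gives x = -1/3 ≈ -0.3333, which is the value printed after the 1000 iterations.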
The same optimization can be done with a keras optimizer instead of updating x by hand:

learning_rate = 0.1
x = tf.Variable(0.0)
# Instantiate the optimizer
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    # Derivative of f at the current value of x
    dz_dx = tape.gradient(z, x)
    # Pass the (gradient, variable) pair to the optimizer, which updates x
    optimizer.apply_gradients([(dz_dx, x)])  # gradient first, variable second

print(x)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
# 1. Print the versions of the Python libraries in use
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)
# 2. Download and use sklearn's fetch_california_housing dataset
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
print(housing.DESCR)
print(housing.data.shape)
print(housing.target.shape)
# 3. Split the dataset into training, validation, and test sets
from sklearn.model_selection import train_test_split
x_train_all, x_test, y_train_all, y_test = train_test_split(housing.data, housing.target, random_state = 7)
x_train, x_valid, y_train, y_valid = train_test_split(x_train_all, y_train_all, random_state = 11)
print(x_train.shape, y_train.shape)
print(x_valid.shape, y_valid.shape)
print(x_test.shape, y_test.shape)
# 4. Preprocess before feeding data to the model: standardize the training, validation, and test sets
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)
# 5. How to use a metric
# A metric is computed like the MSE loss, but it accumulates across calls
# until reset_states() is called
metric = keras.metrics.MeanSquaredError()
print(metric([5.], [2.]))
print(metric([0.], [1.]))
print(metric.result())
metric.reset_states()
metric([1.], [3.])
print(metric.result())
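The metric accumulates across calls: the first call contributes (5 - 2)² = 9, the second contributes (0 - 1)² = 1, so result() reports the running mean 5.0; after reset_states(), a fresh call with ([1.], [3.]) gives (1 - 3)² = 4.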
# 6. Combine tf.GradientTape with tf.keras to replace model.fit
# Training loop outline:
# 1. For each epoch, iterate over the training set batch by batch and update the metric
#    1.1 Use tf.GradientTape for automatic differentiation
# 2. At the end of each epoch, evaluate the metric on the validation set
epochs = 100  # number of training epochs
batch_size = 32  # number of samples per batch
steps_per_epoch = len(x_train_scaled) // batch_size  # number of batches per epoch
optimizer = keras.optimizers.SGD(learning_rate=3e-3)  # optimizer with its learning rate
metric = keras.metrics.MeanSquaredError()  # keras metric object

# Randomly sample batch_size examples and their labels from the training data
def random_batch(x, y, batch_size=32):
    idx = np.random.randint(0, len(x), size=batch_size)
    return x[idx], y[idx]
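As an aside, the hand-rolled random_batch sampler could be replaced by a tf.data pipeline; a minimal sketch, not part of the original code, assuming the x_train_scaled and y_train arrays prepared above:

# Minimal sketch (assumption): an alternative to random_batch using tf.data
train_ds = tf.data.Dataset.from_tensor_slices((x_train_scaled, y_train))
# Shuffle and batch the training data, repeating indefinitely so the epoch
# loop below could keep drawing batches
train_ds = train_ds.shuffle(buffer_size=10000).batch(batch_size).repeat()
train_iter = iter(train_ds)
# Inside the training loop, one batch would then be drawn with:
# x_batch, y_batch = next(train_iter)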
# Define the model architecture
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu', input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
for epoch in range(epochs):
    # Reset the metric at the start of every epoch
    metric.reset_states()
    # Iterate over the training set one batch per step, doing a forward and backward pass
    for step in range(steps_per_epoch):
        # Randomly draw one batch of training examples and labels
        x_batch, y_batch = random_batch(x_train_scaled, y_train, batch_size)
        # Record the operations that make up the loss so the tape can differentiate them
        with tf.GradientTape() as tape:
            # Forward pass: get the predictions from the model
            y_pred = model(x_batch)
            # The predictions have shape (batch_size, 1); squeeze them down to 1-D
            y_pred = tf.squeeze(y_pred, 1)
            # Compute the loss
            loss = keras.losses.mean_squared_error(y_batch, y_pred)
            # Update the metric with this batch's mean squared error
            metric(y_batch, y_pred)
        # Differentiate the loss with respect to all model variables
        grads = tape.gradient(loss, model.variables)
        # Pair each gradient with its variable
        grads_and_vars = zip(grads, model.variables)
        # Let the optimizer apply the gradients and update the variables
        optimizer.apply_gradients(grads_and_vars)
        # Print the running training MSE for this epoch
        print("\rEpoch", epoch + 1, " train mse:", metric.result().numpy(), end="")
    # After each epoch, evaluate the updated model on the validation set
    y_valid_pred = model(x_valid_scaled)
    # The predictions are 2-D; squeeze them down to 1-D
    y_valid_pred = tf.squeeze(y_valid_pred, 1)
    # Mean squared error over the whole validation set
    valid_loss = keras.losses.mean_squared_error(y_valid, y_valid_pred)
    print("\t", "valid mse: ", valid_loss.numpy())