import tensorflow as tf
import numpy as np
tf.__version__
'2.3.0'
In deep learning, we often need to compute the gradient of a function. This section introduces how to use the GradientTape provided by TensorFlow 2.0 to compute gradients automatically.
GradientTape can be understood as a "tape that records the gradient flow":
During recording: inside the GradientTape context, the tape records the dependency graph of every operation that depends on a source node (a variable being "watched").
During differentiation: the partial derivatives are computed by tracing the recorded paths from the source nodes to the target node.
Source nodes are specified before the computation is recorded:
All trainable variables are watched automatically: by default (watch_accessed_variables=True), GradientTape treats every trainable variable (created by tf.Variable with trainable=True) as a source node to watch.
Non-trainable tensors (such as those created by tf.constant) can be watched explicitly with tape.watch().
Alternatively, set watch_accessed_variables=False and then use tape.watch() to control exactly which variables are watched, as in the sketch below.
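A minimal sketch of that last option, reusing the import above (the names v and w are illustrative):
v = tf.Variable(1.0)  # trainable, but deliberately not watched below
w = tf.Variable(2.0)
with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(w)  # only w becomes a source node
    loss = v * w
tape.gradient(loss, [v, w])  # [None, 1.0]: v was never watched; d(loss)/dw = v = 1.0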
# Create a (4, 1) float32 tensor with initial values 0..3
x = tf.reshape(tf.constant(range(4), dtype='float32'), (4, 1))
x
with tf.GradientTape() as t:
    t.watch(x)  # x is a constant rather than a Variable, so it must be watched explicitly (source node)
    y = 2 * tf.matmul(tf.transpose(x), x)
g_dx = t.gradient(y, x)
g_dx
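Since y = 2 * xᵀx, the analytic gradient is dy/dx = 4x. A quick sanity check, reusing x and g_dx from above:
tf.reduce_all(g_dx == 4 * x)  # expect tf.Tensor(True, ...) for these small integer values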
tf.GradientTape(persistent=True)
A persistent tape allows the gradient() method to be called multiple times; its resources are released when the tape object is garbage collected.
with tf.GradientTape(persistent=True) as g:
    g.watch(x)
    y = x * x
    z = y * y
    dz_dx = g.gradient(z, x)  # element-wise 4*x^3 (e.g. 108.0 at x = 3); note this is not a matrix product
    dy_dx = g.gradient(y, x)  # element-wise 2*x
dz_dx, dy_dx
WARNING:tensorflow:Calling GradientTape.gradient on a persistent tape inside its context is significantly less efficient than calling it outside the context (it causes the gradient ops to be recorded on the tape, leading to increased CPU and memory usage). Only call GradientTape.gradient inside the context if you actually want to trace the gradient in order to compute higher order derivatives.
WARNING:tensorflow:Calling GradientTape.gradient on a persistent tape inside its context is significantly less efficient than calling it outside the context (it causes the gradient ops to be recorded on the tape, leading to increased CPU and memory usage). Only call GradientTape.gradient inside the context if you actually want to trace the gradient in order to compute higher order derivatives.
(<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
 array([[  0.],
        [  4.],
        [ 32.],
        [108.]], dtype=float32)>,
 <tf.Tensor: shape=(4, 1), dtype=float32, numpy=
 array([[0.],
        [2.],
        [4.],
        [6.]], dtype=float32)>)
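To avoid the warnings above, the usual pattern is to call gradient() only after leaving the with block, and to drop the tape reference once you are done; gradient() is called inside the context only when you want to trace it for higher-order derivatives. A sketch of the same computation in that style (g2 is an illustrative name):
with tf.GradientTape(persistent=True) as g2:
    g2.watch(x)
    y = x * x
    z = y * y
dz_dx = g2.gradient(z, x)  # called outside the context: no warning
dy_dx = g2.gradient(y, x)
del g2  # release the persistent tape's resources promptly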
def func(a):
    b = a * 2
    # keep doubling b until the norm of b reaches 1000
    while tf.norm(b) < 1000:
        b = b * 2
    # branch on the sum of all elements of b
    if tf.reduce_sum(b) > 0:
        c = b
    else:
        c = 100 * b
    return c
a = tf.random.normal((1, 1), dtype=tf.float32)
a
with tf.GradientTape() as t:
    t.watch(a)
    c = func(a)
# func is piecewise linear in a: c = k * a for a constant k determined by the
# control flow, so dc/da = k = c / a and the comparison below yields True
t.gradient(c, a) == (c / a)
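A hypothetical check of the else branch, with an illustrative fixed negative input a_neg: there c = 100 * b, still a constant multiple of a, so the gradient again equals c / a.
a_neg = tf.constant([[-1.0]])
with tf.GradientTape() as t2:
    t2.watch(a_neg)
    c_neg = func(a_neg)
t2.gradient(c_neg, a_neg) == c_neg / a_neg  # tf.Tensor([[ True]], ...)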
dir(tf.dtypes)
['DType',
'QUANTIZED_DTYPES',
'__builtins__',
'__cached__',
'__doc__',
'__file__',
'__loader__',
'__name__',
'__package__',
'__path__',
'__spec__',
'_sys',
'as_dtype',
'bfloat16',
'bool',
'cast',
'complex',
'complex128',
'complex64',
'double',
'float16',
'float32',
'float64',
'half',
'int16',
'int32',
'int64',
'int8',
'qint16',
'qint32',
'qint8',
'quint16',
'quint8',
'resource',
'saturate_cast',
'string',
'uint16',
'uint32',
'uint64',
'uint8',
'variant']
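Two entries in this list are easy to confuse: tf.cast wraps values that overflow the target dtype, while tf.saturate_cast clamps them to the target's range. A small sketch (x8 is an illustrative name):
x8 = tf.constant([250, 260], dtype=tf.int32)
tf.cast(x8, tf.uint8)           # [250,   4]: 260 wraps modulo 256
tf.saturate_cast(x8, tf.uint8)  # [250, 255]: 260 is clamped to the uint8 maximum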
dir(tf.random)
['Algorithm',
'Generator',
'__builtins__',
'__cached__',
'__doc__',
'__file__',
'__loader__',
'__name__',
'__package__',
'__path__',
'__spec__',
'_sys',
'all_candidate_sampler',
'categorical',
'create_rng_state',
'experimental',
'fixed_unigram_candidate_sampler',
'gamma',
'get_global_generator',
'learned_unigram_candidate_sampler',
'log_uniform_candidate_sampler',
'normal',
'poisson',
'set_global_generator',
'set_seed',
'shuffle',
'stateless_binomial',
'stateless_categorical',
'stateless_gamma',
'stateless_normal',
'stateless_parameterized_truncated_normal',
'stateless_poisson',
'stateless_truncated_normal',
'stateless_uniform',
'truncated_normal',
'uniform',
'uniform_candidate_sampler']
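The list divides into stateful ops (normal, uniform, ...), which draw from a global generator seeded via set_seed, and their stateless_* counterparts, which are pure functions of an explicit seed. A sketch:
tf.random.set_seed(42)  # seeds the global stateful RNG
tf.random.normal((2,))  # values still differ on each call
tf.random.stateless_normal((2,), seed=[1, 2])  # fixed seed: same values on
tf.random.stateless_normal((2,), seed=[1, 2])  # ...every call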
help(tf.ones)
Help on function ones in module tensorflow.python.ops.array_ops:

ones(shape, dtype=tf.float32, name=None)
    Creates a tensor with all elements set to one (1).

    See also `tf.ones_like`, `tf.zeros`, `tf.fill`, `tf.eye`.

    This operation returns a tensor of type `dtype` with shape `shape` and
    all elements set to one.

    >>> tf.ones([3, 4], tf.int32)
    <tf.Tensor: shape=(3, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [1, 1, 1, 1],
           [1, 1, 1, 1]], dtype=int32)>

    Args:
      shape: A `list` of integers, a `tuple` of integers, or
        a 1-D `Tensor` of type `int32`.
      dtype: Optional DType of an element in the resulting `Tensor`. Default is
        `tf.float32`.
      name: Optional string. A name for the operation.

    Returns:
      A `Tensor` with all elements set to one (1).
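Per the Args section, shape may also be a 1-D int32 tensor rather than a Python list. A quick sketch (shape_t is an illustrative name):
shape_t = tf.constant([2, 3], dtype=tf.int32)
tf.ones(shape_t)  # a (2, 3) tensor of ones, dtype float32 by default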