The gradient is a vector: it points in the direction along which the directional derivative of a function at a given point attains its maximum value.
$$\mathrm{grad}\,f(x,y) = \nabla f(x,y) = \left(\frac{\partial f}{\partial x}, \frac{\partial f}{\partial y}\right) = \frac{\partial f}{\partial x}\mathbf{i} + \frac{\partial f}{\partial y}\mathbf{j}$$
The gradient direction is the direction in which the function grows fastest, so searching for a minimum of the function amounts to repeatedly stepping in the negative gradient direction:
$$\theta_{t+1} = \theta_t - \alpha_t \nabla f(\theta_t)$$
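As a minimal, self-contained sketch of this update rule (the quadratic target, starting point, and learning rate below are illustrative choices, not from the course):

```python
import tensorflow as tf

# Toy example: minimize f(theta) = (theta - 5)^2 by gradient descent.
theta = tf.Variable(0.0)
alpha = 0.1  # assumed constant step size

for t in range(100):
    with tf.GradientTape() as tape:
        loss = (theta - 5.0) ** 2
    grad = tape.gradient(loss, theta)
    theta.assign_sub(alpha * grad)  # theta <- theta - alpha * grad

print(theta.numpy())  # converges towards 5.0
```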
TensorFlow computes gradients by recording operations on a tf.GradientTape. Constants are not tracked automatically, and a tape only differentiates what was computed inside its context:

import tensorflow as tf

w = tf.constant(1.)
b = tf.constant(2.)
x = tf.constant(3.)
y = w * x  # computed outside the tape, so it is not recorded
with tf.GradientTape() as tape:
    tape.watch([w])  # constants must be watched explicitly
    y2 = w * x
grad1 = tape.gradient(y, [w])
print(grad1)  # [None]: y was never recorded by the tape
grad2 = tape.gradient(y2, [w])  # RuntimeError: a non-persistent tape can only be used once
with tf.GradientTape() as tape:
    tape.watch([w])
    y2 = w * x
grad2 = tape.gradient(y2, [w])
print(grad2)  # [3.0]: dy2/dw = x = 3
A non-persistent tape can be queried only once; its resources are released as soon as gradient() returns. Setting the persistent option solves this, but remember to release the tape manually with del when you are done:
with tf.GradientTape(persistent=True) as tape:
    tape.watch([w])
    y = w * x
grad1 = tape.gradient(y, [w])
print(grad1)  # [3.0]
grad1 = tape.gradient(y, [w])  # a persistent tape can be queried repeatedly
print(grad1)  # [3.0]
del tape  # release the tape manually
grad1 = tape.gradient(y, [w])  # NameError: the tape has been deleted
print(grad1)
Nesting two tapes yields second-order derivatives. Note that the inner gradient call has to happen inside the outer tape's context so that it gets recorded, and that tf.Variable objects are watched automatically, so no tape.watch is needed:

w = tf.Variable(1.0)
b = tf.Variable(2.0)
x = tf.Variable(3.0)
with tf.GradientTape() as t1:
    with tf.GradientTape() as t2:
        y = w * x * x + w * b
    dx, db = t2.gradient(y, [x, b])
print(dx, db)  # dy/dx = 2*w*x = 6.0, dy/db = w = 1.0
dx2 = t1.gradient(dx, [x])
print(dx2)  # [d2y/dx2] = [2*w] = [2.0]
Gradients of common activation functions can be inspected the same way. Sigmoid saturates at both ends, so its gradient vanishes for large |x|:

a = tf.linspace(-10., 10., 10)
with tf.GradientTape() as tape:
    tape.watch(a)
    y = tf.sigmoid(a)
da = tape.gradient(y, [a])
print(a)
print(y)
print(da)  # near zero at both ends, where sigmoid saturates
The tanh activation, tf.tanh(a), is handled identically; see the sketch below.
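A quick sketch, checking the tape's result against the closed form tanh'(x) = 1 - tanh²(x):

```python
a = tf.linspace(-10., 10., 10)
with tf.GradientTape() as tape:
    tape.watch(a)
    y = tf.tanh(a)
da = tape.gradient(y, a)
print(da)                     # gradient recorded by the tape
print(1.0 - tf.tanh(a) ** 2)  # closed form; should match elementwise
```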
ReLU and its derivative:

$$f(x) = \left\{ \begin{array}{rcl} 0 & \mathrm{for} & x<0 \\ x & \mathrm{for} & x>0 \end{array}\right.$$

$$f'(x) = \left\{ \begin{array}{rcl} 0 & \mathrm{for} & x<0 \\ 1 & \mathrm{for} & x\geq0 \end{array}\right.$$
tf.nn.relu(x)
tf.nn.leaky_relu(x)  # for x < 0 the gradient is a small positive constant (the leak slope)
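A small sketch confirming those derivatives with a GradientTape (the sample points are arbitrary):

```python
x = tf.constant([-2.0, -0.5, 0.5, 2.0])
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)
    y_relu = tf.nn.relu(x)
    y_leaky = tf.nn.leaky_relu(x)  # default leak slope alpha = 0.2
print(tape.gradient(y_relu, x))   # [0., 0., 1., 1.]
print(tape.gradient(y_leaky, x))  # [0.2, 0.2, 1., 1.]
del tape
```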
Softmax and its Jacobian with respect to the logits (note the sum in the denominator runs over the index j):

$$S(y_i) = \frac{e^{y_i}}{\sum\limits_{j} e^{y_j}}$$

$$\frac{\partial p_i}{\partial a_j} = \left\{ \begin{array}{rcl} p_i(1-p_j) & \mathrm{if} & i = j \\ -p_i \cdot p_j & \mathrm{if} & i \neq j \end{array}\right.$$
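This Jacobian can be verified numerically with tape.jacobian; a sketch with made-up logits:

```python
a = tf.constant([1.0, 2.0, 3.0])
with tf.GradientTape() as tape:
    tape.watch(a)
    p = tf.nn.softmax(a)
jac = tape.jacobian(p, a)  # jac[i, j] = dp_i / da_j
print(jac)
print(tf.linalg.diag(p) - tf.tensordot(p, p, axes=0))  # closed form: diag(p) - p p^T
```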
Gradient of an MSE loss through a softmax output layer:

x = tf.random.normal([2, 4])
w = tf.random.normal([4, 3])
b = tf.zeros([3])
y = tf.constant([2, 0])  # integer class labels for the two samples
with tf.GradientTape() as tape:
    tape.watch([w, b])
    prob = tf.nn.softmax(x @ w + b)
    loss = tf.reduce_mean(tf.keras.losses.MSE(tf.one_hot(y, depth=3), prob))
grads = tape.gradient(loss, [w, b])
print(grads[0])  # dloss/dw, shape [4, 3]
print(grads[1])  # dloss/db, shape [3]
Cross-entropy itself was covered in the previous chapter, so it is not introduced again here; its gradient is obtained the same way:
with tf.GradientTape() as tape:
    tape.watch([w, b])
    logits = x @ w + b
    loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(
        tf.one_hot(y, depth=3), logits, from_logits=True))
grads = tape.gradient(loss, [w, b])
print(grads[0])
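In an actual training step these gradients would be handed to an optimizer. A minimal sketch, reusing x and y from above but making w and b trainable tf.Variable objects (the SGD learning rate here is an assumption):

```python
w = tf.Variable(tf.random.normal([4, 3]))
b = tf.Variable(tf.zeros([3]))
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

with tf.GradientTape() as tape:
    # Variables are watched automatically, so no tape.watch is needed
    logits = x @ w + b
    loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(
        tf.one_hot(y, depth=3), logits, from_logits=True))
grads = tape.gradient(loss, [w, b])
optimizer.apply_gradients(zip(grads, [w, b]))  # w <- w - lr*dw, b <- b - lr*db
```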
Since I am only learning how to use TensorFlow, I will not repeat the course's derivation of backpropagation for single-layer and multi-layer perceptrons; if you are interested, see Andrew Ng's deep learning course.
# cd into your project directory first; 0.0.0.0 makes TensorBoard reachable from remote machines, and a custom port avoids collisions
tensorboard --logdir=./logs --host 0.0.0.0 --port=11021
Create a summary writer that logs to a timestamped directory:

import datetime

current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = 'logs/' + current_time
summary_writer = tf.summary.create_file_writer(log_dir)
# MNIST as an example; tf.summary.image does not support subplots, so the grid below is written by hand
sample_img = tf.reshape(sample_img, [1, 28, 28, 1])  # sample_img: a single 28x28 MNIST image
with summary_writer.as_default():
    tf.summary.image("Training sample:", sample_img, step=0)
Hand-rolling a subplot grid:
import io
import matplotlib.pyplot as plt

def plot_to_image(figure):
    """Converts the matplotlib plot specified by 'figure' to a PNG image and
    returns it. The supplied figure is closed and inaccessible after this call."""
    # Save the plot to a PNG in memory.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    # Closing the figure prevents it from being displayed directly inside
    # the notebook.
    plt.close(figure)
    buf.seek(0)
    # Convert PNG buffer to TF image
    image = tf.image.decode_png(buf.getvalue(), channels=4)
    # Add the batch dimension
    image = tf.expand_dims(image, 0)
    return image
def image_grid(images):
    """Return a 5x5 grid of the MNIST images as a matplotlib figure."""
    # Create a figure to contain the plot; subplot basics:
    # https://morvanzhou.github.io/tutorials/data-manipulation/plt/4-1-subpot1/
    figure = plt.figure(figsize=(10, 10))
    for i in range(25):
        # Start next subplot.
        plt.subplot(5, 5, i + 1, title='name')
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(images[i], cmap=plt.cm.binary)
    return figure
val_images = x[:25]  # x: a batch of MNIST images; step: the current training step
val_images = tf.reshape(val_images, [-1, 28, 28])  # imshow needs 2-D images, so no channel dim
with summary_writer.as_default():
    figure = image_grid(val_images)
    tf.summary.image('val-images:', plot_to_image(figure), step=step)
with summary_writer.as_default():
    tf.summary.scalar('train-loss', float(loss), step=step)
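In context this line runs once per step inside the training loop; a self-contained toy version (the log directory and the dummy loss values are made up):

```python
import tensorflow as tf

summary_writer = tf.summary.create_file_writer('logs/scalar-demo')  # hypothetical log dir
for step in range(100):
    loss = 1.0 / (step + 1)  # dummy stand-in for the real training loss
    with summary_writer.as_default():
        tf.summary.scalar('train-loss', loss, step=step)
```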
The above is only a brief tour of TensorBoard; see the TensorBoard documentation for more.
Finally, a TensorFlow learning site worth noting: 简单粗暴 TensorFlow 2 (A Concise Handbook of TensorFlow 2).