本文根据谷歌 TensorFlow 的官方示例进行介绍,官方示例可在「自定义训练:演示」教程中查看。通过该示例可以更好地了解 GradientTape 的使用,这在自定义网络损失函数时尤其重要。
原示例通过 URL 下载谷歌提供的鸢尾花数据集,由于网络原因下载失败,于是改用 sklearn 中自带的鸢尾花数据集。具体实现代码如下:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
# Load the iris dataset bundled with scikit-learn.
iris = load_iris()
features = iris.data  # shape: (150, 4)
labels = iris.target  # shape: (150,)
class_names = iris.target_names  # ['setosa' 'versicolor' 'virginica']
定义模型和损失函数,并建立梯度带,实现代码如下所示:
# Build a small fully connected network: two hidden ReLU layers of 10 units
# followed by a 3-unit output layer with no activation.
model = tf.keras.Sequential()
# The first layer has to be told the input shape.
model.add(tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(4,)))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(3))
# Loss object: sparse categorical cross-entropy applied to raw logits.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)


def loss(model, x, y):
    """Return the loss between the model's predictions for ``x`` and labels ``y``.

    Args:
        model: Callable model; ``model(x)`` yields the predictions.
        x: Input features passed to the model.
        y: True labels passed to ``loss_object`` as ``y_true``.

    Returns:
        The value computed by ``loss_object`` for this batch.
    """
    return loss_object(y_true=y, y_pred=model(x))
# Use a gradient tape to compute the gradients of the loss; they are later
# applied by the Adam optimizer.
def grad(model, inputs, targets):
    """Compute the loss and its gradients w.r.t. the model's trainable variables.

    Args:
        model: Model whose ``trainable_variables`` are differentiated.
        inputs: Input features for the forward pass.
        targets: True labels for the loss.

    Returns:
        A ``(loss_value, gradients)`` tuple, where ``gradients`` is the result
        of ``tape.gradient`` for ``model.trainable_variables``.
    """
    # The forward pass has to run inside the tape context to be recorded.
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients
# Adam optimizer with a learning rate of 0.01.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Run a single optimization step on the whole dataset.
loss_value, grads = grad(model, features, labels)
grads_and_vars = zip(grads, model.trainable_variables)
optimizer.apply_gradients(grads_and_vars)
训练模型,代码如下所示:
# Per-epoch metric history, kept for plotting afterwards.
train_loss_results = []
train_accuracy_results = []
num_epochs = 201
batches = 30  # samples per mini-batch (this is a batch size, despite the name)
num_samples = len(features)
for epoch in range(num_epochs):
    # Fresh metric objects so each epoch's average starts from zero.
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
    # load_iris returns the samples grouped by class, so slicing them in
    # order would feed the model batches dominated by a single class.
    # Visit the samples in a new random order every epoch instead.
    order = np.random.permutation(num_samples)
    # Training loop - batch boundaries are derived from the data length
    # rather than a hard-coded batch count; the last batch may be smaller.
    for start in range(0, num_samples, batches):
        batch_idx = order[start:start + batches]
        x = features[batch_idx]
        y = labels[batch_idx]
        # Optimize the model.
        loss_value, grads = grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # Track progress: add the current batch loss.
        epoch_loss_avg.update_state(loss_value)
        # Compare the predicted labels with the true labels.
        epoch_accuracy.update_state(y, model(x))
    # End of epoch.
    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())
    if epoch % 50 == 0:
        print("Epoch {:03d}: Loss: {:.3f}, Accuracy: {:.3%}".format(epoch,
                                                                    epoch_loss_avg.result(),
                                                                    epoch_accuracy.result()))
# Plot loss (top) and accuracy (bottom) per epoch on a shared x axis.
fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))
loss_ax, accuracy_ax = axes
fig.suptitle('Training Metrics')

loss_ax.set_ylabel("Loss", fontsize=14)
loss_ax.plot(train_loss_results)

accuracy_ax.set_ylabel("Accuracy", fontsize=14)
accuracy_ax.set_xlabel("Epoch", fontsize=14)
accuracy_ax.plot(train_accuracy_results)

plt.show()
对数据进行预测,代码如下所示:
# Samples to classify: three rows of four features each.
# The rows must be wrapped in an outer list so the tensor is 2-D; without the
# outer "[" the brackets are unbalanced and the file does not parse.
predict_dataset = tf.convert_to_tensor([
    [5.1, 3.3, 1.7, 0.5],
    [5.9, 3.0, 4.2, 1.5],
    [6.9, 3.1, 5.4, 2.1],
])
# Run the model on the samples and report the most likely class for each.
predictions = model(predict_dataset)
for i, logits in enumerate(predictions):
    # Softmax turns the logits into probabilities; argmax picks the class.
    probabilities = tf.nn.softmax(logits)
    class_idx = tf.argmax(logits).numpy()
    message = "Example {} prediction: {} ({:4.1f}%)".format(
        i, class_names[class_idx], 100 * probabilities[class_idx]
    )
    print(message)
输出预测结果如下所示:
Example 0 prediction: setosa (99.7%)
Example 1 prediction: versicolor (99.7%)
Example 2 prediction: virginica (96.2%)