提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
由于复现需要用到自定义Loss,参考了TensorFlow的例子:
https://tensorflow.google.cn/guide/keras/customizing_what_happens_in_fit
有两种实现方法,但是发现用compile传递的Loss总是得不到想要的结果,现象跟下面链接中网友描述的基本一致,但是该网友也没有给出合理解释:
http://blog.itpub.net/69955379/viewspace-2786299/
示例(前置代码):
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
# Seed both NumPy and TensorFlow so repeated runs are comparable.
np.random.seed(1)
tf.random.set_seed(1)

# Load MNIST and divide the pixel values by 255.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0
代码如下(示例):
class MyModel(tf.keras.Model):
    """MNIST classifier whose loss is computed by hand inside `train_step`.

    Nothing but the optimizer has to be passed to `compile()`: the loss comes
    from `my_loss`, and the reported values come from the two metric objects
    created in `__init__`.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = tf.keras.layers.Conv2D(10, 3)
        self.layer2 = tf.keras.layers.Flatten()
        self.layer3 = tf.keras.layers.Dense(128, activation='relu')
        self.layer4 = tf.keras.layers.Dense(2)
        self.layer5 = tf.keras.layers.Dense(10, activation='softmax')
        # Trace the layers once on a symbolic (28, 28) input
        # (presumably so their weights exist before training starts).
        self.inputs = tf.keras.Input(shape=(28, 28))
        self.call(self.inputs)
        # Running mean of the loss and running accuracy, updated per step.
        self.loss_tracker = keras.metrics.Mean(name="loss")
        self.acc_metric = keras.metrics.SparseCategoricalAccuracy()

    def call(self, x):
        """Append a channel axis to `x` and apply layer1..layer5 in order."""
        x = tf.expand_dims(x, 3)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        return x

    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [self.loss_tracker, self.acc_metric]

    def my_loss(self, y_true, y_pred):
        """Return the mean cross-entropy between integer labels and probabilities.

        `y_true` is one-hot encoded to `y_pred.shape[1]` classes and `y_pred`
        is clipped to [1e-9, 1] so the logarithm never sees zero.

        NOTE: written for `y_true` of shape (batch,). A (batch, 1) input
        one-hot encodes to (batch, 1, classes) and then broadcasts against
        `y_pred` instead of matching it row by row.
        """
        y_true = tf.one_hot(y_true, y_pred.shape[1])
        y_pred = tf.clip_by_value(y_pred, 1e-9, 1)
        ret = tf.math.reduce_sum(-tf.math.multiply(y_true, tf.math.log(y_pred)), 1)
        ret = tf.math.reduce_mean(ret)
        return ret

    def train_step(self, data):
        """Run one optimisation step and return the running loss/accuracy."""
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value with the hand-written loss
            # (no loss is passed to `compile()` in this variant).
            loss = self.my_loss(y, y_pred)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.loss_tracker.update_state(loss)
        self.acc_metric.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {"loss": self.loss_tracker.result(), "acc": self.acc_metric.result()}

    def test_step(self, data):
        """Evaluate one batch with the same loss and trackers as `train_step`.

        Without this override `evaluate()` would never update the two
        trackers exposed by `metrics`, because no loss or metric is
        registered through `compile()`.
        """
        x, y = data
        y_pred = self(x, training=False)
        loss = self.my_loss(y, y_pred)
        self.loss_tracker.update_state(loss)
        self.acc_metric.update_state(y, y_pred)
        return {"loss": self.loss_tracker.result(), "acc": self.acc_metric.result()}
# Instantiate the model. Only the optimizer is given to compile() here,
# then train for one epoch and evaluate on the test split.
model = MyModel()
model.compile(optimizer='adam')
model.fit(x=x_train, y=y_train, epochs=1)
model.evaluate(x=x_test, y=y_test, verbose=2)
1875/1875 [==============================] - 29s 15ms/step - loss: 0.6923 - acc: 0.6537
代码如下(示例):
class MyModel(tf.keras.Model):
    """MNIST classifier whose loss is supplied through `compile()`.

    `train_step` reads the loss back via `self.compiled_loss` and the metrics
    via `self.compiled_metrics`; `my_loss` is meant to be passed as the
    `loss=` argument of `compile()`.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = tf.keras.layers.Conv2D(10, 3)
        self.layer2 = tf.keras.layers.Flatten()
        self.layer3 = tf.keras.layers.Dense(128, activation='relu')
        self.layer4 = tf.keras.layers.Dense(2)
        self.layer5 = tf.keras.layers.Dense(10, activation='softmax')
        # Trace the layers once on a symbolic (28, 28) input
        # (presumably so their weights exist before training starts).
        self.inputs = tf.keras.Input(shape=(28, 28))
        self.call(self.inputs)

    def call(self, x):
        """Append a channel axis to `x` and apply layer1..layer5 in order."""
        x = tf.expand_dims(x, 3)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        return x

    def my_loss(self, y_true, y_pred):
        """Return the mean cross-entropy between integer labels and probabilities.

        `y_true` is one-hot encoded to `y_pred.shape[1]` classes and `y_pred`
        is clipped to [1e-9, 1] so the logarithm never sees zero.

        NOTE: written for `y_true` of shape (batch,). This version is kept
        unchanged on purpose: it reproduces the wrong loss values that the
        surrounding text goes on to diagnose.
        """
        y_true = tf.one_hot(y_true, y_pred.shape[1])
        y_pred = tf.clip_by_value(y_pred, 1e-9, 1)
        ret = tf.math.reduce_sum(-tf.math.multiply(y_true, tf.math.log(y_pred)), 1)
        ret = tf.math.reduce_mean(ret)
        return ret

    def train_step(self, data):
        """Run one optimisation step using the loss and metrics from `compile()`."""
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}
# Instantiate the model and hand the custom loss to compile(),
# then train for one epoch and evaluate on the test split.
model = MyModel()
model.compile(
    optimizer='adam',
    loss=model.my_loss,
    metrics=['accuracy'],
)
model.fit(x_train, y_train, epochs=1)
model.evaluate(x_test, y_test, verbose=2)
1875/1875 [==============================] - 25s 13ms/step - loss: 7.3672 - accuracy: 0.1324
两次Loss函数一样,但是就是没法正常收敛,loss值明显不对,accuracy增长非常慢。如果换成内置的loss,非自定义的tf.keras.losses.SparseCategoricalCrossentropy(),训练恢复正常。
p = model(x_train[:32])  # predictions for the first 32 training samples
t = y_train[:32]  # the matching labels
L = tf.keras.losses.SparseCategoricalCrossentropy()  # built-in loss used as the reference
print(L(t, p))
print(model.my_loss(t, p))  # hand-written loss on the same inputs
tf.Tensor(2.2893891, shape=(), dtype=float32)
tf.Tensor(2.2893891, shape=(), dtype=float32)
可以看到,loss计算本身没有问题
继续测试
p = model(x_train[:32])  # predictions for the first 32 training samples
t = y_train[:32]  # the matching labels, still a NumPy slice
L = tf.keras.losses.SparseCategoricalCrossentropy()  # built-in loss used as the reference
print(L(t, p))
print(model.my_loss(t, p))
print(model.compiled_loss(t, p, regularization_losses=model.losses))
tf.Tensor(2.2893891, shape=(), dtype=float32)
tf.Tensor(2.2893891, shape=(), dtype=float32)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Input In [126], in
4 print(L(t, p))
5 print(model.my_loss(t, p))
----> 6 print(model.compiled_loss(t, p, regularization_losses=model.losses))
File d:\programdata\miniconda3\lib\site-packages\keras\engine\compile_utils.py:199, in LossesContainer.__call__(self, y_true, y_pred, sample_weight, regularization_losses)
196 if y_t is None or loss_obj is None: # Ok to have no loss for an output.
197 continue
--> 199 y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
200 sw = apply_mask(y_p, sw, get_mask(y_p))
201 loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File d:\programdata\miniconda3\lib\site-packages\keras\engine\compile_utils.py:675, in match_dtype_and_rank(y_t, y_p, sw)
673 def match_dtype_and_rank(y_t, y_p, sw):
674 """Match dtype and rank of predictions."""
--> 675 if y_t.shape.rank == 1 and y_p.shape.rank == 2:
676 y_t = tf.expand_dims(y_t, axis=-1)
677 if sw is not None:
AttributeError: 'tuple' object has no attribute 'rank'
发现,直接调用compiled_loss,居然会报错:AttributeError: ‘tuple’ object has no attribute ‘rank’
经测试,numpy数组的shape是普通的tuple,没有rank属性(报错信息里的'tuple'正是它);只有tensor对象的shape才有rank属性。
所以猜想compiled_loss只接收tensor输入,compile本身会对输入数据进行调整,导致输入跟手动实现时候输入不一致。
p = model(x_train[:32])  # predictions for the first 32 training samples
t = y_train[:32]  # the matching labels, still a NumPy slice
L = tf.keras.losses.SparseCategoricalCrossentropy()  # built-in loss used as the reference
print(L(t, p))
print(model.my_loss(t, p))
print(model.compiled_loss(tf.constant(t), p, regularization_losses=model.losses))  # labels wrapped in a tensor this time
tf.Tensor(2.2893891, shape=(), dtype=float32)
tf.Tensor(2.2893891, shape=(), dtype=float32)
tf.Tensor(7.330165, shape=(), dtype=float32)
新代码不再报错,但是得到的loss值和训练时打印的一样是异常的。结合上面traceback里的match_dtype_and_rank(当y_t是一维、y_p是二维时会执行tf.expand_dims(y_t, axis=-1)),可以大胆猜测:compiled_loss在调用自定义loss之前会先调整输入,因此改变了y_true的维度。
在loss添加打印信息
def my_loss(self, y_true, y_pred):
    """Diagnostic variant of the cross-entropy loss that prints its tensors.

    Prints the one-hot encoded `y_true` and the clipped `y_pred` so their
    shapes can be compared; the computation itself is left unchanged.
    """
    print()
    y_true = tf.one_hot(y_true, y_pred.shape[1])
    print('y_true:', y_true)
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1)
    print('y_pred:', y_pred)
    ret = tf.math.reduce_sum(-tf.math.multiply(y_true, tf.math.log(y_pred)), 1)
    ret = tf.math.reduce_mean(ret)
    return ret
y_true: Tensor("my_loss/one_hot:0", shape=(32, 1, 10), dtype=float32)
y_pred: Tensor("my_loss/clip_by_value:0", shape=(32, 10), dtype=float32)
y_true: Tensor("my_loss/one_hot:0", shape=(32, 1, 10), dtype=float32)
y_pred: Tensor("my_loss/clip_by_value:0", shape=(32, 10), dtype=float32)
1875/1875 [==============================] - 26s 13ms/step - loss: 7.3656 - accuracy: 0.1488
可以看到y_true和y_pred的维度不一致了,证实我们猜想,下一步就是要适应维度的变化。
def my_loss(self, y_true, y_pred):
    """Cross-entropy loss that tolerates an extra axis on `y_true`.

    After one-hot encoding, `y_true` is reshaped to (-1, classes) so it
    lines up with `y_pred` row by row. The debug prints are kept so the
    shapes can still be inspected.
    """
    print()
    y_true = tf.one_hot(y_true, y_pred.shape[1])
    # Collapse any extra axis so y_true has the same 2-D layout as y_pred.
    y_true = tf.reshape(y_true, (-1, y_pred.shape[1]))
    print('y_true:', y_true)
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1)
    print('y_pred:', y_pred)
    ret = tf.math.reduce_sum(-tf.math.multiply(y_true, tf.math.log(y_pred)), 1)
    ret = tf.math.reduce_mean(ret)
    return ret
y_true: Tensor("my_loss/Reshape:0", shape=(32, 10), dtype=float32)
y_pred: Tensor("my_loss/clip_by_value:0", shape=(32, 10), dtype=float32)
y_true: Tensor("my_loss/Reshape:0", shape=(32, 10), dtype=float32)
y_pred: Tensor("my_loss/clip_by_value:0", shape=(32, 10), dtype=float32)
1875/1875 [==============================] - 26s 13ms/step - loss: 0.6369 - accuracy: 0.7990
适应后,accuracy可以快速提升,loss也能正常下降收敛。
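实现自定义loss以及自定义train_step的时候,通过compile传递Loss Function和纯手工调用Loss,在Loss的输入上会有一定的区别:compiled_loss在调用自定义loss之前可能会对输入data进行调整(如上面例子:y_true原本是一维的标签,从traceback里的match_dtype_and_rank可以看到,当y_true是一维而y_pred是二维时,会对y_true执行tf.expand_dims(y_t, axis=-1),于是y_true从(32,)变成(32, 1),one_hot之后就是(32, 1, 10);这个调整过程对使用者是不可见的,会导致y_true比预期多了一个维度,与(32, 10)的y_pred相乘时发生广播),导致输入数据维度与预期不符合,loss值也就不对了。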
在对应的实现方法里,对维度进行对应的适配就可以避免这个问题。
困扰我两天时间,卡我进度,特此记下。用了十几年CSDN,第一次写下blog。