TypeError: An op outside of the function building code is being passed
a "Graph" tensor. It is possible to have Graph tensors
leak out of the function building context by including a
tf.init_scope in your function building code.
For example, the following function will fail:
@tf.function
def has_init_scope():
  my_constant = tf.constant(1.)
  with tf.init_scope():
    added = my_constant * 2
The graph tensor has name: Adam/cond/Identity:0
I could read every word of this, but had no idea what it actually meant.
# Custom learning-rate schedule
import time
import tensorflow as tf

class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, init, decay, warmup_steps = 0, warmup_rate = 1):
        super().__init__()
        self.value = tf.Variable(init, dtype=tf.float32)
        self.decay_left = tf.cast(1 - decay, dtype=tf.float32)
        self.warmup_steps = tf.cast(warmup_steps, dtype=tf.float32)
        self.warmup_rate = tf.cast(warmup_rate, dtype=tf.float32)

    def __call__(self, step):
        def f1(): return self.value * self.warmup_rate
        def f2(): return self.value * self.decay_left
        self.value = tf.cond(tf.less(step, self.warmup_steps), f1, f2)
        return self.value

lr = CustomSchedule(0.01, 0.02, 5.0, 1.05)
optimizer = tf.keras.optimizers.Adam(lr)
# Convert train_step to a static graph with tf.function
@tf.function
def train_step(d_in, r_out, weight = [], validation = False):
    with tf.GradientTape() as tape:
        predictions = t_transformer(d_in, training=True)
        loss = tf.keras.metrics.mean_squared_error(r_out, predictions)
        loss = tf.cond(tf.not_equal(len(weight), 0), lambda: loss * weight, lambda: loss)
    if not validation:
        gradients = tape.gradient(loss, t_transformer.trainable_variables)
        optimizer.apply_gradients(zip(gradients, t_transformer.trainable_variables))
    train_loss(loss)
    train_accuracy(r_out, predictions)

# Training loop
for epoch in range(EPOCHS):
    start = time.time()
    train_loss.reset_states()
    train_accuracy.reset_states()
    for i in range(0, length, batch_size):
        train_step(x[i:i+batch_size], y[i:i+batch_size], weight[i:i+batch_size])
        if i % 500 == 0:
            print("loss {:4f} accuracy {:4f} lr {:4f}".format(train_loss.result(), train_accuracy.result(), lr.value))
The cause: train_step is decorated with tf.function and runs as a static graph, while the optimizer's learning-rate schedule lr is written in eager style. Its __call__ rebinds self.value, so after the first traced call self.value holds a graph tensor (the Adam/cond/Identity:0 named in the error), which can then leak into eager code such as the lr.value read in the print above.
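Before showing the fix, here is a hedged, minimal illustration of the same class of problem (this is not the project's code, and the exact behaviour depends on the TF 2.x release): a Python attribute is rebound to a tensor created while tracing a tf.function, and that graph tensor is later handed to an eager op.

import tensorflow as tf

class Holder:
    def __init__(self):
        self.value = tf.Variable(0.01)    # starts out as an ordinary eager Variable

holder = Holder()

@tf.function
def step():
    # During tracing this rebinds holder.value to a symbolic (graph) tensor.
    holder.value = holder.value * 0.99
    return holder.value

step()
# holder.value now refers to a tensor that only exists inside the traced graph;
# using it in an eager op leaks it out of the function-building context and, on
# the TF 2.x releases that emit the message above, raises the same TypeError.
doubled = holder.value * 2.0

The first attempted fix was therefore to decorate the schedule's __call__ with tf.function as well, so that its ops are built inside the graph rather than eagerly. With that change the TypeError went away: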
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, init, decay, warmup_steps = 0, warmup_rate = 1):
        super().__init__()
        self.value = tf.Variable(init, dtype=tf.float32)
        self.decay_left = tf.cast(1 - decay, dtype=tf.float32)
        self.warmup_steps = tf.cast(warmup_steps, dtype=tf.float32)
        self.warmup_rate = tf.cast(warmup_rate, dtype=tf.float32)

    @tf.function  # added: trace __call__ as a graph function too
    def __call__(self, step):
        def f1(): return self.value * self.warmup_rate
        def f2(): return self.value * self.decay_left
        self.value = tf.cond(tf.less(step, self.warmup_steps), f1, f2)
        return self.value
But that change produced a different error:
FailedPreconditionError: Could not find variable _AnonymousVar1159. This could mean that the variable has been deleted. In TF1, it can also mean the variable is uninitialized. Debug info: container=localhost, status=Not found: Resource localhost/_AnonymousVar1159/N10tensorflow3VarE does not exist.
[[{{node Adam/StatefulPartitionedCall/cond/then/_7/cond/ReadVariableOp}}]] [Op:__inference_train_step_41600]
Function call stack:
train_step
The likely remaining problem is that __call__ still rebinds self.value, so after the first traced call the tf.Variable created in __init__ is no longer what the graph reads, and the optimizer ends up looking for a variable that has been deleted. Updating the variable in place with assign instead of rebinding it resolved the error:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, init, decay, warmup_steps = 0, warmup_rate = 1):
        super().__init__()
        self.value = tf.Variable(init, dtype=tf.float32)
        self.decay_left = tf.cast(1 - decay, dtype=tf.float32)
        self.warmup_steps = tf.cast(warmup_steps, dtype=tf.float32)
        self.warmup_rate = tf.cast(warmup_rate, dtype=tf.float32)

    @tf.function
    def __call__(self, step):
        def f1(): return self.value * self.warmup_rate
        def f2(): return self.value * self.decay_left
        self.value.assign(tf.cond(tf.less(step, self.warmup_steps), f1, f2))  # update the Variable in place with assign instead of rebinding self.value
        return self.value
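For completeness, here is a hedged, minimal end-to-end check (not the original project code: it reuses the fixed CustomSchedule above, but swaps the real t_transformer for a one-layer stand-in model and uses made-up data) that drives Adam inside a tf.function train step and reads lr.value eagerly after every step, without triggering either of the errors above:

import numpy as np
import tensorflow as tf

lr = CustomSchedule(0.01, 0.02, warmup_steps=5.0, warmup_rate=1.05)
optimizer = tf.keras.optimizers.Adam(lr)
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])   # stand-in for t_transformer

@tf.function
def tiny_step(d_in, r_out):
    with tf.GradientTape() as tape:
        predictions = model(d_in, training=True)
        loss = tf.reduce_mean(tf.keras.losses.mean_squared_error(r_out, predictions))
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

x = np.random.rand(8, 4).astype("float32")
y = np.random.rand(8, 1).astype("float32")
for step in range(10):
    tiny_step(x, y)
    # self.value is a Variable updated in place, so it can be read eagerly.
    print(step, float(lr.value.numpy()))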