tf工具相关

1. 从event file解析loss:

# Parse scalar 'loss' values out of a TensorBoard event file and print them.
import tensorflow as tf
from tensorflow.python.summary import summary_iterator

event_file = 'events.filename'
for event in summary_iterator.summary_iterator(event_file):
  # Only events that carry a Summary payload can contain scalar values.
  if event.HasField('summary'):
    for value in event.summary.value:
      # simple_value is the scalar slot of a Summary.Value; filter to the
      # 'loss' tag.  (Removed an unused `event_eval_result = {}` dict that
      # the original built and never read.)
      if value.HasField('simple_value') and value.tag == 'loss':
        print(value.simple_value)

 

2. 多进程找出有问题的tfrecords:

import glob
import multiprocessing
import queue

import tensorflow as tf



def task2(id, q):
  """Worker: drain TFRecord file paths from *q*, print any file that fails to parse.

  Args:
    id: integer worker id (kept for the caller's positional signature; unused).
    q: a multiprocessing.Manager().Queue pre-filled with tfrecord paths.

  Returns:
    None.
  """
  while True:
    # The original `while not q.empty(): q.get()` is a check-then-act race:
    # another worker can drain the queue between empty() and get(), leaving
    # this process blocked forever.  get_nowait() + queue.Empty is the safe
    # way to detect exhaustion.
    try:
      file = q.get_nowait()
    except queue.Empty:
      break
    try:
      # Iterating the record stream forces a full read of the file;
      # a corrupt tfrecord raises here.
      for _ in tf.python_io.tf_record_iterator(file):
        pass
    except Exception:
      # Best-effort scan: report the bad file and keep going.
      print("=====", file)
  return None

if __name__ == '__main__':
  # The __main__ guard is mandatory for multiprocessing: on spawn-based
  # platforms (Windows, macOS default) each worker re-imports this module,
  # and without the guard the pool setup would recurse.
  pool = multiprocessing.Pool()
  m = multiprocessing.Manager()
  cpus = multiprocessing.cpu_count()
  # Manager queue is picklable across processes (a plain mp.Queue cannot be
  # passed through Pool.apply_async).
  q = m.Queue()
  results = []
  train_files = sorted(glob.glob('/path/to/tfrecords_dir/*'))
  for each in train_files:
    q.put(each)
  # One long-lived task per CPU; each drains the shared queue until empty.
  for i in range(cpus):
    results.append(pool.apply_async(task2, args=(i, q)))
  pool.close()
  pool.join()

  # get() re-raises any exception that escaped a worker.
  for result in results:
    result.get()

3. 往ckpt里面添加变量

# Add a new variable to an existing TF1 checkpoint and re-save it.
# Usage: python script.py <ckpt_path_prefix> <output_dir>
import os,sys
import tensorflow as tf
import horovod.tensorflow as hvd
# NOTE(review): hvd is only initialized, never used below — presumably
# required by the original training environment; confirm before removing.
hvd.init()
ckpt_path=sys.argv[1]
# Create the new variable in the default graph BEFORE importing the meta
# graph, so it lives alongside the restored variables.
tf.get_variable('var_name',dtype=tf.int32,shape=[],initializer=tf.constant_initializer(0))
with tf.Session() as sess:
  # Initialize everything first; restore() below then overwrites every
  # variable that exists in the checkpoint, leaving only the newly added
  # variable at its initializer value.
  sess.run(tf.global_variables_initializer())
  saver = tf.train.import_meta_graph(ckpt_path+'.meta')
  saver.restore(sess, ckpt_path)
  path=sys.argv[2]  # output directory for the augmented checkpoint
  # A fresh Saver captures ALL variables in the graph, including the new one.
  saver2 = tf.train.Saver()
  # Keep the original checkpoint basename for the new save.
  ckpt_name=ckpt_path[ckpt_path.rindex('/')+1:]
  tf.gfile.MakeDirs(path)
  # chdir so that save() with a relative name writes into the output dir.
  os.chdir(path)
  print(saver2.save(sess, ckpt_name))

4. 查询参数的梯度:


# Inspect parameter gradients with tfdbg's GradientsDebugger (TF1 /
# tf.compat.v1 API).  Imports added so the snippet is self-contained.
import tensorflow as tf
from tensorflow.python import debug as tf_debug

x = tf.Variable(1.0, name="x")
y = tf.add(x, x, name="y")
# Fixed: the original referenced an undefined name `debug_y`.
z = tf.square(y)

# Create a train op under the grad_debugger context so that gradients for
# tensors matching the regex are recorded as they are built.
grad_debugger = tf_debug.GradientsDebugger()
with grad_debugger.watch_gradients_by_tensor_names(r"(x|y):0$"):
  # Fixed: the optimizer constructor takes a learning rate, not the loss;
  # minimize(z) is what actually constructs the gradient tensors.
  train_op = tf.compat.v1.train.GradientDescentOptimizer(0.1).minimize(z)

# Now we can reflect through grad_debugger to get the gradient tensor
# with respect to x and y.
x_grad = grad_debugger.gradient_tensor("x:0")
y_grad = grad_debugger.gradient_tensor("y:0")

5. 打印日志 TF_CPP_MIN_VLOG_LEVEL=1

你可能感兴趣的:(技巧)