tensorflowGPU优化之利用timeline详细查看各部分op时间

在利用tensorflow写程序是,我们常常会碰到GPU利用率始终不高的情况,这时我们需要详细了解程序结点的消耗时间,tensorboard提供了一个窗口,但仍不详细,这里介绍timeline的使用,他可以更详细的给出各部分op的时间消耗,让你了解程序的瓶颈

简单的例子:

import tensorflow as tf
from tensorflow.python.client import timeline

a = tf.random_normal([2000, 5000])
b = tf.random_normal([5000, 1000])
res = tf.matmul(a, b)

with tf.Session() as sess:
    #options 和run_metadata必须添加,run_metadata记录训练元信息,这样可以查看某轮迭代计算的时间消耗,内存占用等情况
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(res, options=options, run_metadata=run_metadata)

    # 保存为json文件
    fetched_timeline = timeline.Timeline(run_metadata.step_stats)
    chrome_trace = fetched_timeline.generate_chrome_trace_format()
    with open('timeline_01.json', 'w') as f:
        f.write(chrome_trace)

如上,是一个最简单的timeline用法,得到json文件后,需要在谷歌浏览器网址中输入chrome://tracing,打开后传入json文件解析,可以看到如下的图
tensorflowGPU优化之利用timeline详细查看各部分op时间_第1张图片

更复杂常用的例子

import os
import tempfile

import tensorflow as tf
from tensorflow.contrib.layers import fully_connected as fc
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python.client import timeline

batch_size = 100

inputs = tf.placeholder(tf.float32, [batch_size, 784])
targets = tf.placeholder(tf.float32, [batch_size, 10])

with tf.variable_scope("layer_1"):
    fc_1_out = fc(inputs, num_outputs=500, activation_fn=tf.nn.sigmoid)
with tf.variable_scope("layer_2"):
    fc_2_out = fc(fc_1_out, num_outputs=784, activation_fn=tf.nn.sigmoid)
with tf.variable_scope("layer_3"):
    logits = fc(fc_2_out, num_outputs=10)

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

if __name__ == '__main__':
    mnist_save_dir = os.path.join(tempfile.gettempdir(), 'MNIST_data')
    mnist = input_data.read_data_sets(mnist_save_dir, one_hot=True)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        for i in range(3):
            batch_input, batch_target = mnist.train.next_batch(batch_size)
            feed_dict = {inputs: batch_input,
                         targets: batch_target}

            sess.run(train_op,
                     feed_dict=feed_dict,
                     options=options,
                     run_metadata=run_metadata)

            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open('timeline_02_step_%d.json' % i, 'w') as f:
                f.write(chrome_trace)

这里保存了多步的结果,你可以采用同样的方法查看文件。

你可能感兴趣的:(tensorflow)