I spent several days digging through the TensorFlow source code to finally get past this pitfall!
To use TF-Debugger, set debug_tensor_watch_opts inside the RunOptions and pass that RunOptions into session.run. A simple example follows.
First, build a simple graph that adds two nodes and save it as a pb file:
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import graph_util

with tf.Session() as sess:
    a = tf.placeholder(tf.float32, [1], name='a')
    b = tf.placeholder(tf.float32, [1], name='b')
    print(b)
    c = tf.add(a, b, name='c')
    print(c.name)
    sess.run(tf.global_variables_initializer())
    print(sess.run(c, feed_dict={a: [5.0], b: [4.0]}))
    # freeze the graph up to node "c" and write it out as models/graph.pb
    constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph_def, ["c"])
    tf.train.write_graph(constant_graph, 'models/', 'graph.pb', as_text=False)
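Before moving on, it can be worth a quick sanity check that the freeze step wrote what we expect. The snippet below is just an illustrative sketch (it assumes the same models/graph.pb path as above) that lists the node names and op types in the frozen graph:

# minimal sketch: list the nodes in the frozen pb (assumes models/graph.pb from the step above)
import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile('models/graph.pb', 'rb') as fid:
    graph_def.ParseFromString(fid.read())
for node in graph_def.node:
    print('%s (%s)' % (node.name, node.op))

For the graph above this should show the placeholders a, b and the add node c, which are exactly the node names the debug watches in the next step will be attached to.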
Next, load the model we just saved as a pb file and dump debug information during session.run:
import tensorflow as tf
from tensorflow.python.framework import graph_util
import numpy as np
from tensorflow.python import debug as tf_debug

# debug watch setting: attach a DebugIdentity watch on one node to the RunOptions
def addWatch(run_options, node_name, debug_urls):
    watch_opts = run_options.debug_options.debug_tensor_watch_opts
    run_options.debug_options.global_step = 1
    watch = watch_opts.add()
    watch.node_name = node_name
    watch.output_slot = 0
    debug_ops = "DebugIdentity"
    if isinstance(debug_ops, str):
        debug_ops = [debug_ops]
    watch.debug_ops.extend(debug_ops)
    if debug_urls:
        if isinstance(debug_urls, str):
            debug_urls = [debug_urls]
        watch.debug_urls.extend(debug_urls)

# main func
modelpath = './'
dbgInfoPath = "file:///your_path/dbginfo"
output_graph_path = modelpath + 'models/graph.pb'

with tf.Session() as sess:
    # load the frozen graph and import it into the session's graph
    output_graph_def = tf.GraphDef()
    with tf.gfile.GFile(output_graph_path, 'rb') as fid:
        serialized_graph = fid.read()
        output_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(output_graph_def, name='')
    a = sess.graph.get_tensor_by_name("a:0")
    print(a)
    b = sess.graph.get_tensor_by_name("b:0")
    print(b)
    c = sess.graph.get_tensor_by_name("c:0")
    print(c)
    # add a watch on every op that produces at least one output
    run_options = tf.RunOptions()
    ops = sess.graph.get_operations()
    for op in ops:
        if not op.outputs:
            continue
        node_name = op.name
        addWatch(run_options, node_name, dbgInfoPath)
    print(run_options.debug_options.debug_tensor_watch_opts)
    sess.run(tf.global_variables_initializer())
    print(sess.run(c, feed_dict={a: [3.0], b: [4.0]}, options=run_options))
Then, at the command line, run
python -m tensorflow.python.debug.cli.offline_analyzer --dump_dir=/your_path/dbginfo/
to enter the tfdbg UI.
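Besides the interactive CLI, the dump directory can also be inspected programmatically. The sketch below is based on the TF 1.x tfdbg internals (the tensorflow.python.debug.lib.debug_data module and its DebugDumpDir class, which offline_analyzer itself builds on); treat the exact API as an assumption and adjust the path to your own dump directory:

# minimal sketch: read the dump directory without the interactive CLI
# (assumes the watches above wrote to /your_path/dbginfo and a TF 1.x debug_data API)
from tensorflow.python.debug.lib import debug_data

dump = debug_data.DebugDumpDir('/your_path/dbginfo')
for datum in dump.dumped_tensor_data:
    # one entry per watched tensor, e.g. c:0 captured by DebugIdentity
    print('%s:%d %s' % (datum.node_name, datum.output_slot, datum.debug_op))
    print(datum.get_tensor())  # the dumped value as a numpy array

This is handy for scripted checks over a dump; for interactive digging, the offline_analyzer UI above is usually more convenient.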
The C++ story is similar to the Python one: to use TF-Debugger you set debug_tensor_watch_opts in the RunOptions and pass the RunOptions to Session::Run. Done the normal way, however, this fails with "tfdbg not linked in this tensorflow build", so a few things have to be set up at build time.
Before using TensorFlow for C++, you need to build the target libtensorflow_cc.so, i.e. run:
bazel build --config=opt //tensorflow:libtensorflow_cc.so
However, a careful look at the BUILD file under tensorflow shows that the dependencies of the libtensorflow_cc.so target do not include the sources under tensorflow/core/debug.
We therefore need to add one line to the tensorflow/tensorflow/BUILD file:
tf_cc_shared_object(
    name = "libtensorflow_cc.so",
    linkopts = select({
        "//tensorflow:darwin": [
            "-Wl,-exported_symbols_list",  # This line must be directly followed by the exported_symbols.lds file
            "//tensorflow:tf_exported_symbols.lds",
        ],
        "//tensorflow:windows": [],
        "//tensorflow:windows_msvc": [],
        "//conditions:default": [
            "-z defs",
            "-s",
            "-Wl,--version-script",  # This line must be directly followed by the version_script.lds file
            "//tensorflow:tf_version_script.lds",
        ],
    }),
    deps = [
        "//tensorflow:tf_exported_symbols.lds",
        "//tensorflow:tf_version_script.lds",
        "//tensorflow/c:c_api",
        "//tensorflow/c/eager:c_api",
        "//tensorflow/cc:cc_ops",
        "//tensorflow/cc:client_session",
        "//tensorflow/cc:scope",
        "//tensorflow/core:tensorflow",
        "//tensorflow/core/debug",  # this is the line I added
    ],
)
After the change, run
bazel build --config=opt //tensorflow:libtensorflow_cc.so
again. The build succeeds, and the tfdbg functionality is now linked into libtensorflow_cc.so.
main.cpp:
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/util/device_name_utils.h"
#include "tensorflow/core/protobuf/config.pb.h"
#include "tensorflow/core/protobuf/debug.pb.h"
#include "tensorflow/core/framework/graph.pb.h"

using namespace tensorflow;

std::string dbgInfoPath = "file:///your_path/dbginfo";

int main(int argc, char* argv[]) {
  // Initialize a tensorflow session
  Session* session;
  tensorflow::SessionOptions opts;

  // read model
  GraphDef graph_def;
  Status status = ReadBinaryProto(Env::Default(), "models/graph.pb", &graph_def);
  if (!status.ok()) {
    std::cout << status.ToString() << "\n";
    return 1;
  }

  // log node device placement
  opts.config.set_log_device_placement(true);

  // set up session
  status = NewSession(opts, &session);
  if (!status.ok()) {
    printf("NewSession\n");
    std::cout << status.ToString() << "\n";
    return 1;
  }

  // create graph
  status = session->Create(graph_def);
  if (!status.ok()) {
    std::cout << status.ToString() << "\n";
    return 1;
  }

  // input and output tensors
  Tensor a(DT_FLOAT, TensorShape());
  a.scalar<float>()() = 3.0;
  Tensor b(DT_FLOAT, TensorShape());
  b.scalar<float>()() = 2.0;
  std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
      {"a", a},
      {"b", b},
  };
  std::vector<tensorflow::Tensor> outputs;

  // run_options setting
  tensorflow::RunOptions ops = tensorflow::RunOptions();
  tensorflow::RunMetadata run_metadata;

  // debug_options setting: watch every node in the graph with DebugIdentity
  tensorflow::DebugOptions* dp = ops.mutable_debug_options();
  int node_count = graph_def.node_size();
  for (int i = 0; i < node_count; i++) {
    auto node = graph_def.mutable_node(i);
    DebugTensorWatch* watch = dp->add_debug_tensor_watch_opts();
    watch->set_node_name(node->name());
    std::cout << node->name() << std::endl;
    watch->set_output_slot(0);
    watch->add_debug_ops("DebugIdentity");
    watch->add_debug_urls(dbgInfoPath.c_str());
  }

  // delete existing dbginfo
  system("rm -rf ./dbginfo/");

  // test whether the debug options were actually set
  if (ops.has_debug_options())
    printf("ops has debug_options, debug_tensor_watch_opts_size: %d\n", dp->debug_tensor_watch_opts_size());
  else
    printf("ops has no debug_options()\n");

  // session run
  status = session->Run(ops, inputs, {"c"}, {}, &outputs, &run_metadata);
  if (!status.ok()) {
    printf("Session Run Failed\n");
    std::cout << status.ToString() << "\n";
    return 1;
  }

  // print output
  auto output_c = outputs[0].scalar<float>();
  std::cout << outputs[0].DebugString() << "\n";  // Tensor
  std::cout << output_c() << "\n";                // 5

  // close session
  session->Close();
  return 0;
}
Makefile:
CXX = g++
INCPATHS = -I./
INCPATHS += -I/usr/local/lib/python2.7/dist-packages/tensorflow/include/
INCPATHS += -I/usr/local/lib/python2.7/dist-packages/tensorflow/include/external/nsync/public/
LIBPATH = -L./
LIBPATH += -L/usr/local/lib
LIBS = -Wl,-rpath=./
LIBS += -ltensorflow_cc
LIBS += -ltensorflow_framework
EXE = main
$(EXE):
	$(CXX) -std=c++11 -o $(EXE) main.cpp $(INCPATHS) $(LIBPATH) $(LIBS)
Replace the paths in the program and in the Makefile with your own, then run make and execute the program.
Finally, use the following command to view the dbginfo:
python -m tensorflow.python.debug.cli.offline_analyzer --dump_dir=/your_path/dbginfo/