Install Bazel by following https://docs.bazel.build/versions/main/install-ubuntu.html (Bazelisk releases: https://github.com/bazelbuild/bazelisk/releases)

root@jumper-MS-7B47:~/tensorflow# ./configure
You have bazel 3.7.2 installed.
Please specify the location of python. [Default is /usr/local/bin/python3]: 

Found possible Python library paths:
Please input the desired Python library path to use.  Default is [/usr/local/lib/python3.7/site-packages]

Do you wish to build TensorFlow with ROCm support? [y/N]: 
No ROCm support will be enabled for TensorFlow.

Do you wish to build TensorFlow with CUDA support? [y/N]: 
No CUDA support will be enabled for TensorFlow.

Do you wish to download a fresh release of clang? (Experimental) [y/N]: 
Clang will not be downloaded.

Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is -Wno-sign-compare]: 

Would you like to interactively configure ./WORKSPACE for Android builds? [y/N]: 
Not configuring the WORKSPACE for Android builds.

Preconfigured Bazel build configs. You can use any of the below by adding "--config=<>" to your build command. See .bazelrc for more details.
	--config=mkl         	# Build with MKL support.
	--config=mkl_aarch64 	# Build with oneDNN and Compute Library for the Arm Architecture (ACL).
	--config=monolithic  	# Config for mostly static monolithic build.
	--config=numa        	# Build with NUMA support.
	--config=dynamic_kernels	# (Experimental) Build kernels into separate shared objects.
	--config=v1          	# Build with TensorFlow 1 API instead of TF 2 API.
Preconfigured Bazel build configs to DISABLE default on features:
	--config=nogcp       	# Disable GCP support.
	--config=nonccl      	# Disable NVIDIA NCCL support.
Configuration finished
注意:这里默认其实是 TF2 API(下面日志中 .bazelrc 的 build 选项已包含 --config=v2);--config=v1 才是改用 TensorFlow 1 API 的可选项。

root@jumper-MS-7B47:~/tensorflow# bazel build --config=v2 --config=mkl -c opt --copt=-march=native //tensorflow:libtensorflow_cc.so
Starting local Bazel server and connecting to it...
WARNING: The following configs were expanded more than once: [v2]. For repeatable flags, repeats are counted twice and may lead to unexpected behavior.
INFO: Options provided by the client:
  Inherited 'common' options: --isatty=1 --terminal_columns=80
INFO: Reading rc options for 'build' from /root/tensorflow/.bazelrc:
  Inherited 'common' options: --experimental_repo_remote_exec
INFO: Reading rc options for 'build' from /root/tensorflow/.bazelrc:
  'build' options: --define framework_shared_object=true --java_toolchain=@tf_toolchains//toolchains/java:tf_java_toolchain --host_java_toolchain=@tf_toolchains//toolchains/java:tf_java_toolchain --define=use_fast_cpp_protos=true --define=allow_oversize_protos=true --spawn_strategy=standalone -c opt --announce_rc --define=grpc_no_ares=true --noincompatible_remove_legacy_whole_archive --enable_platform_specific_config --define=with_xla_support=true --config=short_logs --config=v2 --define=no_aws_support=true --define=no_hdfs_support=true
INFO: Reading rc options for 'build' from /root/tensorflow/.tf_configure.bazelrc:
  'build' options: --action_env PYTHON_BIN_PATH=/usr/local/bin/python3 --action_env PYTHON_LIB_PATH=/usr/local/lib/python3.7/site-packages --python_path=/usr/local/bin/python3
INFO: Found applicable config definition build:short_logs in file /root/tensorflow/.bazelrc: --output_filter=DONT_MATCH_ANYTHING
INFO: Found applicable config definition build:v2 in file /root/tensorflow/.bazelrc: --define=tf_api_version=2 --action_env=TF2_BEHAVIOR=1
INFO: Found applicable config definition build:v2 in file /root/tensorflow/.bazelrc: --define=tf_api_version=2 --action_env=TF2_BEHAVIOR=1
INFO: Found applicable config definition build:mkl in file /root/tensorflow/.bazelrc: --define=build_with_mkl=true --define=enable_mkl=true --define=tensorflow_mkldnn_contraction_kernel=0 --define=build_with_openmp=true -c opt
INFO: Found applicable config definition build:linux in file /root/tensorflow/.bazelrc: --copt=-w --host_copt=-w --define=PREFIX=/usr --define=LIBDIR=$(PREFIX)/lib --define=INCLUDEDIR=$(PREFIX)/include --define=PROTOBUF_INCLUDE_PATH=$(PREFIX)/include --cxxopt=-std=c++14 --host_cxxopt=-std=c++14 --config=dynamic_kernels --distinct_host_configuration=false
INFO: Found applicable config definition build:dynamic_kernels in file /root/tensorflow/.bazelrc: --define=dynamic_loaded_kernels=true --copt=-DAUTOLOAD_DYNAMIC_KERNELS
DEBUG: /root/.cache/bazel/_bazel_root/efb88f6336d9c4a18216fb94287b8d97/external/tf_runtime/third_party/cuda/dependencies.bzl:51:10: The following command will download NVIDIA proprietary software. By using the software you agree to comply with the terms of the license agreement that accompanies the software. If you do not agree to the terms of the license agreement, do not use the software.
DEBUG: Rule 'io_bazel_rules_docker' indicated that a canonical reproducible form can be obtained by modifying arguments shallow_since = "1556410077 -0400"
DEBUG: Repository io_bazel_rules_docker instantiated at:
  /root/tensorflow/WORKSPACE:23:14: in <toplevel>
  /root/tensorflow/tensorflow/workspace0.bzl:108:34: in workspace
  /root/.cache/bazel/_bazel_root/efb88f6336d9c4a18216fb94287b8d97/external/bazel_toolchains/repositories/repositories.bzl:37:23: in repositories
Repository rule git_repository defined at:
  /root/.cache/bazel/_bazel_root/efb88f6336d9c4a18216fb94287b8d97/external/bazel_tools/tools/build_defs/repo/git.bzl:199:33: in <toplevel>
INFO: Analyzed target //tensorflow:libtensorflow_cc.so (217 packages loaded, 19638 targets configured).
INFO: Found 1 target...
Target //tensorflow:libtensorflow_cc.so up-to-date:
INFO: Elapsed time: 3862.275s, Critical Path: 188.63s
INFO: 7274 processes: 337 internal, 6937 local.
INFO: Build completed successfully, 7274 total actions


4,测试发现:tensorflow_mklomp2.6/include/tensorflow/core/framework/graph.pb.h:10:10: 致命错误:google/protobuf/port_def.inc:没有那个文件或目录
于是回到编译文件夹那里重新编译:bazel build --config=v2 --config=mkl -c opt --copt=-march=native //tensorflow:libtensorflow_cc.so //tensorflow:install_headers

5,运行实例时编译报错:/tensorflow_mklomp2.6/include/tensorflow/core/framework/tensor.h:906:7: 错误:static assertion failed: std::string is no longer a scalar type, use tensorflow::tstring
  906 |       !std::is_same<T, std::string>::value,
      |       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
src/subdir.mk:21: recipe for target 'src/jinXingCnn.o' failed
make: *** [src/jinXingCnn.o] Error 1

solution:参考 https://github.com/tensorflow/tensorflow/issues/43150 ,将工程中把 std::string 用作 Tensor 元素类型的地方改成 tensorflow::tstring

2021-11-01 15:49:10.354330: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3407930000 Hz
2021-11-01 15:49:10.354590: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x1eee5c0 executing computations on platform Host. Devices:
2021-11-01 15:49:10.354603: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>
/home/jumper/xrt/parallel/xrtCNNwithoutParallel/Release/xrtCNNwithoutParallel: relocation error: /home/jumper/xrt/parallel/xrtCNNwithoutParallel/Release/xrtCNNwithoutParallel: symbol _ZN10tensorflow15ReadBinaryProtoEPNS_3EnvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPN6google8protobuf11MessageLiteE, version tensorflow not defined in file libtensorflow_cc.so.2 with link time reference

solution:为刚刚编译出的 TensorFlow 库路径设置环境变量即可(set the environment variable for the path of the TensorFlow lib)。
我是直接打开终端执行:export LD_LIBRARY_PATH=/pathoftensorflowlib:$LD_LIBRARY_PATH


[图片:Tensorflow2.6.0-MKL for C++_第4张图片] [图片:Tensorflow2.6.0-MKL for C++_第5张图片]
而且设置也无问题,跑起来的时候核都飞起了。

[图片:Tensorflow2.6.0-MKL for C++_第6张图片]
但就是时间很慢很慢:

2021-11-04 09:20:09.222941: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
image 0  contoursnumber: 1 time: 165.404 ms!
image 1  contoursnumber: 1 time: 6.88905 ms!
image 2  contoursnumber: 1 time: 3.54466 ms!
image 3  contoursnumber: 1 time: 3.56257 ms!
image 4  contoursnumber: 1 time: 8.56913 ms!
image 5  contoursnumber: 1 time: 24.2727 ms!
image 6  contoursnumber: 1 time: 11.8442 ms!
image 7  contoursnumber: 2 time: 30.6798 ms!
image 8  contoursnumber: 1 time: 18.9718 ms!
image 9  contoursnumber: 1 time: 181.086 ms!
image 10  contoursnumber: 1 time: 34.2604 ms!
image 11  contoursnumber: 4 time: 94.7321 ms!
image 12  contoursnumber: 1 time: 2.78627 ms!
image 13  contoursnumber: 2 time: 17.1375 ms!
image 14  contoursnumber: 1 time: 10.5116 ms!
image 15  contoursnumber: 3 time: 69.1845 ms!
image 16  contoursnumber: 2 time: 54.063 ms!
image 17  contoursnumber: 5 time: 70.7947 ms!
image 18  contoursnumber: 3 time: 127.469 ms!
image 19  contoursnumber: 4 time: 137.692 ms!
image 20  contoursnumber: 5 time: 39.2106 ms!
image 21  contoursnumber: 10 time: 70.2001 ms!
image 22  contoursnumber: 6 time: 58.2006 ms!
image 23  contoursnumber: 9 time: 145.273 ms!
image 24  contoursnumber: 16 time: 164.113 ms!
image 25  contoursnumber: 14 time: 140.407 ms!
image 26  contoursnumber: 19 time: 368.37 ms!
image 27  contoursnumber: 16 time: 177.934 ms!
image 28  contoursnumber: 14 time: 253.712 ms!
image 29  contoursnumber: 16 time: 68.7066 ms!
image 30  contoursnumber: 20 time: 308.638 ms!
image 31  contoursnumber: 35 time: 621.751 ms!
image 32  contoursnumber: 35 time: 294.543 ms!
image 33  contoursnumber: 33 time: 190.038 ms!
image 34  contoursnumber: 33 time: 384.437 ms!
image 35  contoursnumber: 35 time: 641.447 ms!
image 36  contoursnumber: 30 time: 117.715 ms!
image 37  contoursnumber: 43 time: 118.032 ms!
image 38  contoursnumber: 29 time: 138.433 ms!
image 39  contoursnumber: 40 time: 250.087 ms!
image 40  contoursnumber: 60 time: 543.546 ms!
image 41  contoursnumber: 59 time: 604.045 ms!
image 42  contoursnumber: 68 time: 363.982 ms!
image 43  contoursnumber: 60 time: 473.302 ms!
image 44  contoursnumber: 83 time: 786.114 ms!
image 45  contoursnumber: 71 time: 238.955 ms!
image 46  contoursnumber: 83 time: 560.018 ms!
image 47  contoursnumber: 85 time: 1055.17 ms!
image 48  contoursnumber: 77 time: 612.51 ms!
image 49  contoursnumber: 82 time: 871.528 ms!
image 50  contoursnumber: 95 time: 503.243 ms!
image 51  contoursnumber: 107 time: 789.133 ms!
image 52  contoursnumber: 101 time: 751.478 ms!
image 53  contoursnumber: 95 time: 521.219 ms!
image 54  contoursnumber: 74 time: 348.509 ms!
image 55  contoursnumber: 95 time: 700.569 ms!
image 56  contoursnumber: 86 time: 563.311 ms!
image 57  contoursnumber: 94 time: 696.34 ms!
image 58  contoursnumber: 104 time: 678.648 ms!
image 59  contoursnumber: 93 time: 689.167 ms!
image 60  contoursnumber: 128 time: 785.795 ms!
image 61  contoursnumber: 104 time: 370.197 ms!
image 62  contoursnumber: 101 time: 695.368 ms!
image 63  contoursnumber: 99 time: 386.249 ms!
image 64  contoursnumber: 103 time: 654.749 ms!
image 65  contoursnumber: 111 time: 350.074 ms!
image 66  contoursnumber: 129 time: 1702.72 ms!
image 67  contoursnumber: 109 time: 544.824 ms!
image 68  contoursnumber: 121 time: 1259.43 ms!
image 69  contoursnumber: 116 time: 634.254 ms!
image 70  contoursnumber: 112 time: 1078.5 ms!
image 71  contoursnumber: 113 time: 448.014 ms!
image 72  contoursnumber: 115 time: 578.946 ms!
image 73  contoursnumber: 104 time: 866.64 ms!
image 74  contoursnumber: 95 time: 839.735 ms!
image 75  contoursnumber: 108 time: 622.835 ms!
image 76  contoursnumber: 77 time: 498.654 ms!
image 77  contoursnumber: 115 time: 1157.03 ms!
image 78  contoursnumber: 103 time: 2103.04 ms!
image 79  contoursnumber: 91 time: 421.994 ms!
image 80  contoursnumber: 84 time: 532.462 ms!
image 81  contoursnumber: 88 time: 732.916 ms!
image 82  contoursnumber: 72 time: 553.692 ms!
image 83  contoursnumber: 74 time: 278.869 ms!
image 84  contoursnumber: 75 time: 370.129 ms!
image 85  contoursnumber: 72 time: 568.95 ms!
image 86  contoursnumber: 71 time: 935.075 ms!
image 87  contoursnumber: 58 time: 611.403 ms!
image 88  contoursnumber: 61 time: 608.242 ms!
image 89  contoursnumber: 68 time: 638.952 ms!
image 90  contoursnumber: 49 time: 679.073 ms!
image 91  contoursnumber: 52 time: 197.024 ms!
image 92  contoursnumber: 57 time: 213.327 ms!
image 93  contoursnumber: 60 time: 145.864 ms!
image 94  contoursnumber: 51 time: 178.088 ms!
image 95  contoursnumber: 20 time: 90.9068 ms!
image 96  contoursnumber: 29 time: 575.21 ms!
image 97  contoursnumber: 21 time: 142.668 ms!
image 98  contoursnumber: 10 time: 155.449 ms!
image 99  contoursnumber: 7 time: 56.3241 ms!
image 100  contoursnumber: 4 time: 112.157 ms!
image 101  contoursnumber: 3 time: 48.2224 ms!
image 102  contoursnumber: 1 time: 4.49101 ms!
image 103  contoursnumber: 1 time: 9.43159 ms!
image 104  contoursnumber: 2 time: 31.763 ms!
image 105  contoursnumber: 2 time: 259.153 ms!
image 106  contoursnumber: 1 time: 8.34897 ms!
image 107  contoursnumber: 1 time: 14.1501 ms!
image 108  contoursnumber: 1 time: 13.4157 ms!
image 109  contoursnumber: 1 time: 15.2861 ms!
image 110  contoursnumber: 1 time: 30.0232 ms!
mean time 385.456 ms
max time 2103.04 ms


image 0  contoursnumber: 1 time: 215.328 ms!
image 1  contoursnumber: 1 time: 3.9097 ms!
image 2  contoursnumber: 1 time: 4.38048 ms!
image 3  contoursnumber: 1 time: 4.9417 ms!
image 4  contoursnumber: 1 time: 3.99377 ms!
image 5  contoursnumber: 1 time: 4.54973 ms!
image 6  contoursnumber: 1 time: 4.42919 ms!
image 7  contoursnumber: 2 time: 8.74767 ms!
image 8  contoursnumber: 1 time: 4.49718 ms!
image 9  contoursnumber: 1 time: 4.54444 ms!
image 10  contoursnumber: 1 time: 4.26188 ms!
image 11  contoursnumber: 4 time: 14.6775 ms!
image 12  contoursnumber: 1 time: 3.85717 ms!
image 13  contoursnumber: 2 time: 7.7923 ms!
image 14  contoursnumber: 1 time: 4.02385 ms!
image 15  contoursnumber: 3 time: 11.6701 ms!
image 16  contoursnumber: 2 time: 7.56668 ms!
image 17  contoursnumber: 5 time: 17.2285 ms!
image 18  contoursnumber: 3 time: 10.9037 ms!
image 19  contoursnumber: 4 time: 15.8927 ms!
image 20  contoursnumber: 5 time: 17.9607 ms!
image 21  contoursnumber: 10 time: 34.51 ms!
image 22  contoursnumber: 6 time: 21.092 ms!
image 23  contoursnumber: 9 time: 32.4407 ms!
image 24  contoursnumber: 16 time: 55.0001 ms!
image 25  contoursnumber: 14 time: 47.8659 ms!
image 26  contoursnumber: 19 time: 67.684 ms!
image 27  contoursnumber: 16 time: 56.6169 ms!
image 28  contoursnumber: 14 time: 47.0008 ms!
image 29  contoursnumber: 16 time: 54.8117 ms!
image 30  contoursnumber: 20 time: 67.6562 ms!
image 31  contoursnumber: 35 time: 117.237 ms!
image 32  contoursnumber: 35 time: 110.993 ms!
image 33  contoursnumber: 33 time: 110.121 ms!
image 34  contoursnumber: 33 time: 106.439 ms!
image 35  contoursnumber: 35 time: 118.31 ms!
image 36  contoursnumber: 30 time: 98.951 ms!
image 37  contoursnumber: 43 time: 141.166 ms!
image 38  contoursnumber: 29 time: 95.6124 ms!
image 39  contoursnumber: 40 time: 131.273 ms!
image 40  contoursnumber: 60 time: 196.478 ms!
image 41  contoursnumber: 59 time: 198.934 ms!
image 42  contoursnumber: 68 time: 216.352 ms!
image 43  contoursnumber: 60 time: 193.67 ms!
image 44  contoursnumber: 83 time: 259.158 ms!
image 45  contoursnumber: 71 time: 236.651 ms!
image 46  contoursnumber: 83 time: 262.82 ms!
image 47  contoursnumber: 85 time: 280.913 ms!
image 48  contoursnumber: 77 time: 247.691 ms!
image 49  contoursnumber: 82 time: 264.615 ms!
image 50  contoursnumber: 95 time: 298.288 ms!
image 51  contoursnumber: 107 time: 352.973 ms!
image 52  contoursnumber: 101 time: 333.127 ms!
image 53  contoursnumber: 95 time: 304.485 ms!
image 54  contoursnumber: 74 time: 242.383 ms!
image 55  contoursnumber: 95 time: 302.55 ms!
image 56  contoursnumber: 86 time: 281.847 ms!
image 57  contoursnumber: 94 time: 314.126 ms!
image 58  contoursnumber: 104 time: 344.507 ms!
image 59  contoursnumber: 93 time: 299.361 ms!
image 60  contoursnumber: 128 time: 403.624 ms!
image 61  contoursnumber: 104 time: 338.79 ms!
image 62  contoursnumber: 101 time: 349.402 ms!
image 63  contoursnumber: 99 time: 317.887 ms!
image 64  contoursnumber: 103 time: 337.589 ms!
image 65  contoursnumber: 111 time: 350.287 ms!
image 66  contoursnumber: 129 time: 405.237 ms!
image 67  contoursnumber: 109 time: 370.085 ms!
image 68  contoursnumber: 121 time: 403.513 ms!
image 69  contoursnumber: 116 time: 376.87 ms!
image 70  contoursnumber: 112 time: 377.656 ms!
image 71  contoursnumber: 113 time: 376.028 ms!
image 72  contoursnumber: 115 time: 388.223 ms!
image 73  contoursnumber: 104 time: 337.376 ms!
image 74  contoursnumber: 95 time: 308.087 ms!
image 75  contoursnumber: 108 time: 334.705 ms!
image 76  contoursnumber: 77 time: 250.793 ms!
image 77  contoursnumber: 115 time: 378.777 ms!
image 78  contoursnumber: 103 time: 330.838 ms!
image 79  contoursnumber: 91 time: 298.235 ms!
image 80  contoursnumber: 84 time: 264.395 ms!
image 81  contoursnumber: 88 time: 303.579 ms!
image 82  contoursnumber: 72 time: 234.702 ms!
image 83  contoursnumber: 74 time: 236.552 ms!
image 84  contoursnumber: 75 time: 243.109 ms!
image 85  contoursnumber: 72 time: 232.462 ms!
image 86  contoursnumber: 71 time: 231.71 ms!
image 87  contoursnumber: 58 time: 188.411 ms!
image 88  contoursnumber: 61 time: 195.348 ms!
image 89  contoursnumber: 68 time: 237.083 ms!
image 90  contoursnumber: 49 time: 157.995 ms!
image 91  contoursnumber: 52 time: 167.288 ms!
image 92  contoursnumber: 57 time: 188.229 ms!
image 93  contoursnumber: 60 time: 193.937 ms!
image 94  contoursnumber: 51 time: 158.905 ms!
image 95  contoursnumber: 20 time: 72.3785 ms!
image 96  contoursnumber: 29 time: 93.3377 ms!
image 97  contoursnumber: 21 time: 69.2373 ms!
image 98  contoursnumber: 10 time: 36.3054 ms!
image 99  contoursnumber: 7 time: 26.6407 ms!
image 100  contoursnumber: 4 time: 15.0212 ms!
image 101  contoursnumber: 3 time: 11.5187 ms!
image 102  contoursnumber: 1 time: 4.57232 ms!
image 103  contoursnumber: 1 time: 4.56704 ms!
image 104  contoursnumber: 2 time: 7.91162 ms!
image 105  contoursnumber: 2 time: 8.47972 ms!
image 106  contoursnumber: 1 time: 4.10723 ms!
image 107  contoursnumber: 1 time: 4.14542 ms!
image 108  contoursnumber: 1 time: 4.46583 ms!
image 109  contoursnumber: 1 time: 4.3269 ms!
image 110  contoursnumber: 1 time: 4.10413 ms!





lscpu | grep "Core(s) per socket" | cut -d':' -f2 | xargs
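export OMP_NUM_THREADS= # 注意:等号后应填入上一条命令输出的每 socket 物理核数,例如 export OMP_NUM_THREADS=$(LC_ALL=C lscpu | grep "Core(s) per socket" | cut -d':' -f2 | xargs);像这样留空会触发下面日志中的 "Invalid value for environment variable OMP_NUM_THREADS" 警告,OpenMP 会退回默认线程数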
export KMP_AFFINITY="granularity=fine,verbose,compact,1,0"


root@rootwd-Default-string:/home/jumper/xrt/parallel/xrtCNNTry/Release# ./xrtCNNTry 

libgomp: Invalid value for environment variable OMP_NUM_THREADS
OMP: Warning #227: OMP_NUM_THREADS: Invalid symbols found. Check the value "".

User settings:


Effective settings:

   KMP_CPUINFO_FILE: value is not defined
   KMP_FORCE_REDUCTION: value is not defined
   OMP_NUM_THREADS: value is not defined
   OMP_PLACES: value is not defined

OMP: Info #209: KMP_AFFINITY: decoding x2APIC ids.
OMP: Info #207: KMP_AFFINITY: Affinity capable, using global cpuid leaf 11 info
OMP: Info #154: KMP_AFFINITY: Initial OS proc set respected: {0,1,2,3,4,5,6,7}
OMP: Info #156: KMP_AFFINITY: 8 available OS procs
OMP: Info #157: KMP_AFFINITY: Uniform topology
OMP: Info #179: KMP_AFFINITY: 1 packages x 4 cores/pkg x 2 threads/core (4 total cores)
OMP: Info #211: KMP_AFFINITY: OS proc to physical thread map:
OMP: Info #171: KMP_AFFINITY: OS proc 0 maps to package 0 core 0 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 4 maps to package 0 core 0 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 1 maps to package 0 core 1 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 5 maps to package 0 core 1 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 2 maps to package 0 core 2 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 6 maps to package 0 core 2 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 3 maps to package 0 core 3 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 7 maps to package 0 core 3 thread 1 
OMP: Info #247: KMP_AFFINITY: pid 3115 tid 3124 thread 0 bound to OS proc set {0}
OMP: Info #247: KMP_AFFINITY: pid 3115 tid 3135 thread 1 bound to OS proc set {1}
OMP: Info #247: KMP_AFFINITY: pid 3115 tid 3136 thread 2 bound to OS proc set {2}
OMP: Info #247: KMP_AFFINITY: pid 3115 tid 3137 thread 3 bound to OS proc set {3}
OMP: Info #247: KMP_AFFINITY: pid 3115 tid 3138 thread 4 bound to OS proc set {4}
OMP: Info #247: KMP_AFFINITY: pid 3115 tid 3139 thread 5 bound to OS proc set {5}
OMP: Info #247: KMP_AFFINITY: pid 3115 tid 3140 thread 6 bound to OS proc set {6}
OMP: Info #247: KMP_AFFINITY: pid 3115 tid 3141 thread 7 bound to OS proc set {7}
image 0  contoursnumber: 1 time: 500.349 ms!
image 1  contoursnumber: 1 time: 8.03288 ms!
image 2  contoursnumber: 1 time: 9.97024 ms!
image 3  contoursnumber: 1 time: 2.0015 ms!
image 4  contoursnumber: 1 time: 3.70273 ms!
image 5  contoursnumber: 1 time: 1.80889 ms!
image 6  contoursnumber: 1 time: 1.80585 ms!
image 7  contoursnumber: 2 time: 3.36128 ms!
image 8  contoursnumber: 1 time: 1.83141 ms!
image 9  contoursnumber: 1 time: 1.84443 ms!
image 10  contoursnumber: 1 time: 1.99455 ms!
image 11  contoursnumber: 4 time: 5.84807 ms!
image 12  contoursnumber: 1 time: 1.80329 ms!
image 13  contoursnumber: 2 time: 3.22477 ms!
image 14  contoursnumber: 1 time: 1.82 ms!
image 15  contoursnumber: 3 time: 4.60704 ms!
image 16  contoursnumber: 2 time: 3.2597 ms!
image 17  contoursnumber: 5 time: 7.34827 ms!
image 18  contoursnumber: 3 time: 4.52042 ms!
image 19  contoursnumber: 4 time: 5.95517 ms!
image 20  contoursnumber: 5 time: 7.40705 ms!
image 21  contoursnumber: 10 time: 13.9073 ms!
image 22  contoursnumber: 6 time: 8.53849 ms!
image 23  contoursnumber: 9 time: 12.786 ms!
image 24  contoursnumber: 16 time: 22.1717 ms!
image 25  contoursnumber: 14 time: 35.0056 ms!
image 26  contoursnumber: 19 time: 25.7021 ms!
image 27  contoursnumber: 16 time: 21.7129 ms!
image 28  contoursnumber: 14 time: 19.0482 ms!
image 29  contoursnumber: 16 time: 21.5907 ms!
image 30  contoursnumber: 20 time: 26.9468 ms!
image 31  contoursnumber: 35 time: 46.3722 ms!
image 32  contoursnumber: 35 time: 46.4969 ms!
image 33  contoursnumber: 33 time: 59.5481 ms!
image 34  contoursnumber: 33 time: 44.0666 ms!
image 35  contoursnumber: 35 time: 46.5968 ms!
image 36  contoursnumber: 30 time: 40.0138 ms!
image 37  contoursnumber: 43 time: 56.8861 ms!
image 38  contoursnumber: 29 time: 38.9458 ms!
image 39  contoursnumber: 40 time: 68.6904 ms!
image 40  contoursnumber: 60 time: 79.8226 ms!
image 41  contoursnumber: 59 time: 79.163 ms!
image 42  contoursnumber: 68 time: 89.7483 ms!
image 43  contoursnumber: 60 time: 79.693 ms!
image 44  contoursnumber: 83 time: 116.971 ms!
image 45  contoursnumber: 71 time: 93.7241 ms!
image 46  contoursnumber: 83 time: 109.284 ms!
image 47  contoursnumber: 85 time: 132.655 ms!
image 48  contoursnumber: 77 time: 101.51 ms!
image 49  contoursnumber: 82 time: 108.38 ms!
image 50  contoursnumber: 95 time: 124.985 ms!
image 51  contoursnumber: 107 time: 156.501 ms!
image 52  contoursnumber: 101 time: 133.039 ms!
image 53  contoursnumber: 95 time: 127.139 ms!
image 54  contoursnumber: 74 time: 125.535 ms!
image 55  contoursnumber: 95 time: 134.212 ms!
image 56  contoursnumber: 86 time: 113.548 ms!
image 57  contoursnumber: 94 time: 152.801 ms!
image 58  contoursnumber: 104 time: 144.552 ms!
image 59  contoursnumber: 93 time: 127.866 ms!
image 60  contoursnumber: 128 time: 197.675 ms!
image 61  contoursnumber: 104 time: 144.514 ms!
image 62  contoursnumber: 101 time: 141.193 ms!
image 63  contoursnumber: 99 time: 160.731 ms!
image 64  contoursnumber: 103 time: 141.909 ms!
image 65  contoursnumber: 111 time: 151.289 ms!
image 66  contoursnumber: 129 time: 203.25 ms!
image 67  contoursnumber: 109 time: 150.636 ms!
image 68  contoursnumber: 121 time: 165.343 ms!
image 69  contoursnumber: 116 time: 180.688 ms!
image 70  contoursnumber: 112 time: 155.768 ms!
image 71  contoursnumber: 113 time: 156.376 ms!
image 72  contoursnumber: 115 time: 180.892 ms!
image 73  contoursnumber: 104 time: 144.901 ms!
image 74  contoursnumber: 95 time: 133.066 ms!
image 75  contoursnumber: 108 time: 170.592 ms!
image 76  contoursnumber: 77 time: 110.238 ms!
image 77  contoursnumber: 115 time: 158.94 ms!
image 78  contoursnumber: 103 time: 163.685 ms!
image 79  contoursnumber: 91 time: 125.024 ms!
image 80  contoursnumber: 84 time: 114.479 ms!
image 81  contoursnumber: 88 time: 120.804 ms!
image 82  contoursnumber: 72 time: 125.362 ms!
image 83  contoursnumber: 74 time: 103.037 ms!
image 84  contoursnumber: 75 time: 103.719 ms!
image 85  contoursnumber: 72 time: 100.35 ms!
image 86  contoursnumber: 71 time: 119.934 ms!
image 87  contoursnumber: 58 time: 87.3322 ms!
image 88  contoursnumber: 61 time: 88.1855 ms!
image 89  contoursnumber: 68 time: 97.5548 ms!
image 90  contoursnumber: 49 time: 95.7405 ms!
image 91  contoursnumber: 52 time: 76.9936 ms!
image 92  contoursnumber: 57 time: 83.1304 ms!
image 93  contoursnumber: 60 time: 86.7529 ms!
image 94  contoursnumber: 51 time: 75.4882 ms!
image 95  contoursnumber: 20 time: 33.4222 ms!
image 96  contoursnumber: 29 time: 45.5786 ms!
image 97  contoursnumber: 21 time: 33.7253 ms!
image 98  contoursnumber: 10 time: 21.7852 ms!
image 99  contoursnumber: 7 time: 17.7007 ms!
image 100  contoursnumber: 4 time: 13.0195 ms!
image 101  contoursnumber: 3 time: 12.3081 ms!
image 102  contoursnumber: 1 time: 9.95971 ms!
image 103  contoursnumber: 1 time: 9.39383 ms!
image 104  contoursnumber: 2 time: 3.38718 ms!
image 105  contoursnumber: 2 time: 3.18975 ms!
image 106  contoursnumber: 1 time: 1.76616 ms!
image 107  contoursnumber: 1 time: 1.80187 ms!
image 108  contoursnumber: 1 time: 2.79997 ms!
image 109  contoursnumber: 1 time: 1.85328 ms!
image 110  contoursnumber: 1 time: 1.7689 ms!
mean time 75.6548 ms
max time 500.349 ms


下次在i9上试下。在Intel Core i9-9900K上试了下:

i9-9900K上的设置:copy tensorflow_mklomp2.6

0,vi /etc/ld.so.conf

1,error while loading shared libraries: libiomp5.so: cannot open shared object file: No such file or directory


	root@ubuntu:~/oneDNN/build# cmake ..
	-- Could NOT find Doxygen (missing:  DOXYGEN_EXECUTABLE) 
	-- Could NOT find Doxyrest (missing:  DOXYREST_EXECUTABLE) 
	-- Could NOT find PythonInterp (missing:  PYTHON_EXECUTABLE) (Required is at least version "2.7")
	-- Could NOT find Sphinx (missing:  SPHINX_EXECUTABLE) 
	-- Enabled workload: TRAINING
	-- Enabled primitives: ALL
	-- Enabled primitive CPU ISA: ALL
	-- Primitive cache is enabled
	-- Configuring done
	-- Generating done
	-- Build files have been written to: /root/oneDNN/build

	solution:apt install python

					wget http://sphinxsearch.com/files/sphinx-2.2.5-release.tar.gz
					tar zxvf sphinx-2.2.5-release.tar.gz
                                        ./configure --prefix=/usr/local/sphinx
					checking MySQL include files... configure: error: missing include files.

					ERROR: cannot find MySQL include files.

					Check that you do have MySQL include files installed.
					The package name is typically 'mysql-devel'.

					If include files are installed on your system, but you are still getting
					this message, you should do one of the following:

					1) either specify includes location explicitly, using --with-mysql-includes;
					2) or specify MySQL installation root location explicitly, using --with-mysql;
					3) or make sure that the path to 'mysql_config' program is listed in
					   your PATH environment variable.

					To disable MySQL support, use --without-mysql option.
				        solution:apt install libmysqlclient-dev
					make && make install
	solution:apt install python-sphinx(oneDNN 的 cmake 要找的是 Python 文档生成工具 Sphinx,而不是上面从 sphinxsearch.com 下载的全文检索引擎 Sphinx,所以上面那段编译其实不需要)
		 apt install doxygen
	root@ubuntu:~/oneDNN/build# cmake ..
	-- Could NOT find Doxyrest (missing:  DOXYREST_EXECUTABLE) 


cp /opt/intel/mkldnn/lib/libmkldnn.so.1.0 /usr/local/lib
cp /opt/intel/mkldnn/lib/libmkldnn.so.1 /usr/local/lib
cp /opt/intel/mkldnn/lib/libmkldnn.so /usr/local/lib

error while loading shared libraries: libiomp5.so: cannot open shared object file: No such file or directory



root@ubuntu:/home/jumper/xrt/projects# ./xrtCNNTry

libgomp: Invalid value for environment variable OMP_NUM_THREADS
OMP: Warning #230: OMP_NUM_THREADS: Invalid symbols found. Check the value "".

User settings:


Effective settings:

   KMP_CPUINFO_FILE: value is not defined
   KMP_FORCE_REDUCTION: value is not defined
   OMP_AFFINITY_FORMAT='OMP: pid %P tid %T thread %n bound to OS proc set {%a}'
   OMP_NUM_THREADS: value is not defined
   OMP_PLACES: value is not defined
   OMP_TOOL_LIBRARIES: value is not defined

OMP: Info #212: KMP_AFFINITY: decoding x2APIC ids.
OMP: Info #210: KMP_AFFINITY: Affinity capable, using global cpuid leaf 11 info
OMP: Info #154: KMP_AFFINITY: Initial OS proc set respected: 0-15
OMP: Info #156: KMP_AFFINITY: 16 available OS procs
OMP: Info #157: KMP_AFFINITY: Uniform topology
OMP: Info #179: KMP_AFFINITY: 1 packages x 8 cores/pkg x 2 threads/core (8 total cores)
OMP: Info #214: KMP_AFFINITY: OS proc to physical thread map:
OMP: Info #171: KMP_AFFINITY: OS proc 0 maps to package 0 core 0 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 8 maps to package 0 core 0 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 1 maps to package 0 core 1 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 9 maps to package 0 core 1 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 2 maps to package 0 core 2 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 10 maps to package 0 core 2 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 3 maps to package 0 core 3 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 11 maps to package 0 core 3 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 4 maps to package 0 core 4 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 12 maps to package 0 core 4 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 5 maps to package 0 core 5 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 13 maps to package 0 core 5 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 6 maps to package 0 core 6 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 14 maps to package 0 core 6 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 7 maps to package 0 core 7 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 15 maps to package 0 core 7 thread 1 
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3813 thread 0 bound to OS proc set 0
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3829 thread 2 bound to OS proc set 2
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3828 thread 1 bound to OS proc set 1
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3830 thread 3 bound to OS proc set 3
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3831 thread 4 bound to OS proc set 4
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3833 thread 6 bound to OS proc set 6
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3832 thread 5 bound to OS proc set 5
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3834 thread 7 bound to OS proc set 7
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3835 thread 8 bound to OS proc set 8
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3837 thread 10 bound to OS proc set 10
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3836 thread 9 bound to OS proc set 9
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3838 thread 11 bound to OS proc set 11
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3839 thread 12 bound to OS proc set 12
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3841 thread 14 bound to OS proc set 14
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3840 thread 13 bound to OS proc set 13
OMP: Info #250: KMP_AFFINITY: pid 3796 tid 3842 thread 15 bound to OS proc set 15
image 0  contoursnumber: 1 time: 545.958 ms!
image 1  contoursnumber: 1 time: 2.75895 ms!
image 2  contoursnumber: 1 time: 2.81572 ms!
image 3  contoursnumber: 1 time: 2.60694 ms!
image 4  contoursnumber: 1 time: 2.629 ms!
image 5  contoursnumber: 1 time: 2.65074 ms!
image 6  contoursnumber: 1 time: 2.69729 ms!
image 7  contoursnumber: 2 time: 5.03756 ms!
image 8  contoursnumber: 1 time: 2.62776 ms!
image 9  contoursnumber: 1 time: 2.65478 ms!
image 10  contoursnumber: 1 time: 2.67608 ms!
image 11  contoursnumber: 4 time: 9.5571 ms!
image 12  contoursnumber: 1 time: 2.58764 ms!
image 13  contoursnumber: 2 time: 5.02788 ms!
image 14  contoursnumber: 1 time: 2.64267 ms!
image 15  contoursnumber: 3 time: 7.22725 ms!
image 16  contoursnumber: 2 time: 4.88518 ms!
image 17  contoursnumber: 5 time: 11.7531 ms!
image 18  contoursnumber: 3 time: 7.15299 ms!
image 19  contoursnumber: 4 time: 9.43651 ms!
image 20  contoursnumber: 5 time: 11.7737 ms!
image 21  contoursnumber: 10 time: 23.266 ms!
image 22  contoursnumber: 6 time: 14.0629 ms!
image 23  contoursnumber: 9 time: 20.9185 ms!
image 24  contoursnumber: 16 time: 36.8501 ms!
image 25  contoursnumber: 14 time: 32.2419 ms!
image 26  contoursnumber: 19 time: 42.6967 ms!
image 27  contoursnumber: 16 time: 36.4316 ms!
image 28  contoursnumber: 14 time: 31.6114 ms!
image 29  contoursnumber: 16 time: 36.3981 ms!
image 30  contoursnumber: 20 time: 45.1732 ms!
image 31  contoursnumber: 35 time: 79.0561 ms!
image 32  contoursnumber: 35 time: 78.977 ms!
image 33  contoursnumber: 33 time: 74.5243 ms!
image 34  contoursnumber: 33 time: 74.8152 ms!
image 35  contoursnumber: 35 time: 79.1064 ms!
image 36  contoursnumber: 30 time: 67.8377 ms!
image 37  contoursnumber: 43 time: 96.9035 ms!
image 38  contoursnumber: 29 time: 65.5133 ms!
image 39  contoursnumber: 40 time: 90.5936 ms!
image 40  contoursnumber: 60 time: 142.718 ms!
image 41  contoursnumber: 59 time: 132.777 ms!
image 42  contoursnumber: 68 time: 153.493 ms!
image 43  contoursnumber: 60 time: 135.566 ms!
image 44  contoursnumber: 83 time: 187.208 ms!
image 45  contoursnumber: 71 time: 160.202 ms!
image 46  contoursnumber: 83 time: 187.224 ms!
image 47  contoursnumber: 85 time: 192.007 ms!
image 48  contoursnumber: 77 time: 173.49 ms!
image 49  contoursnumber: 82 time: 184.844 ms!
image 50  contoursnumber: 95 time: 214.119 ms!
image 51  contoursnumber: 107 time: 241.767 ms!
image 52  contoursnumber: 101 time: 227.876 ms!
image 53  contoursnumber: 95 time: 263.539 ms!
image 54  contoursnumber: 74 time: 166.959 ms!
image 55  contoursnumber: 95 time: 214.315 ms!
image 56  contoursnumber: 86 time: 194.752 ms!
image 57  contoursnumber: 94 time: 212.681 ms!
image 58  contoursnumber: 104 time: 241.592 ms!
image 59  contoursnumber: 93 time: 210.203 ms!
image 60  contoursnumber: 128 time: 289.51 ms!
image 61  contoursnumber: 104 time: 237.612 ms!
image 62  contoursnumber: 101 time: 229.251 ms!
image 63  contoursnumber: 99 time: 224.104 ms!
image 64  contoursnumber: 103 time: 234.302 ms!
image 65  contoursnumber: 111 time: 251.253 ms!
image 66  contoursnumber: 129 time: 292.019 ms!
image 67  contoursnumber: 109 time: 246.509 ms!
image 68  contoursnumber: 121 time: 350.947 ms!
image 69  contoursnumber: 116 time: 262.582 ms!
image 70  contoursnumber: 112 time: 254.463 ms!
image 71  contoursnumber: 113 time: 385.545 ms!
image 72  contoursnumber: 115 time: 925.221 ms!
image 73  contoursnumber: 104 time: 234.835 ms!
image 74  contoursnumber: 95 time: 214.467 ms!
image 75  contoursnumber: 108 time: 244.214 ms!
image 76  contoursnumber: 77 time: 174.85 ms!
image 77  contoursnumber: 115 time: 261.896 ms!
image 78  contoursnumber: 103 time: 233.937 ms!
image 79  contoursnumber: 91 time: 207.413 ms!
image 80  contoursnumber: 84 time: 191.105 ms!
image 81  contoursnumber: 88 time: 199.925 ms!
image 82  contoursnumber: 72 time: 162.594 ms!
image 83  contoursnumber: 74 time: 168.641 ms!
image 84  contoursnumber: 75 time: 178.051 ms!
image 85  contoursnumber: 72 time: 164.123 ms!
image 86  contoursnumber: 71 time: 161.951 ms!
image 87  contoursnumber: 58 time: 132.326 ms!
image 88  contoursnumber: 61 time: 138.941 ms!
image 89  contoursnumber: 68 time: 155.192 ms!
image 90  contoursnumber: 49 time: 112.29 ms!
image 91  contoursnumber: 52 time: 118.127 ms!
image 92  contoursnumber: 57 time: 129.372 ms!
image 93  contoursnumber: 60 time: 136.79 ms!
image 94  contoursnumber: 51 time: 115.553 ms!
image 95  contoursnumber: 20 time: 46.1896 ms!
image 96  contoursnumber: 29 time: 65.7994 ms!
image 97  contoursnumber: 21 time: 47.9674 ms!
image 98  contoursnumber: 10 time: 22.9276 ms!
image 99  contoursnumber: 7 time: 16.0952 ms!
image 100  contoursnumber: 4 time: 9.60946 ms!
image 101  contoursnumber: 3 time: 7.26509 ms!
image 102  contoursnumber: 1 time: 2.59449 ms!
image 103  contoursnumber: 1 time: 2.64759 ms!
image 104  contoursnumber: 2 time: 5.07191 ms!
image 105  contoursnumber: 2 time: 5.10199 ms!
image 106  contoursnumber: 1 time: 2.58517 ms!
image 107  contoursnumber: 1 time: 2.64865 ms!
image 108  contoursnumber: 1 time: 2.71979 ms!
image 109  contoursnumber: 1 time: 2.69414 ms!
image 110  contoursnumber: 1 time: 2.72165 ms!
mean time 124.108 ms
max time 925.221 ms


root@ubuntu:/home/jumper/xrt/projects# export KMP_ABORT_DELAY=0
root@ubuntu:/home/jumper/xrt/projects# export KMP_ADAPTIVE_LOCK_PROPS='1,1024'
root@ubuntu:/home/jumper/xrt/projects# export KMP_ALIGN_ALLOC=64
root@ubuntu:/home/jumper/xrt/projects# export KMP_ALL_THREADPRIVATE=128
root@ubuntu:/home/jumper/xrt/projects# export KMP_ATOMIC_MODE=2
root@ubuntu:/home/jumper/xrt/projects# export KMP_BLOCKTIME=1
root@ubuntu:/home/jumper/xrt/projects# export KMP_DETERMINISTIC_REDUCTION=false
root@ubuntu:/home/jumper/xrt/projects# export KMP_DEVICE_THREAD_LIMIT=2147483647
root@ubuntu:/home/jumper/xrt/projects# export KMP_DISP_HAND_THREAD=false
root@ubuntu:/home/jumper/xrt/projects# export KMP_DISP_NUM_BUFFERS=7
root@ubuntu:/home/jumper/xrt/projects# export KMP_DUPLICATE_LIB_OK=false
root@ubuntu:/home/jumper/xrt/projects# export KMP_FOREIGN_THREADS_THREADPRIVATE=true
root@ubuntu:/home/jumper/xrt/projects# export KMP_FORKJOIN_BARRIER='2,2'
root@ubuntu:/home/jumper/xrt/projects# export KMP_FORKJOIN_BARRIER_PATTERN='hyper,hyper'
root@ubuntu:/home/jumper/xrt/projects# export KMP_FORKJOIN_FRAMES=true
root@ubuntu:/home/jumper/xrt/projects# export KMP_FORKJOIN_FRAMES_MODE=3
root@ubuntu:/home/jumper/xrt/projects# export KMP_GTID_MODE=3
root@ubuntu:/home/jumper/xrt/projects# export KMP_HANDLE_SIGNALS=false
root@ubuntu:/home/jumper/xrt/projects# export KMP_HOT_TEAMS_MAX_LEVEL=1
root@ubuntu:/home/jumper/xrt/projects# export KMP_HOT_TEAMS_MODE=0
root@ubuntu:/home/jumper/xrt/projects# export KMP_INIT_AT_FORK=true
root@ubuntu:/home/jumper/xrt/projects# export KMP_INIT_WAIT=2048
root@ubuntu:/home/jumper/xrt/projects# export KMP_TEAMS_THREAD_LIMIT=16
root@ubuntu:/home/jumper/xrt/projects# export OMP_ALLOCATOR=omp_default_mem_alloc
root@ubuntu:/home/jumper/xrt/projects# export OMP_MAX_ACTIVE_LEVELS=2147483647
root@ubuntu:/home/jumper/xrt/projects# export OMP_MAX_TASK_PRIORITY=0
root@ubuntu:/home/jumper/xrt/projects# export OMP_NUM_THREADS=8
root@ubuntu:/home/jumper/xrt/projects# export KMP_AFFINITY='verbose,warnings,respect,granularity=fine,compact,1,0'
root@ubuntu:/home/jumper/xrt/projects# ./xrtCNNTry

User settings:


Effective settings:

   KMP_CPUINFO_FILE: value is not defined
   KMP_FORCE_REDUCTION: value is not defined
   OMP_AFFINITY_FORMAT='OMP: pid %P tid %T thread %n bound to OS proc set {%a}'
   OMP_PLACES: value is not defined
   OMP_TOOL_LIBRARIES: value is not defined

OMP: Info #212: KMP_AFFINITY: decoding x2APIC ids.
OMP: Info #210: KMP_AFFINITY: Affinity capable, using global cpuid leaf 11 info
OMP: Info #154: KMP_AFFINITY: Initial OS proc set respected: 0-15
OMP: Info #156: KMP_AFFINITY: 16 available OS procs
OMP: Info #157: KMP_AFFINITY: Uniform topology
OMP: Info #179: KMP_AFFINITY: 1 packages x 8 cores/pkg x 2 threads/core (8 total cores)
OMP: Info #214: KMP_AFFINITY: OS proc to physical thread map:
OMP: Info #171: KMP_AFFINITY: OS proc 0 maps to package 0 core 0 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 8 maps to package 0 core 0 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 1 maps to package 0 core 1 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 9 maps to package 0 core 1 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 2 maps to package 0 core 2 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 10 maps to package 0 core 2 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 3 maps to package 0 core 3 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 11 maps to package 0 core 3 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 4 maps to package 0 core 4 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 12 maps to package 0 core 4 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 5 maps to package 0 core 5 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 13 maps to package 0 core 5 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 6 maps to package 0 core 6 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 14 maps to package 0 core 6 thread 1 
OMP: Info #171: KMP_AFFINITY: OS proc 7 maps to package 0 core 7 thread 0 
OMP: Info #171: KMP_AFFINITY: OS proc 15 maps to package 0 core 7 thread 1 
OMP: Info #250: KMP_AFFINITY: pid 7151 tid 7168 thread 0 bound to OS proc set 0
OMP: Info #250: KMP_AFFINITY: pid 7151 tid 7187 thread 1 bound to OS proc set 1
OMP: Info #250: KMP_AFFINITY: pid 7151 tid 7188 thread 2 bound to OS proc set 2
OMP: Info #250: KMP_AFFINITY: pid 7151 tid 7189 thread 3 bound to OS proc set 3
OMP: Info #250: KMP_AFFINITY: pid 7151 tid 7190 thread 4 bound to OS proc set 4
OMP: Info #250: KMP_AFFINITY: pid 7151 tid 7191 thread 5 bound to OS proc set 5
OMP: Info #250: KMP_AFFINITY: pid 7151 tid 7192 thread 6 bound to OS proc set 6
OMP: Info #250: KMP_AFFINITY: pid 7151 tid 7193 thread 7 bound to OS proc set 7
image 0  contoursnumber: 1 time: 540.462 ms!
image 1  contoursnumber: 1 time: 2.43491 ms!
image 2  contoursnumber: 1 time: 2.34395 ms!
image 3  contoursnumber: 1 time: 2.29904 ms!
image 4  contoursnumber: 1 time: 2.29803 ms!
image 5  contoursnumber: 1 time: 2.26987 ms!
image 6  contoursnumber: 1 time: 2.31498 ms!
image 7  contoursnumber: 2 time: 4.33249 ms!
image 8  contoursnumber: 1 time: 2.30232 ms!
image 9  contoursnumber: 1 time: 2.26087 ms!
image 10  contoursnumber: 1 time: 2.26373 ms!
image 11  contoursnumber: 4 time: 8.23919 ms!
image 12  contoursnumber: 1 time: 2.22784 ms!
image 13  contoursnumber: 2 time: 4.24408 ms!
image 14  contoursnumber: 1 time: 2.24563 ms!
image 15  contoursnumber: 3 time: 6.15239 ms!
image 16  contoursnumber: 2 time: 4.1945 ms!
image 17  contoursnumber: 5 time: 10.9899 ms!
image 18  contoursnumber: 3 time: 6.09401 ms!
image 19  contoursnumber: 4 time: 8.16248 ms!
image 20  contoursnumber: 5 time: 10.1609 ms!
image 21  contoursnumber: 10 time: 19.7754 ms!
image 22  contoursnumber: 6 time: 11.9569 ms!
image 23  contoursnumber: 9 time: 17.9706 ms!
image 24  contoursnumber: 16 time: 31.5628 ms!
image 25  contoursnumber: 14 time: 27.4714 ms!
image 26  contoursnumber: 19 time: 36.654 ms!
image 27  contoursnumber: 16 time: 30.9366 ms!
image 28  contoursnumber: 14 time: 27.241 ms!
image 29  contoursnumber: 16 time: 31.1286 ms!
image 30  contoursnumber: 20 time: 38.6733 ms!
image 31  contoursnumber: 35 time: 67.5998 ms!
image 32  contoursnumber: 35 time: 67.9385 ms!
image 33  contoursnumber: 33 time: 97.487 ms!
image 34  contoursnumber: 33 time: 63.8534 ms!
image 35  contoursnumber: 35 time: 67.7263 ms!
image 36  contoursnumber: 30 time: 57.8766 ms!
image 37  contoursnumber: 43 time: 83.2774 ms!
image 38  contoursnumber: 29 time: 56.219 ms!
image 39  contoursnumber: 40 time: 77.5566 ms!
image 40  contoursnumber: 60 time: 115.673 ms!
image 41  contoursnumber: 59 time: 114.054 ms!
image 42  contoursnumber: 68 time: 131.358 ms!
image 43  contoursnumber: 60 time: 123.464 ms!
image 44  contoursnumber: 83 time: 162.16 ms!
image 45  contoursnumber: 71 time: 139.174 ms!
image 46  contoursnumber: 83 time: 162.967 ms!
image 47  contoursnumber: 85 time: 166.877 ms!
image 48  contoursnumber: 77 time: 164.705 ms!
image 49  contoursnumber: 82 time: 158.783 ms!
image 50  contoursnumber: 95 time: 183.512 ms!
image 51  contoursnumber: 107 time: 206.621 ms!
image 52  contoursnumber: 101 time: 194.661 ms!
image 53  contoursnumber: 95 time: 183.088 ms!
image 54  contoursnumber: 74 time: 142.938 ms!
image 55  contoursnumber: 95 time: 183.331 ms!
image 56  contoursnumber: 86 time: 166.222 ms!
image 57  contoursnumber: 94 time: 181.357 ms!
image 58  contoursnumber: 104 time: 200.238 ms!
image 59  contoursnumber: 93 time: 179.249 ms!
image 60  contoursnumber: 128 time: 246.765 ms!
image 61  contoursnumber: 104 time: 200.685 ms!
image 62  contoursnumber: 101 time: 194.998 ms!
image 63  contoursnumber: 99 time: 191.143 ms!
image 64  contoursnumber: 103 time: 198.842 ms!
image 65  contoursnumber: 111 time: 214.208 ms!
image 66  contoursnumber: 129 time: 248.685 ms!
image 67  contoursnumber: 109 time: 210.33 ms!
image 68  contoursnumber: 121 time: 233.23 ms!
image 69  contoursnumber: 116 time: 224.134 ms!
image 70  contoursnumber: 112 time: 216.384 ms!
image 71  contoursnumber: 113 time: 218.136 ms!
image 72  contoursnumber: 115 time: 221.682 ms!
image 73  contoursnumber: 104 time: 200.762 ms!
image 74  contoursnumber: 95 time: 183.564 ms!
image 75  contoursnumber: 108 time: 207.921 ms!
image 76  contoursnumber: 77 time: 148.792 ms!
image 77  contoursnumber: 115 time: 221.789 ms!
image 78  contoursnumber: 103 time: 203.962 ms!
image 79  contoursnumber: 91 time: 175.547 ms!
image 80  contoursnumber: 84 time: 162.185 ms!
image 81  contoursnumber: 88 time: 169.608 ms!
image 82  contoursnumber: 72 time: 138.911 ms!
image 83  contoursnumber: 74 time: 142.687 ms!
image 84  contoursnumber: 75 time: 145.751 ms!
image 85  contoursnumber: 72 time: 139.121 ms!
image 86  contoursnumber: 71 time: 137.16 ms!
image 87  contoursnumber: 58 time: 111.89 ms!
image 88  contoursnumber: 61 time: 117.719 ms!
image 89  contoursnumber: 68 time: 131.638 ms!
image 90  contoursnumber: 49 time: 94.6617 ms!
image 91  contoursnumber: 52 time: 100.442 ms!
image 92  contoursnumber: 57 time: 110.199 ms!
image 93  contoursnumber: 60 time: 115.842 ms!
image 94  contoursnumber: 51 time: 98.3639 ms!
image 95  contoursnumber: 20 time: 38.7647 ms!
image 96  contoursnumber: 29 time: 56.1158 ms!
image 97  contoursnumber: 21 time: 40.8237 ms!
image 98  contoursnumber: 10 time: 19.4679 ms!
image 99  contoursnumber: 7 time: 13.7955 ms!
image 100  contoursnumber: 4 time: 7.94654 ms!
image 101  contoursnumber: 3 time: 6.07278 ms!
image 102  contoursnumber: 1 time: 2.28709 ms!
image 103  contoursnumber: 1 time: 2.31357 ms!
image 104  contoursnumber: 2 time: 4.27889 ms!
image 105  contoursnumber: 2 time: 4.27259 ms!
image 106  contoursnumber: 1 time: 2.32682 ms!
image 107  contoursnumber: 1 time: 2.37074 ms!
image 108  contoursnumber: 1 time: 2.33745 ms!
image 109  contoursnumber: 1 time: 2.34833 ms!
image 110  contoursnumber: 1 time: 2.31133 ms!
mean time 99.8847 ms
max time 540.462 ms



下一步是研究 mkldnn-tbb 以及 Intel 低精度 AI 等方案,也许比上面的速度还快。另外,我看到《干货|基于CPU的深度学习推理部署优化实践》(weixin_34407348 的 CSDN 博客)这篇文章里对 MKLDNN 与 OpenVINO 做了对比,我觉得后续可以测试一下后者是否更快。该文还提到,使用 MKLDNN 时,OMP_NUM_THREADS=CPU核数、KMP_BLOCKTIME=10、KMP_AFFINITY=granularity=fine,verbose,compact,1,0 这三个选项的设置非常重要。不过我实际操作发现,第一个选项并不是线程数设得越多越快(例如上面使用 16 个线程的那次运行平均耗时 124.108 ms,而 OMP_NUM_THREADS=8 的这次平均耗时 99.8847 ms),还是要多实践。


另外,大家可以看下这篇文章:《tensorflow C++ Mask RCNN图像分割,cv::dnn不能并行?openvino?》(作者:秦时明月卫庄,博客园)。

我看了 GitHub 上所有涉及 Mask R-CNN(M-RCNN)C++ 推理(inference)的实例,实在不知道问题出在哪里。我的实例放在这里:https://download.csdn.net/download/wd1603926823/54178267

#include "linkuang.h"
using namespace std;
using namespace cv;

linkuang::linkuang(int imgsize,float lesswaste_prob,float morewaste_prob) {
	// TODO Auto-generated constructor stub


	std::string graphpath="/home/jumper/xrt/reference/cnnmodel/model.pb";

	///CNN initiation--
	tensorflow::Status status = NewSession(tensorflow::SessionOptions(), &session);
	if (!status.ok())
		throw std::runtime_error("ERROR: linkuang CNN NewSession() init failed...");

	tensorflow::GraphDef graphdef;
	tensorflow::Status status_load = ReadBinaryProto(tensorflow::Env::Default(), graphpath, &graphdef);
	if (!status_load.ok())
		std::cout << status_load.ToString() <Create(graphdef);
	if (!status_create.ok())
		std::cerr <().data();
//	Mat cnninputImg(standard_rows, standard_cols, CV_32FC1, imgdata);
//	standardinput.convertTo(cnninputImg,CV_32FC1);
//	cnninputImg=cnninputImg/255;

	auto outputMap =resized_tensor.tensor();//获取tensor指针
	for(int r=0;r(r)[c])/255;

	//CNN input
	std::vector > inputs;
	std::string Input1Name = "input_1_1";//"input_1_1:0";
	inputs.push_back(std::make_pair(Input1Name, resized_tensor));

	//CNN predict
	std::vector outputs;
	std::string output="output_1";//"output_1:0";

	tensorflow::Status status_run = session->Run({{Input1Name,resized_tensor}}, {output}, {}, &outputs);
	if (!status_run.ok()) {
	   std::cout <<"ERROR: RUN failed in real inference()..."<< status_run.ToString() << "\n";
	   return -1;

	int flag=getOutputImg(outputs[0],outputimg);
		std::cout <<"ERROR: RUN failed in getCnnRealLabel()..."<(r)[c]<<"\t";
		cout<();        //

//	  cout<(r)[c]=255*value;
		cout<();        // Tensor Shape: [batch_size, target_class_num]
	  int output_dim = probabilities.shape().dim_size(1);  // Get the target_class_num from 1st dimension

	  float primerprob=tmap(0, 0);
	  if(tmap(0, 1)>primerprob)
		  primerprob=tmap(0, 1);


	  return 0;

linkuang::~linkuang() {
	// TODO Auto-generated destructor stub
	tensorflow::Status freestatus=session->Close();
	if (!freestatus.ok())
		throw std::runtime_error("ERROR: close session...");

