ONNX (Open Neural Network Exchange) is an ecosystem of deep-learning development tools led by Microsoft and Facebook, and ONNX Runtime (ORT) is Microsoft's cross-platform, high-performance accelerator for machine-learning training and inference. According to the official numbers it can deliver up to a 17x speedup for inference and 1.4x for training, which makes it a strong fit for deploying deep-learning models.
git clone --recursive https://github.com/Microsoft/onnxruntime
cd onnxruntime/
git checkout v1.13.0
The ONNX Runtime version has to match your CUDA and cuDNN versions; see the official compatibility table at https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html. Version 1.13.0 is used here.
| ONNX Runtime | CUDA | cuDNN |
|---|---|---|
| 1.14 / 1.13.1 / 1.13 | 11.6 | 8.2.4 (Linux) / 8.5.0.96 (Windows) |
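To confirm what is actually installed on your machine, a small check program can be compiled against the CUDA and cuDNN headers. A minimal sketch (the file name, include, and library paths below are placeholders; adjust them to your install):

// check_versions.cpp -- print the CUDA runtime and cuDNN versions
// build (paths are assumptions, adjust to your install):
//   g++ check_versions.cpp -I/usr/local/cuda-11.6/include \
//       -L/usr/local/cuda-11.6/lib64 -lcudart -lcudnn -o check_versions
#include <cuda_runtime.h>
#include <cudnn.h>
#include <iostream>

int main() {
    int runtime_version = 0;
    cudaRuntimeGetVersion(&runtime_version);  // e.g. 11060 for CUDA 11.6
    std::cout << "CUDA runtime: " << runtime_version / 1000 << "."
              << (runtime_version % 1000) / 10 << std::endl;
    // CUDNN_MAJOR/MINOR/PATCHLEVEL come from cudnn_version.h (via cudnn.h)
    std::cout << "cuDNN: " << CUDNN_MAJOR << "." << CUDNN_MINOR << "."
              << CUDNN_PATCHLEVEL << " (runtime " << cudnnGetVersion() << ")"
              << std::endl;
    return 0;
}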
./build.sh --skip_tests --use_cuda --config Release --build_shared_lib --parallel --cuda_home /usr/local/cuda-11.6 --cudnn_home /usr/local/cuda-11.6
--use_cuda enables building with CUDA support; --cuda_home and --cudnn_home point to the CUDA and cuDNN installation paths.
If the clone is slow or times out, you can point the remote at a mirror by editing the repository's git config:
cd ${your git repo root}
cd .git
vim config
Change it to:
[core]
repositoryformatversion = 0
filemode = true
bare = false
logallrefupdates = true
ignorecase = true
precomposeunicode = true
[remote "origin"]
url = https://github.com.cnpmjs.org/microsoft/onnxruntime.git
fetch = +refs/tags/v1.13.0:refs/tags/v1.13.0
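After saving the config, git fetch origin should pull just that tag (the narrowed refspec avoids downloading every branch), after which git checkout v1.13.0 and git submodule update --init --recursive complete the source tree.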
cd ./build/Linux/Release
make install DESTDIR=<install path>
Then add the install prefix to your environment, e.g. in ~/.bashrc:
# onnxruntime
export ONNX_HOME=/home/user/3rd-party/onnx/usr/local
export PATH=$PATH:$ONNX_HOME/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ONNX_HOME/lib
export LIBRARY_PATH=$LIBRARY_PATH:$ONNX_HOME/lib
export C_INCLUDE_PATH=$C_INCLUDE_PATH:$ONNX_HOME/include
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:$ONNX_HOME/include
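With the environment variables in place, it is worth a quick sanity check that the library you link is the one you just built and that the CUDA execution provider was actually compiled in. A minimal sketch (ort_check.cpp is a placeholder name; the compile line assumes the exports above are active in your shell):

// ort_check.cpp -- print the linked ONNX Runtime version and its providers
// build: g++ ort_check.cpp -lonnxruntime -o ort_check
#include <onnxruntime_cxx_api.h>
#include <iostream>

int main() {
    // version string of the ONNX Runtime library actually linked in
    std::cout << "ONNX Runtime " << OrtGetApiBase()->GetVersionString() << std::endl;
    // "CUDAExecutionProvider" should appear if --use_cuda took effect
    for (const auto& provider : Ort::GetAvailableProviders())
        std::cout << provider << std::endl;
    return 0;
}

If CUDAExecutionProvider is missing from the list, the build flags and paths above are the first things to re-check. The full inference example below (onnx_test.cpp, the file referenced by the CMake setup at the end) runs a two-input model and times the runs.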
#include <onnxruntime_cxx_api.h>
#include <iostream>
#include <vector>
#include <chrono>
#include <limits>
#include <cstddef>
int main()
{
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
    Ort::SessionOptions session_options;

    // CUDA execution provider options; fields left unset keep the
    // defaults from the struct's constructor
    OrtCUDAProviderOptions cuda_options;
    cuda_options.device_id = 0;
    cuda_options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
    cuda_options.gpu_mem_limit = std::numeric_limits<size_t>::max();
    cuda_options.arena_extend_strategy = 0;
    cuda_options.do_copy_in_default_stream = 1;
    session_options.AppendExecutionProvider_CUDA(cuda_options);

    const char* model_path = "../model_test.onnx";
    Ort::Session session(env, model_path, session_options);

    // print model input info (node names, types, shape etc.)
    Ort::AllocatorWithDefaultOptions allocator;

    // print number of model input nodes
    size_t num_input_nodes = session.GetInputCount();
    std::cout << "number of inputs: " << num_input_nodes << std::endl;

    // node names and shapes are hard-coded to match model_test.onnx
    std::vector<const char*> input_node_names = {"input_0", "input_1"};
    std::vector<const char*> output_node_names = {"dense_2", "tf.math.multiply_2"};
    std::vector<int64_t> input_node_dims = {1, 50, 9};
    std::vector<int64_t> input_node_dims2 = {1, 50, 2};

    // set up the input buffers
    size_t input_tensor_size = 50 * 9;
    size_t input_tensor_size2 = 50 * 2;
    std::vector<float> input_tensor_values(input_tensor_size);
    std::vector<float> input_tensor_values2(input_tensor_size2);

    // total time for 10 inference runs
    auto start = std::chrono::system_clock::now();
    for (int run = 0; run < 10; run++)
    {
        // time each individual run
        auto start2 = std::chrono::system_clock::now();
        for (unsigned int i = 0; i < input_tensor_size; i++)
            input_tensor_values[i] = 1.f;
        for (unsigned int i = 0; i < input_tensor_size2; i++)
            input_tensor_values2[i] = 1.f;

        // create input tensor objects over the CPU buffers
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(),
                                                                  input_tensor_size, input_node_dims.data(), 3);
        Ort::Value input_tensor2 = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values2.data(),
                                                                   input_tensor_size2, input_node_dims2.data(), 3);
        std::vector<Ort::Value> ort_inputs;
        ort_inputs.push_back(std::move(input_tensor));
        ort_inputs.push_back(std::move(input_tensor2));

        // run the model; only the first output ("dense_2") is requested
        auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), ort_inputs.data(),
                                          ort_inputs.size(), output_node_names.data(), 1);

        float* floatarr = output_tensors[0].GetTensorMutableData<float>();
        for (int i = 0; i < 4; i++)
            std::cout << floatarr[i] << std::endl;

        auto end2 = std::chrono::system_clock::now();
        std::chrono::duration<double> elapsed_seconds2 = end2 - start2;
        std::cout << "elapsed time: " << elapsed_seconds2.count() << "s\n";
    }
    auto end = std::chrono::system_clock::now();
    std::chrono::duration<double> elapsed_seconds = end - start;
    std::cout << "total elapsed time: " << elapsed_seconds.count() << "s\n";
    return 0;
}
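The input and output names in the example are hard-coded to match model_test.onnx. If you would rather not look them up in a model viewer each time, the session can report them itself; a sketch against the 1.13 C++ API (inspect_io.cpp is a placeholder name, the model path is reused from above):

// inspect_io.cpp -- list a model's input names and shapes instead of
// hard-coding them
#include <onnxruntime_cxx_api.h>
#include <iostream>

int main() {
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "inspect");
    Ort::Session session(env, "../model_test.onnx", Ort::SessionOptions{});
    Ort::AllocatorWithDefaultOptions allocator;

    for (size_t i = 0; i < session.GetInputCount(); i++) {
        // GetInputNameAllocated returns a smart pointer that frees the name
        auto name = session.GetInputNameAllocated(i, allocator);
        auto shape = session.GetInputTypeInfo(i)
                         .GetTensorTypeAndShapeInfo()
                         .GetShape();
        std::cout << "input " << i << ": " << name.get() << " [";
        for (size_t d = 0; d < shape.size(); d++)
            std::cout << shape[d] << (d + 1 < shape.size() ? ", " : "");
        std::cout << "]" << std::endl;  // -1 marks a dynamic dimension
    }
    return 0;
}

To build the example, the CMakeLists.txt looks like this: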
cmake_minimum_required(VERSION 3.13)
project(onnx_test)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)
link_directories(
    /home/onnx/lib
)
add_executable(onnx onnx_test.cpp)
target_link_libraries(onnx onnxruntime onnxruntime_providers_shared)
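With the paths adjusted to your own install, configure and build with cmake -S . -B build && cmake --build build, then run ./build/onnx from a directory where the relative model path ../model_test.onnx resolves.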