1. Configuration
TensorRT settings in the Qt .pro project file:
# TensorRT
# Header include path
INCLUDEPATH += /usr/include/x86_64-linux-gnu
# Locate the headers with: sudo find / -name "NvInfer.h"
# Link the TensorRT libraries
LIBS += -L/lib/x86_64-linux-gnu -lnvinfer
LIBS += -L/lib/x86_64-linux-gnu -lnvonnxparser
LIBS += -L/lib/x86_64-linux-gnu -lnvinfer_plugin
# Locate the libraries with: ldconfig -p | grep libnvinfer
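To confirm the paths above are wired up correctly, a minimal test program (my own sketch, not part of any sample) can be built and run to print the linked TensorRT version:

#include <NvInfer.h>
#include <iostream>

int main()
{
    // getInferLibVersion() reports the linked TensorRT library version as a single integer
    std::cout << "TensorRT version: " << getInferLibVersion() << std::endl;
    return 0;
}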
2. Building the model
[Reference: NVIDIA developer blog] https://developer.nvidia.com/zh-cn/blog/tensorrt-c-interface-cn/
void build_model()
{
    // logger is an nvinfer1::ILogger implementation (see the sketch below)
    IBuilder* builder = createInferBuilder(logger);

    // [Create the network definition]
    uint32_t flag = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    INetworkDefinition* network = builder->createNetworkV2(flag);

    // [Create the ONNX parser and parse the model]
    IParser* parser = createParser(*network, logger);
    const char* modelFile = "";  // path to the .onnx model (placeholder)
    parser->parseFromFile(modelFile, static_cast<int32_t>(ILogger::Severity::kWARNING));
    for (int32_t i = 0; i < parser->getNbErrors(); ++i)
    {
        std::cout << parser->getError(i)->desc() << std::endl;
    }

    // [Build configuration]
    IBuilderConfig* config = builder->createBuilderConfig();
    config->setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, 1U << 20);

    // [Build and serialize the engine]
    IHostMemory* serializedModel = builder->buildSerializedNetwork(*network, *config);

    delete parser;
    delete network;
    delete config;
    delete builder;

    // [Save the serialized model] ...
    delete serializedModel;
}
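build_model() above assumes a logger object already exists; TensorRT needs a concrete nvinfer1::ILogger. A minimal sketch (the class name Logger is my own choice) might be:

#include <NvInfer.h>
#include <iostream>

// Minimal ILogger implementation: print warnings and errors, drop INFO/VERBOSE output
class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
            std::cout << msg << std::endl;
    }
} logger;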
2.1 Constructing the network
The model-building step can be factored out further. In the function below, modelFile, RUN_FP16 and RUN_INT8 are assumed to be defined elsewhere (e.g. as members or macros).
bool constructNetwork(nvinfer1::IBuilder* builder, nvinfer1::INetworkDefinition* network,
                      nvinfer1::IBuilderConfig* config, nvonnxparser::IParser* parser)
{
    // Parse the ONNX file
    if (!parser->parseFromFile(modelFile, static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING)))
    {
        return false;
    }
    // Enable reduced-precision modes if requested
    if (RUN_FP16)
    {
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
    if (RUN_INT8)
    {
        config->setFlag(nvinfer1::BuilderFlag::kINT8);
    }
    return true;
}
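For context, a hedged sketch of how constructNetwork() can be wired into the build flow from section 2 (build() is a hypothetical wrapper name; logger and saveEngineFile() are as defined elsewhere in this note):

bool build()
{
    auto builder = nvinfer1::createInferBuilder(logger);
    uint32_t flag = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = builder->createNetworkV2(flag);
    auto config = builder->createBuilderConfig();
    auto parser = nvonnxparser::createParser(*network, logger);

    // Parse the ONNX model and set the precision flags
    if (!constructNetwork(builder, network, config, parser))
        return false;  // (error path omits cleanup for brevity)

    // Serialize the optimized engine and write it to disk
    nvinfer1::IHostMemory* serialized = builder->buildSerializedNetwork(*network, *config);
    bool ok = saveEngineFile(serialized);

    delete serialized;
    delete parser;
    delete config;
    delete network;
    delete builder;
    return ok;
}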
2.2 Saving the model
// Write the serialized plan data to disk
bool saveEngineFile(nvinfer1::IHostMemory* data)
{
    std::ofstream file;
    file.open(m_engine_file, std::ios::binary | std::ios::out);  // m_engine_file: output path
    std::cout << "writing engine file..." << std::endl;
    file.write(static_cast<const char*>(data->data()), data->size());
    std::cout << "save engine file done" << std::endl;
    file.close();
    return true;
}
// The serialized data can come from an already-built engine ...
nvinfer1::ICudaEngine* m_engine;
nvinfer1::IHostMemory* trt_model_stream = m_engine->serialize();
// ... or directly from the builder, which is then written to disk
nvinfer1::IHostMemory* data = builder->buildSerializedNetwork(*network, *config);
saveEngineFile(data);
3. Runtime
Load the engine directly from the plan file:
nvinfer1::ICudaEngine* m_engine;

bool loadEngineFromFile()
{
    int length = 0;  // length of the serialized data in bytes
    std::unique_ptr<char[]> data = readEngineFile(length);
    // sample::gLogger comes from the TensorRT samples' common logging code
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger());
    m_engine = runtime->deserializeCudaEngine(data.get(), length);
    if (!m_engine)
    {
        std::cout << "Failed to create engine" << std::endl;
        return false;
    }
    return true;
}
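readEngineFile() is not shown above; a minimal sketch (assuming m_engine_file is the plan file path, as in saveEngineFile()):

#include <fstream>
#include <memory>

std::unique_ptr<char[]> readEngineFile(int& length)
{
    std::ifstream file(m_engine_file, std::ios::binary);
    if (!file.good())
    {
        length = 0;
        return nullptr;
    }
    // Measure the file size, then read the whole plan into memory
    file.seekg(0, std::ifstream::end);
    length = static_cast<int>(file.tellg());
    file.seekg(0, std::ifstream::beg);
    std::unique_ptr<char[]> data(new char[length]);
    file.read(data.get(), length);
    return data;
}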
Running inference involves at least the following steps.
3.2.1 Create the execution context
nvinfer1::IExecutionContext *context = m_engine->createExecutionContext();
assert(context != nullptr);
3.2.2 Allocate buffers for the input and output
int nbBindings = m_engine->getNbBindings();
assert(nbBindings == 2);  // one input plus one output, two bindings in total

// Allocate device memory for each binding
void* buffers[2];                  // device pointers, one per binding
std::vector<int64_t> buffer_size;  // size in bytes of each binding
buffer_size.resize(nbBindings);
for (int i = 0; i < nbBindings; i++)
{
    nvinfer1::Dims dims = m_engine->getBindingDimensions(i);     // e.g. (3, 224, 224) and (1000)
    nvinfer1::DataType dtype = m_engine->getBindingDataType(i);  // both kFLOAT here
    // std::cout << static_cast<int>(dtype) << std::endl;
    int64_t total_size = volume(dims) * 1 * getElementSize(dtype);  // batch size 1
    buffer_size[i] = total_size;
    CHECK(cudaMalloc(&buffers[i], total_size));
}
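volume() and getElementSize() are small helpers in the spirit of the TensorRT samples' common code; a sketch:

#include <numeric>
#include <functional>

// Product of all dimensions, e.g. (3, 224, 224) -> 150528 elements
inline int64_t volume(const nvinfer1::Dims& dims)
{
    return std::accumulate(dims.d, dims.d + dims.nbDims, int64_t{1}, std::multiplies<int64_t>());
}

// Size in bytes of one element of the given data type
inline uint32_t getElementSize(nvinfer1::DataType type)
{
    switch (type)
    {
    case nvinfer1::DataType::kFLOAT: return 4;
    case nvinfer1::DataType::kINT32: return 4;
    case nvinfer1::DataType::kHALF:  return 2;
    case nvinfer1::DataType::kINT8:  return 1;
    default:                         return 0;
    }
}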
3.2.3 Run inference
// Copy the input to the GPU
CHECK(cudaMemcpyAsync(buffers[0], cur_input.data(), buffer_size[0], cudaMemcpyHostToDevice, stream));
// Enqueue inference asynchronously
t_start = std::chrono::high_resolution_clock::now();
context->enqueueV2(&buffers[0], stream, nullptr);
// Copy the output back to the CPU
CHECK(cudaMemcpyAsync(out, buffers[1], buffer_size[1], cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
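The CHECK macro and the stream are assumed above; a minimal sketch of the macro (same idea as the one in the TensorRT samples):

#include <cuda_runtime_api.h>
#include <cstdlib>
#include <iostream>

// Abort on any CUDA runtime error
#define CHECK(call)                                                              \
    do                                                                           \
    {                                                                            \
        const cudaError_t err = (call);                                          \
        if (err != cudaSuccess)                                                  \
        {                                                                        \
            std::cerr << "CUDA error: " << cudaGetErrorString(err) << std::endl; \
            std::abort();                                                        \
        }                                                                        \
    } while (0)

The stream itself is created once with CHECK(cudaStreamCreate(&stream)); before the copies are enqueued, and released with cudaStreamDestroy(stream) after all work has finished.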