TensorRT Inference
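A minimal sketch of running an image classifier with the TensorRT C++ API: model.onnx is assumed to be an exported ONNX model with a 1x3x224x224 float input and a 1000-class output, and the code uses the legacy implicit-batch builder API.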
#include <iostream>
#include <cuda_runtime_api.h>
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "NvInferPlugin.h"
using namespace nvinfer1;
using namespace nvonnxparser;
using namespace std;

// minimal logger required by the TensorRT builder and ONNX parser
class Logger : public ILogger
{
public:
    void log(Severity severity, const char* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
            cout << msg << endl;
    }
} gLogger;

int main(int argc, char** argv)
{
    // create TensorRT builder and network definition
    IBuilder* builder = createInferBuilder(gLogger);
    INetworkDefinition* network = builder->createNetwork();
    // create ONNX parser and parse the model file
    IParser* parser = createParser(*network, gLogger);
    parser->parseFromFile("model.onnx", static_cast<int>(ILogger::Severity::kWARNING));
    // build TensorRT engine
    builder->setMaxBatchSize(1);
    builder->setMaxWorkspaceSize(1 << 30);
    ICudaEngine* engine = builder->buildCudaEngine(*network);
    // create execution context
    IExecutionContext* context = engine->createExecutionContext();
    // host buffers for input and output
    float* input = new float[3 * 224 * 224];
    float* output = new float[1000];
    for (int i = 0; i < 3 * 224 * 224; i++)
    {
        input[i] = 1.0f;
    }
    // bindings must be device pointers; binding 0 is assumed to be the input
    // and binding 1 the output
    void* bindings[2];
    cudaMalloc(&bindings[0], 3 * 224 * 224 * sizeof(float));
    cudaMalloc(&bindings[1], 1000 * sizeof(float));
    cudaMemcpy(bindings[0], input, 3 * 224 * 224 * sizeof(float), cudaMemcpyHostToDevice);
    // run inference
    context->execute(1, bindings);
    cudaMemcpy(output, bindings[1], 1000 * sizeof(float), cudaMemcpyDeviceToHost);
    // print results
    for (int i = 0; i < 1000; i++)
    {
        cout << output[i] << endl;
    }
    // cleanup
    delete[] input;
    delete[] output;
    cudaFree(bindings[0]);
    cudaFree(bindings[1]);
    context->destroy();
    engine->destroy();
    network->destroy();
    builder->destroy();
    parser->destroy();
    return 0;
}
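On TensorRT 7 and later, the implicit-batch calls above (createNetwork, setMaxBatchSize, buildCudaEngine) are deprecated or removed. A rough sketch of the explicit-batch replacement for the network-creation and engine-build lines in the program above:

// explicit-batch network creation and engine build (TensorRT 7+/8.x)
const auto flags = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
INetworkDefinition* network = builder->createNetworkV2(flags);
IBuilderConfig* config = builder->createBuilderConfig();
config->setMaxWorkspaceSize(1 << 30);
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);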
NCNN Inference
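A minimal ncnn sketch for the same model; model.param and model.bin are assumed to have been produced by one of ncnn's converters (for example onnx2ncnn), and the blob names used below must match the names in model.param.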
#include "net.h"
#include
using namespace std;
using namespace ncnn;
int main(int argc, char** argv)
{
// create ncnn inference net
Net net;
net.load_param("model.param");
net.load_model("model.bin");
// create input and output mat
Mat input = Mat(224, 224, 3);
Mat output;
// run inference
net.run(input, output);
// print results
for (int i = 0; i < 1000; i++)
{
cout << output[i] << endl;
}
return 0;
}
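ncnn exposes a few runtime options through Net::opt; a small sketch of settings that would go before load_param in the program above, assuming an ncnn build with Vulkan support when use_vulkan_compute is enabled:

net.opt.num_threads = 4;           // CPU threads used during extraction
net.opt.use_vulkan_compute = true; // run on the GPU if ncnn was built with Vulkan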
TVM Inference
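A sketch using the TVM C++ runtime (TVM 0.8 or later is assumed): model.so is a graph-executor factory library exported from Python with relay.build(...) followed by export_library("model.so"), and the input name "data" depends on that model.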
#include <dlpack/dlpack.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/ndarray.h>
#include <iostream>
using namespace std;
using namespace tvm::runtime;
int main(int argc, char** argv)
{
    // load compiled module (graph executor factory exported by relay.build)
    Module mod_factory = Module::LoadFromFile("model.so");
    // create a graph executor instance on the CPU
    DLDevice dev{kDLCPU, 0};
    Module gmod = mod_factory.GetFunction("default")(dev);
    PackedFunc set_input = gmod.GetFunction("set_input");
    PackedFunc run = gmod.GetFunction("run");
    PackedFunc get_output = gmod.GetFunction("get_output");
    // create input and output tensors (float32, on CPU)
    DLTensor* input;
    DLTensor* output;
    int64_t in_shape[4] = {1, 3, 224, 224};
    int64_t out_shape[2] = {1, 1000};
    TVMArrayAlloc(in_shape, 4, kDLFloat, 32, 1, kDLCPU, 0, &input);
    TVMArrayAlloc(out_shape, 2, kDLFloat, 32, 1, kDLCPU, 0, &output);
    // fill input data
    float* input_data = static_cast<float*>(input->data);
    for (int i = 0; i < 3 * 224 * 224; i++)
    {
        input_data[i] = 1.0f;
    }
    // run inference; "data" must match the model's input name
    set_input("data", input);
    run();
    get_output(0, output);
    // print results
    float* output_data = static_cast<float*>(output->data);
    for (int i = 0; i < 1000; i++)
    {
        cout << output_data[i] << endl;
    }
    // cleanup
    TVMArrayFree(input);
    TVMArrayFree(output);
    return 0;
}
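The same flow targets other backends by loading a module compiled for that target and changing the DLDevice passed to the factory (for example {kDLCUDA, 0} for a CUDA build); the shapes above assume the 1x3x224x224 / 1000-class model used throughout.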
OpenVINO Inference
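A sketch using the legacy InferenceEngine (API 1.0) from OpenVINO; model.xml and model.bin are assumed to be IR files produced by the Model Optimizer.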
#include <inference_engine.hpp>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
using namespace InferenceEngine;
int main(int argc, char** argv)
{
    // create Inference Engine Core
    Core ie;
    // read network model and weights (IR produced by the Model Optimizer)
    CNNNetwork network = ie.ReadNetwork("model.xml", "model.bin");
    // set batch size and describe the input (the IR itself defines the 1x3x224x224 shape)
    network.setBatchSize(1);
    InputsDataMap input_info(network.getInputsInfo());
    input_info.begin()->second->setPrecision(Precision::FP32);
    input_info.begin()->second->getPreProcess().setResizeAlgorithm(RESIZE_BILINEAR);
    input_info.begin()->second->getPreProcess().setColorFormat(ColorFormat::BGR);
    input_info.begin()->second->setLayout(Layout::NCHW);
    // set output precision
    OutputsDataMap output_info(network.getOutputsInfo());
    output_info.begin()->second->setPrecision(Precision::FP32);
    // load network onto the CPU device
    ExecutableNetwork exec_network = ie.LoadNetwork(network, "CPU");
    // create inference request
    InferRequest infer_request = exec_network.CreateInferRequest();
    // get input and output blobs
    Blob::Ptr input_blob = infer_request.GetBlob(input_info.begin()->first);
    Blob::Ptr output_blob = infer_request.GetBlob(output_info.begin()->first);
    // fill input data
    float* input_data = input_blob->buffer().as<float*>();
    for (int i = 0; i < 3 * 224 * 224; i++)
    {
        input_data[i] = 1.0f;
    }
    // run inference
    infer_request.Infer();
    // print results
    float* output_data = output_blob->buffer().as<float*>();
    for (int i = 0; i < 1000; i++)
    {
        cout << output_data[i] << endl;
    }
    return 0;
}
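OpenVINO 2022 and later supersedes the InferenceEngine classes above with the ov:: API 2.0; a rough sketch of the equivalent flow, assuming the same IR files:

#include <algorithm>
#include <iostream>
#include <openvino/openvino.hpp>

int main()
{
    // read the IR, compile it for the CPU and create an inference request (API 2.0)
    ov::Core core;
    ov::CompiledModel compiled = core.compile_model(core.read_model("model.xml"), "CPU");
    ov::InferRequest request = compiled.create_infer_request();
    // fill the input tensor with ones and run inference
    ov::Tensor input = request.get_input_tensor();
    std::fill_n(input.data<float>(), input.get_size(), 1.0f);
    request.infer();
    // read back the class scores
    ov::Tensor output = request.get_output_tensor();
    float* scores = output.data<float>();
    for (size_t i = 0; i < output.get_size(); i++)
        std::cout << scores[i] << std::endl;
    return 0;
}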