TensorRT inference, NCNN inference, TVM inference, OpenVINO inference

TensorRT inference

#include <iostream>
#include <cuda_runtime_api.h>
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "NvInferPlugin.h"

using namespace nvinfer1;
using namespace nvonnxparser;
using namespace std;

// TensorRT requires an ILogger instance; the original code referenced an undefined gLogger
class Logger : public ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
            cout << msg << endl;
    }
} gLogger;

int main(int argc, char** argv)
{
    // create TensorRT builder and an explicit-batch network (required by the ONNX parser)
    IBuilder* builder = createInferBuilder(gLogger);
    INetworkDefinition* network = builder->createNetworkV2(
        1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));

    // create ONNX parser and parse the model file
    IParser* parser = createParser(*network, gLogger);
    parser->parseFromFile("model.onnx", static_cast<int>(ILogger::Severity::kWARNING));

    // build the TensorRT engine with a 1 GB workspace
    IBuilderConfig* config = builder->createBuilderConfig();
    config->setMaxWorkspaceSize(1 << 30);
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);

    // create execution context
    IExecutionContext* context = engine->createExecutionContext();

    // create host input and output buffers (1x3x224x224 input, 1000-class output)
    float* input = new float[3 * 224 * 224];
    float* output = new float[1000];

    // allocate device buffers and copy the input to the GPU;
    // binding 0 is assumed to be the input and binding 1 the output
    void* buffers[2];
    cudaMalloc(&buffers[0], 3 * 224 * 224 * sizeof(float));
    cudaMalloc(&buffers[1], 1000 * sizeof(float));
    cudaMemcpy(buffers[0], input, 3 * 224 * 224 * sizeof(float), cudaMemcpyHostToDevice);

    // run inference and copy the result back to the host
    context->executeV2(buffers);
    cudaMemcpy(output, buffers[1], 1000 * sizeof(float), cudaMemcpyDeviceToHost);

    // print results
    for (int i = 0; i < 1000; i++)
    {
        cout << output[i] << endl;
    }

    // cleanup
    delete[] input;
    delete[] output;
    cudaFree(buffers[0]);
    cudaFree(buffers[1]);
    context->destroy();
    engine->destroy();
    parser->destroy();
    network->destroy();
    config->destroy();
    builder->destroy();

    return 0;
}
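
Rebuilding the engine from ONNX on every run is slow. Below is a minimal sketch of serializing the engine built above into a plan file so later runs can deserialize it directly; the file name "model.plan" is an assumption, the snippet belongs before the cleanup section, and it additionally needs <fstream>:

    // serialize the built engine to disk ("model.plan" is an assumed name)
    IHostMemory* plan = engine->serialize();
    ofstream plan_file("model.plan", ios::binary);
    plan_file.write(static_cast<const char*>(plan->data()), plan->size());
    plan_file.close();
    plan->destroy();

    // a later run can then skip the build step:
    //   IRuntime* runtime = createInferRuntime(gLogger);
    //   ICudaEngine* engine = runtime->deserializeCudaEngine(buffer.data(), buffer.size());
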
NCNN inference

#include "net.h"
#include <iostream>

using namespace std;
using namespace ncnn;

int main(int argc, char** argv)
{
    // create ncnn inference net
    Net net;
    net.load_param("model.param");
    net.load_model("model.bin");

    // create input and output mat (ncnn::Mat takes w, h, c); fill with a constant for the demo
    Mat input = Mat(224, 224, 3);
    input.fill(1.0f);
    Mat output;

    // run inference through an extractor; the blob names "input" and "output"
    // are assumptions and must match the names in model.param
    Extractor ex = net.create_extractor();
    ex.input("input", input);
    ex.extract("output", output);

    // print results
    for (int i = 0; i < output.w; i++)
    {
        cout << output[i] << endl;
    }

    return 0;
}
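
The example above runs on a constant tensor. Below is a minimal sketch of the usual preprocessing step, turning a raw RGB byte buffer into the input Mat; the 224x224 size and the ImageNet-style mean/std values are assumptions, not from the original post (requires <vector>):

    // assumes a 224x224 RGB image already decoded into `pixels` (e.g. by OpenCV or stb_image)
    std::vector<unsigned char> pixels(224 * 224 * 3, 128);
    ncnn::Mat in = ncnn::Mat::from_pixels(pixels.data(), ncnn::Mat::PIXEL_RGB, 224, 224);

    // per-channel (x - mean) * norm normalization with assumed ImageNet values
    const float mean_vals[3] = {123.675f, 116.28f, 103.53f};
    const float norm_vals[3] = {1 / 58.395f, 1 / 57.12f, 1 / 57.375f};
    in.substract_mean_normalize(mean_vals, norm_vals);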

TVM inference

#include <dlpack/dlpack.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
#include <iostream>

using namespace std;
using namespace tvm::runtime;

int main(int argc, char** argv)
{
    // load the compiled library and create a graph executor module on the CPU
    DLDevice dev{kDLCPU, 0};
    Module mod_factory = Module::LoadFromFile("model.so");
    Module gmod = mod_factory.GetFunction("default")(dev);

    // look up the graph executor entry points
    PackedFunc set_input = gmod.GetFunction("set_input");
    PackedFunc run = gmod.GetFunction("run");
    PackedFunc get_output = gmod.GetFunction("get_output");

    // create input and output DLTensors
    // (shape, ndim, dtype code, dtype bits, dtype lanes, device type, device id, out handle)
    DLTensor* input;
    DLTensor* output;
    int64_t in_shape[4] = {1, 3, 224, 224};
    int64_t out_shape[2] = {1, 1000};
    TVMArrayAlloc(in_shape, 4, kDLFloat, 32, 1, kDLCPU, 0, &input);
    TVMArrayAlloc(out_shape, 2, kDLFloat, 32, 1, kDLCPU, 0, &output);

    // fill input data
    float* input_data = static_cast<float*>(input->data);
    for (int i = 0; i < 3 * 224 * 224; i++)
    {
        input_data[i] = 1.0;
    }

    // run inference: bind the input, execute, then copy out the result
    // (the input name "input" is an assumption; it must match the compiled model)
    set_input("input", input);
    run();
    get_output(0, output);

    // print results
    float* output_data = static_cast<float*>(output->data);
    for (int i = 0; i < 1000; i++)
    {
        cout << output_data[i] << endl;
    }

    // cleanup
    TVMArrayFree(input);
    TVMArrayFree(output);

    return 0;
}
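
A sketch of the same flow using tvm::runtime::NDArray instead of raw DLTensor handles; NDArray memory is reference-counted, so no explicit TVMArrayFree is needed. It reuses dev, set_input, run and get_output from the example above, and the input name "input" remains an assumption:

    NDArray in = NDArray::Empty({1, 3, 224, 224}, DLDataType{kDLFloat, 32, 1}, dev);
    NDArray out = NDArray::Empty({1, 1000}, DLDataType{kDLFloat, 32, 1}, dev);
    set_input("input", in);
    run();
    get_output(0, out);
    float* out_data = static_cast<float*>(out->data);  // operator-> exposes the underlying DLTensor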

OpenVINO inference

#include <inference_engine.hpp>
#include <cstdio>
#include <string>
#include <vector>

using namespace InferenceEngine;

int main(int argc, char** argv)
{
    // create Inference Engine Core
    Core ie;

    // read network model and weights
    CNNNetwork network = ie.ReadNetwork("model.xml", "model.bin");

    // set batch size and input shape
    network.setBatchSize(1);
    InputsDataMap input_info(network.getInputsInfo());
    input_info.begin()->second->setPrecision(Precision::FP32);
    input_info.begin()->second->getPreProcess().setResizeAlgorithm(RESIZE_BILINEAR);
    input_info.begin()->second->getPreProcess().setColorFormat(ColorFormat::BGR);
    input_info.begin()->second->getInputData()->setLayout(Layout::NCHW);
    input_info.begin()->second->getInputData()->setDims({1, 3, 224, 224});

    // set output precision
    OutputsDataMap output_info(network.getOutputsInfo());
    output_info.begin()->second->setPrecision(Precision::FP32);

    // load network into executable network
    ExecutableNetwork exec_network = ie.LoadNetwork(network, "CPU");

    // create inference request
    InferRequest infer_request = exec_network.CreateInferRequest();

    // create input and output blobs
    Blob::Ptr input_blob = infer_request.GetBlob(input_info.begin()->first);
    Blob::Ptr output_blob = infer_request.GetBlob(output_info.begin()->first);

    // fill input data
    float* input_data = input_blob->buffer().as<float*>();
    for (int i = 0; i < 3 * 224 * 224; i++)
    {
        input_data[i] = 1.0;
    }

    // run inference
    infer_request.Infer();

    // print results
    float* output_data = output_blob->buffer().as<float*>();
    for (int i = 0; i < 1000; i++)
    {
        printf("%f\n", output_data[i]);
    }

    return 0;
}
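
The Infer() call above blocks until the result is ready. A minimal sketch of the asynchronous variant using the same infer_request from the example above:

    // start the request without blocking, then wait for the result
    infer_request.StartAsync();
    infer_request.Wait(IInferRequest::WaitMode::RESULT_READY);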
