TensorRT multi-batch inference
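The program below runs multi-batch inference with a dynamic-shape TensorRT engine end to end: it deserializes a serialized engine from disk, allocates input/output buffers sized for a maximum batch, copies a directory of preprocessed images into the input buffer, sets the runtime binding dimensions, and times repeated executeV2() calls to report throughput (FPS).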

#include <fstream>
#include <iostream>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
// cuda include
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>

#include <cstdio>
#include <vector>
#include <string>
#include <chrono>
#include <memory>
#include <unistd.h>
#include "logging.h"
#include "utils.hpp"
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/imgcodecs/imgcodecs.hpp>

using namespace nvinfer1;
using namespace std;
static Logger gLogger;

#define checkRuntime(op)  __check_cuda_runtime((op), #op, __FILE__, __LINE__)

bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line){
    if(code != cudaSuccess){
        const char* err_name = cudaGetErrorName(code);
        const char* err_message = cudaGetErrorString(code);
        printf("runtime error %s:%d  %s failed. \n  code = %s, message = %s\n", file, line, op, err_name, err_message);
        return false;
    }
    return true;
}
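// Usage sketch: wrapping every CUDA runtime call in checkRuntime() surfaces the
// failing expression together with file and line, e.g.:
//   checkRuntime(cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice));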


// // Manage pointers returned by TensorRT through a smart pointer
// // so the memory is released automatically and leaks are avoided.
// template<typename _T>
// shared_ptr<_T> make_nvshared(_T* ptr){
//     return shared_ptr<_T>(ptr, [](_T* p){p->destroy();});
// }
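// Note: destroy() is deprecated from TensorRT 8 onward, where the interfaces have
// virtual destructors, so a default-deleter shared_ptr is enough there; a
// TensorRT 8+ variant might look like (sketch):
// template<typename _T>
// shared_ptr<_T> make_nvshared(_T* ptr){ return shared_ptr<_T>(ptr); }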

// bool exists(const string& path){

// #ifdef _WIN32
//     return ::PathFileExistsA(path.c_str());
// #else
//     return access(path.c_str(), R_OK) == 0;
// #endif
// }

vector<char> load_file(const string& file){
    ifstream in(file, ios::in | ios::binary);
    if (!in.is_open())
        return {};

    in.seekg(0, ios::end);
    size_t length = in.tellg();

    std::vector<char> data;
    if (length > 0){
        in.seekg(0, ios::beg);
        data.resize(length);

        in.read((char*)&data[0], length);
    }
    in.close();
    return data;
}

IRuntime* runtime = nullptr;

ICudaEngine* loadEngine(const std::string& engine, int DLACore)
{
    std::ifstream engineFile(engine, std::ios::binary);
    if (!engineFile)
    {
        std::cout << "Error opening engine file: " << engine << std::endl;
        return nullptr;
    }

    engineFile.seekg(0, engineFile.end);
    long int fsize = engineFile.tellg();
    engineFile.seekg(0, engineFile.beg);

    std::vector<char> engineData(fsize);
    engineFile.read(engineData.data(), fsize);
    if (!engineFile)
    {
        std::cout << "Error loading engine file: " << engine << std::endl;
        return nullptr;
    }

    runtime = createInferRuntime(gLogger);
    if (DLACore != -1)
    {
        runtime->setDLACore(DLACore);
    }

    return runtime->deserializeCudaEngine(engineData.data(), fsize, nullptr);
}
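// For setBindingDimensions() below to accept batch sizes other than the build-time
// default, the engine file must have been serialized with a dynamic batch dimension
// and an optimization profile. A minimal build-time sketch (builder, config, and the
// input tensor name "input" are assumptions, not part of this program):
//   IOptimizationProfile* profile = builder->createOptimizationProfile();
//   profile->setDimensions("input", OptProfileSelector::kMIN, Dims4(1,    3, h, w));
//   profile->setDimensions("input", OptProfileSelector::kOPT, Dims4(16,   3, h, w));
//   profile->setDimensions("input", OptProfileSelector::kMAX, Dims4(1000, 3, h, w));
//   config->addOptimizationProfile(profile);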

double timestamp_now_float() {
    // Current time in milliseconds (microseconds since epoch, divided by 1000)
    return chrono::duration_cast<chrono::microseconds>(chrono::system_clock::now().time_since_epoch()).count() / 1000.0;
}
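// steady_clock is the safer choice for interval timing, since system_clock can be
// adjusted while the program runs; a drop-in variant (sketch):
// double timestamp_now_float() {
//     return chrono::duration_cast<chrono::microseconds>(
//         chrono::steady_clock::now().time_since_epoch()).count() / 1000.0;
// }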

void model_infer(std::string model_path, std::string img_path, int input_batchsize, int input_height, int input_width, int output_size)
{
    // std::string strTrtSavedPath = "./res_hjxu_temp_dynamic.trt";
    int maxBatchSize = 1000;   // upper bound used to size the device buffers

    // 1. Deserialize and load the engine
    ICudaEngine* engine = loadEngine(model_path, 0);

    // 2. Create the execution context
    IExecutionContext* context = engine->createExecutionContext();

    int nNumBindings = engine->getNbBindings();
    std::vector<void*> vecBuffers;
    vecBuffers.resize(nNumBindings);
    int nInputIdx = 0;
    int nOutputIndex = 1;
    int nInputSize =  3 * input_height * input_width * sizeof(float);
    
    printf("output size:%d\n", output_size);
    // 3. Create a CUDA stream
    cudaStream_t stream;
    checkRuntime(cudaStreamCreate(&stream));

    // 4. Allocate device buffers for the maximum batch, plus pinned host memory for the output
    float* output_data_host = nullptr;
    checkRuntime(cudaMalloc(&vecBuffers[nInputIdx], nInputSize * maxBatchSize));
    checkRuntime(cudaMalloc(&vecBuffers[nOutputIndex], maxBatchSize * output_size * sizeof(float)));
    checkRuntime(cudaMallocHost(&output_data_host, maxBatchSize * output_size * sizeof(float)));

    std::string imgPath = img_path;
    std::vector<std::string> imagList;
    std::vector<std::string> fileType{"jpg", "png"};
    readFileList(const_cast<char*>(imgPath.c_str()), imagList, fileType);
    double sumTime = 0;
    int img_size = 0;
    for (auto &input_image_path : imagList)
    {
        cv::Mat img = cv::imread(input_image_path);
        cv::Mat matRzImg;
        cv::resize(img, matRzImg, cv::Size(input_width, input_height));
        cv::Mat matF32Img;
        matRzImg.convertTo(matF32Img, CV_32FC3);
        matF32Img = matF32Img / 255.;
        checkRuntime(cudaMemcpy((unsigned char *)vecBuffers[nInputIdx] + nInputSize * img_size, matF32Img.data, nInputSize, cudaMemcpyHostToDevice));
        img_size++;
    }
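    // Note: the copy in the loop above keeps OpenCV's interleaved HWC/BGR layout; if
    // the engine expects planar NCHW input (the common case), split the channels
    // before copying, e.g. (sketch):
    //   std::vector<cv::Mat> planes(3);
    //   cv::split(matF32Img, planes);   // planes[c] is a contiguous H x W float plane
    //   for (int c = 0; c < 3; ++c)
    //       cudaMemcpy((float*)vecBuffers[nInputIdx] + (img_size * 3 + c) * input_height * input_width,
    //                  planes[c].data, input_height * input_width * sizeof(float), cudaMemcpyHostToDevice);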
    
    // Dynamic shape: set the runtime batch size; the first argument (0) is the binding
    // index of the input tensor, and the dims are NCHW to match the resize above
    context->setBindingDimensions(0, Dims4(input_batchsize, 3, input_height, input_width));
    
    int times = 10000;
    // double begin_time = cv::getTickCount();
    auto begin_timer = timestamp_now_float();
    for(int i = 0; i < times; i++ ){
        //infer model
        context->executeV2(vecBuffers.data());
    }
    float total_inference_time = (timestamp_now_float() - begin_timer);
    printf(" total: %.2f ms, input_batchsize:%d\n", total_inference_time, input_batchsize);
    float fps = 1000 / ( total_inference_time / times) * input_batchsize;
    printf("FPS:[%.2f] \n", fps);

    checkRuntime(cudaMemcpy(output_data_host, vecBuffers[nOutputIndex], maxBatchSize * output_size * sizeof(float), cudaMemcpyDeviceToHost));
    
    // double end_time = cv::getTickCount();
    // double time_cost = (end_time - begin_time) / cv::getTickFrequency() * 1000;
    // std::cout << "infer cost time is " << time_cost << std::endl;
    // context->setBindingDimensions(0, Dims4(4, 1, 112, 112));
    // context->executeV2(vecBuffers.data());
    // // context->execute(1, vecBuffers.data());
    // (cudaMemcpy(prob, vecBuffers[nOutputIndex], maxBatchSize * 2 * sizeof(float), cudaMemcpyDeviceToHost));
    // Release device/host buffers, the stream, and the TensorRT objects
    checkRuntime(cudaFree(vecBuffers[nInputIdx]));
    checkRuntime(cudaFree(vecBuffers[nOutputIndex]));
    checkRuntime(cudaFreeHost(output_data_host));
    checkRuntime(cudaStreamDestroy(stream));
    context->destroy();
    engine->destroy();
    return;
}



int main(int argc, char **argv){
    if(argc != 7){
        printf("input paramer error, eg: ./Perception model_Path  img_path  batch height width output_size\n");
        return -1;
    }
    cudaSetDevice(0);
    int input_batchsize = atoi(argv[3]);
    int input_height = atoi(argv[4]);
    int input_width = atoi(argv[5]);
    int output_size = atoi(argv[6]);

    // printf("%d, %d, %d, %d\n", input_batchsize, input_height, input_width, output_size);
    model_infer(argv[1], argv[2], input_batchsize, input_height, input_width, output_size);
    return 0;
}
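
Example invocation (the engine path, image directory, and sizes here are illustrative):

    ./Perception ./res_dynamic.trt ./imgs 8 224 224 1000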
