#include <iostream>
#include <fstream>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
// cuda include
#include <cuda_runtime.h>
#include <stdio.h>
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include <chrono>
#include <cstdlib>
#include <unistd.h>
#include "logging.h"
#include "utils.hpp"
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
using namespace nvinfer1;
using namespace std;
static Logger gLogger;
#define checkRuntime(op) __check_cuda_runtime((op), #op, __FILE__, __LINE__)
bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line){
    if(code != cudaSuccess){
        const char* err_name = cudaGetErrorName(code);
        const char* err_message = cudaGetErrorString(code);
        printf("runtime error %s:%d %s failed.\n  code = %s, message = %s\n", file, line, op, err_name, err_message);
        return false;
    }
    return true;
}
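// Usage sketch: wrap any CUDA runtime call so failures are reported with
// file/line context, e.g.
//   checkRuntime(cudaMalloc(&ptr, bytes));
// The #op stringification in the macro prints the failing expression itself.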
// // Manage pointers returned by the NV API through a smart pointer so the
// // memory is released automatically, avoiding leaks.
// template<typename _T>
// shared_ptr<_T> make_nvshared(_T* ptr){
//     return shared_ptr<_T>(ptr, [](_T* p){p->destroy();});
// }
// bool exists(const string& path){
// #ifdef _WIN32
//     return ::PathFileExistsA(path.c_str());
// #else
//     return access(path.c_str(), R_OK) == 0;
// #endif
// }
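// Example (if the helper above were re-enabled) -- the custom deleter calls the
// TensorRT object's destroy() when the last shared_ptr reference goes away:
//   auto engine = make_nvshared(runtime->deserializeCudaEngine(data, size, nullptr));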
vector<uint8_t> load_file(const string& file){
    ifstream in(file, ios::in | ios::binary);
    if (!in.is_open())
        return {};
    in.seekg(0, ios::end);
    size_t length = in.tellg();
    std::vector<uint8_t> data;
    if (length > 0){
        in.seekg(0, ios::beg);
        data.resize(length);
        in.read((char*)&data[0], length);
    }
    in.close();
    return data;
}
IRuntime* runtime = nullptr;
ICudaEngine* loadEngine(const std::string& engine, int DLACore)
{
    std::ifstream engineFile(engine, std::ios::binary);
    if (!engineFile)
    {
        std::cout << "Error opening engine file: " << engine << std::endl;
        return nullptr;
    }
    engineFile.seekg(0, engineFile.end);
    long int fsize = engineFile.tellg();
    engineFile.seekg(0, engineFile.beg);
    std::vector<char> engineData(fsize);
    engineFile.read(engineData.data(), fsize);
    if (!engineFile)
    {
        std::cout << "Error loading engine file: " << engine << std::endl;
        return nullptr;
    }
    runtime = createInferRuntime(gLogger);
    if (DLACore != -1)
    {
        runtime->setDLACore(DLACore);
    }
    return runtime->deserializeCudaEngine(engineData.data(), fsize, nullptr);
}
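// Note: the engine returned above (and the global runtime that deserialized it)
// must outlive any IExecutionContext created from it; both are released at the
// end of model_infer below.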
// Current timestamp in milliseconds (with sub-millisecond resolution).
double timestamp_now_float() {
    return chrono::duration_cast<chrono::microseconds>(chrono::system_clock::now().time_since_epoch()).count() / 1000.0;
}
void model_infer(std::string model_path, std::string img_path, int input_batchsize, int input_height, int input_width, int output_size)
{
    // std::string strTrtSavedPath = "./res_hjxu_temp_dynamic.trt";
    int maxBatchSize = 1000;
    // 1. Deserialize and load the engine
    ICudaEngine* engine = loadEngine(model_path, 0);
    // 2. Create the execution context
    IExecutionContext* context = engine->createExecutionContext();
    int nNumBindings = engine->getNbBindings();
    std::vector<void*> vecBuffers;
    vecBuffers.resize(nNumBindings);
    int nInputIdx = 0;
    int nOutputIndex = 1;
    int nInputSize = 3 * input_height * input_width * sizeof(float);
    printf("output size:%d\n", output_size);
    // 3. Create a stream
    cudaStream_t stream;
    checkRuntime(cudaStreamCreate(&stream));
    // 4. Allocate device buffers sized for the maximum batch
    float* output_data_host = nullptr;
    checkRuntime(cudaMalloc(&vecBuffers[nInputIdx], nInputSize * maxBatchSize));
    checkRuntime(cudaMalloc(&vecBuffers[nOutputIndex], maxBatchSize * output_size * sizeof(float)));
    checkRuntime(cudaMallocHost(&output_data_host, maxBatchSize * output_size * sizeof(float)));
    std::string input_image_path = img_path;
    std::string imgPath = img_path;
    std::vector<std::string> imagList;
    std::vector<std::string> fileType{"jpg", "png"};
    readFileList(const_cast<char*>(imgPath.c_str()), imagList, fileType);
    double sumTime = 0;
    int img_size = 0;
    for (auto &input_image_path : imagList)
    {
        if (img_size >= maxBatchSize)  // do not overrun the preallocated input buffer
            break;
        cv::Mat img = cv::imread(input_image_path);
        cv::Mat matRzImg;
        cv::resize(img, matRzImg, cv::Size(input_width, input_height));
        cv::Mat matF32Img;
        matRzImg.convertTo(matF32Img, CV_32FC3);
        matF32Img = matF32Img / 255.;
        // Note: this copies the frame in OpenCV's interleaved HWC order; a real
        // deployment would transpose to CHW before feeding an NCHW-input network.
        checkRuntime(cudaMemcpy((unsigned char *)vecBuffers[nInputIdx] + nInputSize * img_size, matF32Img.data, nInputSize, cudaMemcpyHostToDevice));
        img_size++;
    }
    // Dynamic dimensions: binding index 0 is the input tensor; set its shape to
    // (batch, channels, height, width) before running inference.
    context->setBindingDimensions(0, Dims4(input_batchsize, 3, input_height, input_width));
    int times = 10000;
    // double begin_time = cv::getTickCount();
    auto begin_timer = timestamp_now_float();
    for(int i = 0; i < times; i++){
        // infer model
        context->executeV2(vecBuffers.data());
    }
    float total_inference_time = (timestamp_now_float() - begin_timer);
    printf("total: %.2f ms, input_batchsize:%d\n", total_inference_time, input_batchsize);
    float fps = 1000 / (total_inference_time / times) * input_batchsize;
    printf("FPS:[%.2f]\n", fps);
    checkRuntime(cudaMemcpy(output_data_host, vecBuffers[nOutputIndex], maxBatchSize * output_size * sizeof(float), cudaMemcpyDeviceToHost));
    // double end_time = cv::getTickCount();
    // double time_cost = (end_time - begin_time)/cv::getTickFrequency()*1000;
    // std::cout << "infer cost time is " << time_cost << std::endl;
    // context->setBindingDimensions(0, Dims4(4, 1, 112, 112));
    // context->executeV2(vecBuffers.data());
    // // context->execute(1, vecBuffers.data());
    // (cudaMemcpy(prob, vecBuffers[nOutputIndex], maxBatchSize * 2 * sizeof(float), cudaMemcpyDeviceToHost));
    // Release resources in reverse order of creation.
    checkRuntime(cudaStreamDestroy(stream));
    checkRuntime(cudaFree(vecBuffers[nInputIdx]));
    checkRuntime(cudaFree(vecBuffers[nOutputIndex]));
    checkRuntime(cudaFreeHost(output_data_host));
    context->destroy();
    engine->destroy();
    runtime->destroy();
    return;
}
int main(int argc, char **argv){
    if(argc != 7){
        printf("input parameter error, e.g.: ./Perception model_path img_path batch height width output_size\n");
        return -1;
    }
    cudaSetDevice(0);
    int input_batchsize = atoi(argv[3]);
    int input_height = atoi(argv[4]);
    int input_width = atoi(argv[5]);
    int output_size = atoi(argv[6]);
    // printf("%d, %d, %d, %d\n", input_batchsize, input_height, input_width, output_size);
    model_infer(argv[1], argv[2], input_batchsize, input_height, input_width, output_size);
    return 0;
}
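// Example invocation (values are illustrative, not from a shipped model --
// here a hypothetical 224x224 classifier engine with 1000 output scores):
//   ./Perception resnet50.engine ./images 8 224 224 1000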