GTX 1050
CUDA 11.4
cuDNN 8.9.1.23
TensorRT-8.6.1.6
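The listing below assumes this toolchain. As a quick sanity check (a minimal standalone sketch, not part of the original listing), you can print the TensorRT and CUDA runtime versions your binary actually links against and compare them with the versions above:

#include "NvInfer.h"
#include <cuda_runtime_api.h>
#include <cstdio>

int main()
{
    int cuda_rt = 0;
    cudaRuntimeGetVersion(&cuda_rt);  // e.g. 11040 for CUDA 11.4
    // getInferLibVersion() returns e.g. 8601 for TensorRT 8.6.1
    std::printf("TensorRT %d, CUDA runtime %d\n", (int)getInferLibVersion(), cuda_rt);
    return 0;
}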
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include <cuda_runtime_api.h>
#include <opencv2/opencv.hpp>
#include <algorithm>
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
using namespace cv;
using namespace nvinfer1;
using namespace nvonnxparser;
struct Detection
{
int class_id;
float confidence;
cv::Rect box;
};
class MyLogger : public ILogger {
public:
explicit MyLogger(Severity severity = Severity::kWARNING) : severity_(severity) {}
void log(Severity severity, const char* msg) noexcept override
{
if (severity <= severity_)
{
std::cerr << msg << std::endl;
}
}
Severity severity_;
};
const std::vector<cv::Scalar> colors = { cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0) };
std::vector<std::string> load_class_list(std::string class_path)
{
std::vector<std::string> class_list;
std::ifstream ifs(class_path);
std::string line;
while (getline(ifs, line))
{
class_list.push_back(line);
}
return class_list;
}
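// Letterbox preprocessing: scale the image to fit wid x hei, pad the remainder with gray (114, 114, 114),
// convert BGR to RGB, and report the scale ratio and padding offsets so detections can be mapped back later.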
Mat img_preprocess(Mat image, int wid, int hei, float& ratio, int& x_offset, int& y_offset)
{
Mat resize_image;
ratio = std::min(wid / (image.cols * 1.0f), hei / (image.rows * 1.0f));
int border_width = image.cols * ratio;
int border_height = image.rows * ratio;
x_offset = (wid - border_width) / 2;
y_offset = (hei - border_height) / 2;
resize(image, resize_image, Size(border_width, border_height));
copyMakeBorder(resize_image, resize_image, y_offset, hei - border_height - y_offset, x_offset, wid - border_width - x_offset,
cv::BORDER_CONSTANT, Scalar(114, 114, 114));
cvtColor(resize_image, resize_image, cv::COLOR_BGR2RGB);
return resize_image;
}
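// Run one frame through the engine: build the input blob, copy it to the GPU, execute, copy the raw
// output back, decode the candidate boxes, and keep only those that survive NMS.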
void detect(cv::Mat& frame, ICudaEngine* engine, std::vector<Detection>& output,
const std::vector<std::string>& className, float score_thr, float conf_thr, float nms_thr)
{
float ratio;
int x_offset, y_offset;
Mat resize_image = img_preprocess(frame, 640, 640, ratio, x_offset, y_offset);
void* buffers[2];
Dims input_dim = engine->getBindingDimensions(0); // 1,3,640,640
int input_size = 1;
for (int j = 0; j < input_dim.nbDims; ++j)
{
input_size *= input_dim.d[j];
}
size_t input_sz = input_size * sizeof(float);
cudaMalloc(&buffers[0], input_sz);
Dims output_dim = engine->getBindingDimensions(1); // 1,25200,85
int output_size = 1;
for (int j = 0; j < output_dim.nbDims; ++j)
{
output_size *= output_dim.d[j];
}
size_t output_sz = output_size * sizeof(float);
cudaMalloc(&buffers[1], output_sz);
float* input_blob = new float[input_size]; // host
float* output_blob = new float[output_size]; // host
std::cout << "Get input blob!--------------------\n";
const int channels = resize_image.channels();
const int width = resize_image.cols;
const int height = resize_image.rows;
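// Repack the HWC uint8 image (already converted to RGB) into a planar CHW float blob normalized to [0, 1].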
for (int c = 0; c < channels; c++) {
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
input_blob[c * width * height + h * width + w] =
resize_image.at<cv::Vec3b>(h, w)[c] / 255.0f;
}
}
}
std::cout << "Performing Inference!--------------------\n";
IExecutionContext* context = engine->createExecutionContext();
cudaStream_t stream;
cudaStreamCreate(&stream);
cudaMemcpyAsync(buffers[0], input_blob, input_sz, cudaMemcpyHostToDevice, stream);
context->enqueueV2(buffers, stream, nullptr);
cudaMemcpyAsync(output_blob, buffers[1], output_sz, cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
cudaStreamDestroy(stream);
delete context;
cudaFree(buffers[0]);
cudaFree(buffers[1]);
std::vector<cv::Rect> boxes;
std::vector<int> labels;
std::vector<float> confs;
float* ptr = output_blob;
const int dim = className.size() + 5;
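// Each of the 25200 candidate rows is [cx, cy, w, h, objectness, class scores...];
// undo the letterbox offset and scaling to get boxes in original-image coordinates.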
for (int i = 0; i < 25200; ++i) {
float objectness = ptr[4];
if (objectness >= score_thr) {
int label = std::max_element(ptr + 5, ptr + dim) - (ptr + 5);
float confidence = ptr[5 + label] * objectness;
if (confidence >= conf_thr) {
float bx = ptr[0];
float by = ptr[1];
float bw = ptr[2];
float bh = ptr[3];
Rect box;
box.x = (bx - bw * 0.5f - x_offset) / ratio;
box.y = (by - bh * 0.5f - y_offset) / ratio;
box.width = bw / ratio;
box.height = bh / ratio;
boxes.push_back(box);
labels.push_back(label);
confs.push_back(confidence);
}
}
ptr += dim;
}
delete[] input_blob;
delete[] output_blob;
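// Class-agnostic NMS over the remaining candidates.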
std::vector<int> indices;
cv::dnn::NMSBoxes(boxes, confs, conf_thr, nms_thr, indices);
for (int i = 0; i < indices.size(); i++)
{
int idx = indices[i];
Detection result;
result.class_id = labels[idx];
result.confidence = confs[idx];
result.box = boxes[idx];
output.push_back(result);
}
}
int main()
{
// TensorRT-8.6.1.6 ---- 2023.6.2
std::string onnx_model = "E:\\model\\yolov5s\\yolov5s.onnx";
std::string source_path = "E:\\model\\yolov5s\\sample.mp4";
std::string class_path = "E:\\model\\yolov5s\\classes.txt";
float score_thr = 0.25;
float conf_thr = 0.25;
float nms_thr = 0.5;
std::cout << "Load class names!--------------------" << std::endl;
std::vector<std::string> class_list = load_class_list(class_path);
std::cout << "Create a network!--------------------" << std::endl;
MyLogger logger;
IBuilder* builder = createInferBuilder(logger);
uint32_t explicit_batch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
INetworkDefinition* network = builder->createNetworkV2(explicit_batch);
std::cout << "Import model parser!--------------------" << std::endl;
IParser* parser = createParser(*network, logger);
parser->parseFromFile(onnx_model.c_str(), static_cast<int>(ILogger::Severity::kWARNING));
for (int i = 0; i < parser->getNbErrors(); ++i)
{
std::cout << parser->getError(i)->desc() << std::endl;
}
std::cout << "Building an Engine!--------------------" << std::endl;
IBuilderConfig* config = builder->createBuilderConfig();
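// 1U << 26 bytes = 64 MiB of builder workspace; raise this if the build fails or warns about insufficient workspace.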
config->setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, 1U << 26);
IHostMemory* model = builder->buildSerializedNetwork(*network, *config);
std::cout << "Deserializing a Plan!--------------------" << std::endl;
IRuntime* runtime = createInferRuntime(logger);
ICudaEngine* engine = runtime->deserializeCudaEngine(model->data(), model->size());
delete config;
delete parser;
delete network;
delete builder;
delete model;
delete runtime;
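// Note: the engine is rebuilt from the ONNX file on every launch; see the serialization sketch after main() for caching the plan to disk.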
cv::VideoCapture capture(source_path);
if (!capture.isOpened())
{
std::cerr << "Error opening video file\n";
return -1;
}
auto start = std::chrono::high_resolution_clock::now();
int frame_count = 0;
int total_frames = 0;
float fps = -1;
while (true)
{
cv::Mat frame;
capture.read(frame);
if (frame.empty())
{
std::cout << "End of stream\n";
break;
}
std::vector<Detection> output;
detect(frame, engine, output, class_list, score_thr, conf_thr, nms_thr);
frame_count++;
total_frames++;
int n_det = output.size();
for (size_t i = 0; i < n_det; i++)
{
auto detection = output[i];
auto box = detection.box;
auto class_id = detection.class_id;
auto conf = detection.confidence;
const auto color = colors[class_id % colors.size()];
cv::rectangle(frame, box, color, 3);
cv::rectangle(frame, cv::Point(box.x, box.y - 20), cv::Point(box.x + box.width, box.y), color, cv::FILLED);
cv::putText(frame, class_list[class_id].c_str(), cv::Point(box.x, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
}
if (frame_count >= 30)
{
auto end = std::chrono::high_resolution_clock::now();
fps = frame_count * 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
frame_count = 0;
start = std::chrono::high_resolution_clock::now();
}
if (fps > 0)
{
std::ostringstream fps_label;
fps_label << std::fixed << std::setprecision(2);
fps_label << "FPS: " << fps;
std::string fps_label_str = fps_label.str();
cv::putText(frame, fps_label_str.c_str(), cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2);
}
cv::imshow("output", frame);
if (cv::waitKey(1) != -1)
{
capture.release();
std::cout << "finished by user\n";
break;
}
}
std::cout << "Total frames: " << total_frames << "\n";
return 0;
}
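A practical follow-up, sketched under the assumption that you keep the build flow above: main() re-parses the ONNX file and rebuilds the engine on every launch, which is slow. The serialized plan returned by buildSerializedNetwork() can be written to disk once and deserialized on later runs (the file name yolov5s.engine below is just an example):

#include "NvInfer.h"
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

// Write the serialized plan (IHostMemory) produced by buildSerializedNetwork() to disk.
void save_engine(nvinfer1::IHostMemory* model, const std::string& path)
{
    std::ofstream ofs(path, std::ios::binary);
    ofs.write(static_cast<const char*>(model->data()), model->size());
}

// Read a previously saved plan back and deserialize it; returns nullptr on failure.
nvinfer1::ICudaEngine* load_engine(nvinfer1::IRuntime* runtime, const std::string& path)
{
    std::ifstream ifs(path, std::ios::binary);
    std::vector<char> plan((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
    return runtime->deserializeCudaEngine(plan.data(), plan.size());
}

On the first run, call save_engine(model, "yolov5s.engine") right after buildSerializedNetwork(); on later runs, create only the IRuntime and call load_engine(runtime, "yolov5s.engine"), skipping the builder, parser, and network entirely.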
Link: https://pan.baidu.com/s/150ZEyUhhJoGXkTXWoKiWYg?pwd=65g1
Extraction code: 65g1