





#include "yolo.h"

using namespace std;
using namespace cv;
using namespace dnn;

int main()
  string model_path = "yolov5sGPU.onnx";//CPU可用    release GPU  
  Yolo test;
  Net net;
  if (test.readModel(net, model_path, true))//false: CPU    true:GPU
    cout << "read net ok!" << endl;
  else {
    return -1;
  bool vedio = true;
  if (vedio)
    // 用 OpenCV 打开摄像头读取文件(你随便咋样获取图片都OK哪)
    cv::VideoCapture cap = cv::VideoCapture("test2.mp4");
    // 设置宽高 无所谓多宽多高后面都会通过一个算法转换为固定宽高的
    // 固定宽高值应该是你通过YoloV5训练得到的模型所需要的
    // 传入方式是构造 YoloV5 对象时传入 width 默认值为 640,height 默认值为 640
    //cap.set(cv::CAP_PROP_FRAME_WIDTH, 1000);
    //cap.set(cv::CAP_PROP_FRAME_HEIGHT, 800);

    cv::Mat frame;
    vector color;
    for (int i = 0; i < 80; i++) {//80种类
      int b = rand() % 256;
      int g = rand() % 256;
      int r = rand() % 256;
      color.push_back(Scalar(b, g, r));
    //color.push_back(Scalar(0, 0, 255));
    while (cap.isOpened())
      // 读取一帧

      if (frame.empty())
        std::cout << "Read frame failed! or The End!" << std::endl;
      vector result;
      if (test.Detect(frame, net, result))//返回true 检测到对象//
        test.drawPred(frame, result, color);
      else {
        cout << "Detect Failed!" << endl;// CPU模式 有很多识别不到。GPU模式都可识别
      //resize(frame, frame, Size(960, 540));
      cv::imshow("result", frame);
      if (cv::waitKey(1) == 27) break;
    return 0;
    String folder_path = "./image";
    //String folder_path = "./test2";
    //String folder_path = "./test";
    std::vector file_names;
    cv::glob(folder_path, file_names);   //get file names

    for (int i = 0; i < file_names.size(); i++) {
      vector result;
      cv::Mat img;
      std::cout << file_names[i] << std::endl;
      img = cv::imread(file_names[i]);
      if (!img.data) {
      //resize(img, img, Size(956, 800));
      if (test.Detect(img, net, result))//返回true 检测到对象//
        test.drawPred(img, result, color);
      else {
        cout << "Detect Failed!" << endl;// CPU模式 有很多识别不到。GPU模式都可识别
      //resize(img, img, Size(717, 600));//Size(956, 800)
      //resize(img, img, Size(1434, 1200));
      namedWindow("result", cv::WINDOW_AUTOSIZE);
      imshow("result", img);

  return 0;


#pragma once

#define YOLO_P6 false //是否使用P6模型//

/// A single detection produced by Yolo::Detect.
struct Output {
  int id;             // class id of the detected object
  float confidence;   // confidence score of the detection
  cv::Rect box;       // bounding rectangle
};

class Yolo {
  Yolo() {
  ~Yolo() {}
  bool readModel(cv::dnn::Net& net, std::string& netPath, bool isCuda);
  bool Detect(cv::Mat& SrcImg, cv::dnn::Net& net, std::vector& output);
  void drawPred(cv::Mat& img, std::vector result, std::vector color);

#if(defined YOLO_P6 && YOLO_P6==true)
  const float netAnchors[4][6] = { { 19,27, 44,40, 38,94 },{ 96,68, 86,152, 180,137 },{ 140,301, 303,264, 238,542 },{ 436,615, 739,380, 925,792 } };

  const int netWidth = 1280;  //ONNX图片输入宽度
  const int netHeight = 1280; //ONNX图片输入高度

  const int strideSize = 4;  //stride size
  const float netAnchors[3][6] = { { 10,13, 16,30, 33,23 },{ 30,61, 62,45, 59,119 },{ 116,90, 156,198, 373,326 } };

  const int netWidth = 640;   //ONNX图片输入宽度 yolov5s.onnx 640
  const int netHeight = 640;  //ONNX图片输入高度

  const int strideSize = 3;   //stride size
#endif // YOLO_P6

  const float netStride[4] = { 8, 16.0,32,64 };

  float boxThreshold = 0.25;
  float classThreshold = 0.25;

  float nmsThreshold = 0.45;
  float nmsScoreThreshold = boxThreshold * classThreshold;

  std::vector className = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
  "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush" };


using namespace std;
using namespace cv;
using namespace cv::dnn;

/**
 * Loads a network from a model file and selects where inference runs.
 *
 * @param net      receives the loaded network.
 * @param netPath  path of the model file handed to cv::dnn::readNet.
 * @param isCuda   true to request the CUDA backend/target, false for the
 *                 default backend on the CPU.
 * @return true when the network was loaded; false when readNet threw or
 *         produced an empty network.
 */
bool Yolo::readModel(Net& net, string& netPath, bool isCuda = false) {
  try {
    net = readNet(netPath);
  }
  catch (const std::exception&) {
    return false;
  }
  if (net.empty()) {
    return false;
  }

  if (isCuda) {
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
  }
  else {
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
  }
  return true;
}
bool Yolo::Detect(Mat& SrcImg, Net& net, vector& output) {
  Mat blob;
  int col = SrcImg.cols;
  int row = SrcImg.rows;
  int maxLen = MAX(col, row);
  Mat netInputImg = SrcImg.clone();
  if (maxLen > 1.2 * col || maxLen > 1.2 * row) {
    Mat resizeImg = Mat::zeros(maxLen, maxLen, CV_8UC3);
    SrcImg.copyTo(resizeImg(Rect(0, 0, col, row)));
    netInputImg = resizeImg;
  blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(netWidth, netHeight), cv::Scalar(0, 0, 0), true, false);
  //blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(netWidth, netHeight), cv::Scalar(104, 117, 123), true, false);
  //blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(netWidth, netHeight), cv::Scalar(114, 114,114), true, false);
  std::vector netOutputImg;
  //vector outputLayerName{"345","403", "461","output" };
  //net.forward(netOutputImg, outputLayerName[3]); //获取output的输出//
  {  //release OK
    net.forward(netOutputImg, net.getUnconnectedOutLayersNames());//debug报错 initCUDABackend CUDA backend will fallback to the CPU implementation for the layer "_input"
  catch (const std::exception& e)
    cout << e.what();

  std::vector classIds;//结果id数组//
  std::vector confidences;//结果每个id对应置信度数组//
  std::vector boxes;//每个id矩形框//
  float ratio_h = (float)netInputImg.rows / netHeight;
  float ratio_w = (float)netInputImg.cols / netWidth;
  int net_width = className.size() + 5;  //输出的网络宽度是类别数+5//
  float* pdata = (float*)netOutputImg[0].data;
  for (int stride = 0; stride < strideSize; stride++) {    //stride
    int grid_x = (int)(netWidth / netStride[stride]);
    int grid_y = (int)(netHeight / netStride[stride]);
    for (int anchor = 0; anchor < 3; anchor++) {  //anchors
      const float anchor_w = netAnchors[stride][anchor * 2];
      const float anchor_h = netAnchors[stride][anchor * 2 + 1];
      for (int i = 0; i < grid_y; i++) {
        for (int j = 0; j < grid_x; j++) {
          float box_score = pdata[4]; ;//获取每一行的box框中含有某个物体的概率//
          if (box_score >= boxThreshold) {
            cv::Mat scores(1, className.size(), CV_32FC1, pdata + 5);
            Point classIdPoint;
            double max_class_socre;
            minMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint);
            max_class_socre = (float)max_class_socre;
            if (max_class_socre >= classThreshold) {
              //rect [x,y,w,h]
              float x = pdata[0];  //x
              float y = pdata[1];  //y
              float w = pdata[2];  //w
              float h = pdata[3];  //h
              int left = (x - 0.5 * w) * ratio_w;
              int top = (y - 0.5 * h) * ratio_h;
              confidences.push_back(max_class_socre * box_score);
              boxes.push_back(Rect(left, top, int(w * ratio_w), int(h * ratio_h)));
          pdata += net_width;//下一行//

  vector nms_result;
  NMSBoxes(boxes, confidences, nmsScoreThreshold, nmsThreshold, nms_result);
  for (int i = 0; i < nms_result.size(); i++) {
    int idx = nms_result[i];
    Output result;
    result.id = classIds[idx];
    result.confidence = confidences[idx];
    result.box = boxes[idx];
  if (output.size())
    return true;
    return false;

void Yolo::drawPred(Mat& img, vector result, vector color) {
  for (int i = 0; i < result.size(); i++) {
    int left, top;
    left = result[i].box.x;
    top = result[i].box.y;
    int color_num = i;
    //rectangle(img, result[i].box, color[result[i].id], 2, 8);
    rectangle(img, result[i].box, color[result[i].id], 2, 8);
    string label = className[result[i].id] + ":" + to_string(result[i].confidence);

    int baseLine;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    top = max(top, labelSize.height);
    //rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
    putText(img, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 1, color[result[i].id], 2);
  //imshow("1", img);
  imwrite("out.bmp", img);






