yolov6-onnx + opencv-DNN

yolov6n.onnx https://github.com/meituan/YOLOv6/releases 官方提供


#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

// Namespaces.
using namespace cv;
using namespace std;
using namespace cv::dnn;

// Constants.
const float INPUT_WIDTH = 640.0;
const float INPUT_HEIGHT = 640.0;
const float SCORE_THRESHOLD = 0.5;
const float NMS_THRESHOLD = 0.45;
const float CONFIDENCE_THRESHOLD = 0.45;

// Text parameters.
const float FONT_SCALE = 0.7;
const int THICKNESS = 1;

// Colors.
Scalar BLACK = Scalar(0, 0, 0);
Scalar BLUE = Scalar(255, 178, 50);
Scalar YELLOW = Scalar(0, 255, 255);
Scalar RED = Scalar(0, 0, 255);

// Draw the predicted bounding box.
void draw_label(Mat& input_image, string label, int left, int top)
    // Display the label at the top of the bounding box.
    int baseLine;
    Size label_size = getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS, &baseLine);
    top = max(top, label_size.height);
    // Top left corner.
    Point tlc = Point(left, top);
    // Bottom right corner.
    Point brc = Point(left + label_size.width, top + label_size.height + baseLine);
    // Draw black rectangle.
    rectangle(input_image, tlc, brc, BLACK, FILLED);
    // Put the label on the black rectangle.
    putText(input_image, label, Point(left, top + label_size.height), FONT_FACE, FONT_SCALE, YELLOW, THICKNESS);

// Run one forward pass of `net` on `input_image`.
//
// input_image: source image; it is only read here.
// net:         loaded network; its input is replaced by the blob built here.
// Returns the Mats produced by the network's unconnected output layers.
vector<Mat> pre_process(Mat& input_image, Net& net)
{
    // Convert to blob: scale pixel values by 1/255, resize to the network
    // input size, swap the R and B channels, no cropping.
    Mat blob;
    blobFromImage(input_image, blob, 1. / 255., Size(INPUT_WIDTH, INPUT_HEIGHT), Scalar(), true, false);

    // Hand the blob to the network; without this, forward() would not see the image.
    net.setInput(blob);

    // Forward propagate.
    vector<Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    return outputs;
}

// Decode raw network output into boxes, run non-maximum suppression and draw
// the surviving detections on `input_image`.
//
// input_image: image the detections are drawn on in place; its size is used
//              to scale boxes from network-input coordinates to image pixels.
// outputs:     network outputs; only outputs[0] is read. It is treated as
//              consecutive rows of 85 floats:
//              [cx, cy, w, h, confidence, class scores...].
//              NOTE(review): assumes the data is contiguous 32-bit float - confirm
//              against the exported model.
// class_name:  class labels indexed by class id.
// Returns `input_image` (a Mat header sharing the same pixel data).
Mat post_process(Mat& input_image, vector<Mat>& outputs, const vector<string>& class_name)
{
    // Nothing to decode: return the image untouched instead of indexing outputs[0].
    if (outputs.empty() || outputs[0].data == nullptr)
    {
        return input_image;
    }

    // Initialize vectors to hold respective outputs while unwrapping detections.
    // The three vectors are kept index-aligned: entry k of each describes the same detection.
    vector<int> class_ids;
    vector<float> confidences;
    vector<Rect> boxes;

    // Resizing factor from network-input coordinates to image coordinates.
    const float x_factor = input_image.cols / INPUT_WIDTH;
    const float y_factor = input_image.rows / INPUT_HEIGHT;

    // Row layout: 4 box values + 1 confidence + class scores.
    const int dimensions = 85;
    const int num_classes = dimensions - 5;
    // Expect 8400 detections, but never walk past the end of the actual buffer.
    const int rows = static_cast<int>(min<size_t>(8400, outputs[0].total() / dimensions));

    float* data = reinterpret_cast<float*>(outputs[0].data);

    // Iterate through the detections, advancing one row per iteration.
    for (int i = 0; i < rows; ++i, data += dimensions)
    {
        const float confidence = data[4];
        // Discard bad detections and continue.
        if (confidence < CONFIDENCE_THRESHOLD)
        {
            continue;
        }

        // Wrap this row's class scores in a 1 x num_classes Mat (no copy).
        Mat scores(1, num_classes, CV_32FC1, data + 5);
        // Perform minMaxLoc and acquire index of best class score.
        Point class_id;
        double max_class_score = 0.0;
        minMaxLoc(scores, nullptr, &max_class_score, nullptr, &class_id);
        // Continue only if the class score is above the threshold.
        if (max_class_score <= SCORE_THRESHOLD)
        {
            continue;
        }

        // Store class ID and confidence in the pre-defined respective vectors.
        confidences.push_back(confidence);
        class_ids.push_back(class_id.x);

        // Center.
        const float cx = data[0];
        const float cy = data[1];
        // Box dimension.
        const float w = data[2];
        const float h = data[3];
        // Bounding box coordinates, converted from center/size to top-left/size.
        const int left = int((cx - 0.5 * w) * x_factor);
        const int top = int((cy - 0.5 * h) * y_factor);
        const int width = int(w * x_factor);
        const int height = int(h * y_factor);
        // Store good detections in the boxes vector.
        boxes.push_back(Rect(left, top, width, height));
    }

    // Perform Non Maximum Suppression and draw predictions.
    vector<int> indices;
    NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, indices);
    for (const int idx : indices)
    {
        const Rect& box = boxes[idx];

        // Draw bounding box.
        rectangle(input_image, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), BLUE, 3 * THICKNESS);

        // Get the label for the class name and its confidence. Fall back to
        // the numeric id when the class list is shorter than the model's class count.
        const int class_id = class_ids[idx];
        const bool has_name = class_id >= 0 && static_cast<size_t>(class_id) < class_name.size();
        const string name = has_name ? class_name[class_id] : to_string(class_id);
        const string score = format("%.2f", confidences[idx]);
        const string label = name + ":" + score;
        // Draw class labels.
        draw_label(input_image, label, box.x, box.y);
    }
    return input_image;
}

int main(int argc, char** argv)
    // Usage: "./yolov6 /path/to/your/model/yolov6n.onnx /path/to/image/sample.jpg /path/to/coco.names"
    // printf(CV_VERSION);
    // Load class list.
    vector class_list;
    ifstream ifs("coco.names");// coco.names  argv[3]
    string line;

    while (getline(ifs, line))

    // Load image.
    Mat frame;
    frame = imread("v_0.jpg");//v_0.jpg  argv[2]
    Mat input_frame = frame.clone();

    // Load model.
    Net net;
    net = readNetFromONNX("model_s/yolov6n.onnx");//argv[1]//yolov6n.onnx  yolov6s_base_bs1.onnx

    // Put efficiency information.
    // The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
    int cycles = 30;
    double total_time = 0;
    double freq = getTickFrequency() / 1000;
    Mat img;
    for (int i = 0; i < cycles; ++i)
        vector detections;
        Mat input = input_frame.clone();
        detections = pre_process(input, net);
        img = post_process(input, detections, class_list);
        vector layersTimes;
        double t = net.getPerfProfile(layersTimes);
        total_time = total_time + t;
        cout << format("Cycle [%d]:\t%.2f\tms", i + 1, t / freq) << endl;

    double avg_time = total_time / cycles;
    string label = format("Average inference time : %.2f ms", avg_time / freq);
    cout << label << endl;
    putText(img, label, Point(20, 40), FONT_FACE, FONT_SCALE, RED);

    string model_path = "model_s/yolov6n.onnx";// argv[1]//yolov6n.onnx  yolov6s_base_bs1.onnx
    int start_index = model_path.rfind("/");
    string model_name = model_path.substr(start_index + 1, model_path.length() - start_index - 6);
    imshow("C++_" + model_name, img);

    return 0;

