54. Notes on YOLOv7: training, plus deployment to ncnn, MNN, RK3399 NPU, OAK VPU, OpenVINO, and TensorRT

Basic idea: document training a YOLOv7 head detector and deploying it to the OAK VPU camera and the RK3399 Pro development board.

1. Prepare the dataset. It is based on the official dataset, which had some problems that have been fixed.

Link: https://pan.baidu.com/s/1SUGKRgxeV7ddZpLdILhOwA?pwd=atjt  Extraction code: atjt

2. Convert the existing dataset to YOLO format

ubuntu@ubuntu:~$ git clone https://github.com/WongKinYiu/yolov7.git
Cloning into 'yolov7'...
remote: Enumerating objects: 1094, done.
remote: Total 1094 (delta 0), reused 0 (delta 0), pack-reused 1094
Receiving objects: 100% (1094/1094), 69.89 MiB | 2.21 MiB/s, done.
Resolving deltas: 100% (517/517), done.

Download the pretrained weights for a quick test:

ubuntu@ubuntu:~/yolov7/weights$
ubuntu@ubuntu:~/yolov7/weights$ wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt
ubuntu@ubuntu:~/yolov7/weights$ cd ..

Run a test:

ssh://[email protected]:22/usr/bin/python -u /home/ubuntu/yolov7/detect.py
Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', exist_ok=False, img_size=640, iou_thres=0.45, name='exp', no_trace=False, nosave=False, project='runs/detect', save_conf=False, save_txt=False, source='inference/images', update=False, view_img=False, weights='yolov7.pt')
YOLOR  v0.1-116-g8c0bf3f torch 1.9.0+cu111 CUDA:0 (NVIDIA GeForce RTX 3060, 12053.0MB)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Model Summary: 306 layers, 36905341 parameters, 6652669 gradients, 104.5 GFLOPS
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

4 persons, 1 bus, 1 tie, Done. (11.6ms) Inference, (3.0ms) NMS
 The image with the result is saved in: runs/detect/exp2/bus.jpg
5 horses, Done. (10.8ms) Inference, (0.4ms) NMS
 The image with the result is saved in: runs/detect/exp2/horses.jpg
2 persons, 1 tie, 1 cake, Done. (12.3ms) Inference, (0.4ms) NMS
 The image with the result is saved in: runs/detect/exp2/image1.jpg
2 persons, 1 sports ball, Done. (10.9ms) Inference, (0.3ms) NMS
 The image with the result is saved in: runs/detect/exp2/image2.jpg
1 dog, 1 horse, Done. (10.7ms) Inference, (0.3ms) NMS
 The image with the result is saved in: runs/detect/exp2/image3.jpg
3 persons, 1 tie, Done. (9.6ms) Inference, (0.4ms) NMS
 The image with the result is saved in: runs/detect/exp2/zidane.jpg
Done. (0.335s)

Process finished with exit code 0

Test results (saved under runs/detect/exp2).

3. Convert the dataset

1) Split the dataset

# coding:utf-8

import os
import random
import argparse

parser = argparse.ArgumentParser()
# Path to the XML files; adjust for your own data (XMLs usually live under Annotations)
parser.add_argument('--xml_path', default='/home/ubuntu/yolov7/trainData/Annotations', type=str, help='input xml label path')
# Where the split lists go; point to ImageSets/Main under your data directory
parser.add_argument('--txt_path', default='/home/ubuntu/yolov7/trainData/ImageSets/Main', type=str, help='output txt label path')
opt = parser.parse_args()

trainval_percent = 1.0
train_percent = 0.9
xmlfilepath = opt.xml_path
txtsavepath = opt.txt_path
total_xml = os.listdir(xmlfilepath)
if not os.path.exists(txtsavepath):
    os.makedirs(txtsavepath)

num = len(total_xml)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list_index, tv)
train = random.sample(trainval, tr)

file_train = open(txtsavepath + '/train.txt', 'w')
file_val = open(txtsavepath + '/val.txt', 'w')

for i in list_index:
    name = total_xml[i][:-4] + '\n'
    if i in trainval:
        if i in train:
            file_train.write(name)
        else:
            file_val.write(name)


file_train.close()
file_val.close()
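
Assuming the split script above is saved as split_train_val.py (the file name is mine), a single run with the defaults produces train.txt and val.txt under ImageSets/Main:

ubuntu@ubuntu:~/yolov7$ python3 split_train_val.py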

2) Generate labels

# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
from os import getcwd

sets = ['train', 'val']
classes = ["person"]  # 改成自己的类别voc2012


def convert(size, box):
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return x, y, w, h


def convert_annotation(image_id):
    in_file = open('/home/ubuntu/yolov7/trainData/Annotations/%s.xml' % (image_id))
    out_file = open('/home/ubuntu/yolov7/trainData/labels/%s.txt' % (image_id), 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        difficult = None
        if obj.find('Difficult') is None:
            difficult = obj.find('difficult').text
        else:
            difficult = obj.find('Difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
             float(xmlbox.find('ymax').text))
        b1, b2, b3, b4 = b
        # clip boxes that run past the image border
        if b2 > w:
            b2 = w
        if b4 > h:
            b4 = h
        b = (b1, b2, b3, b4)
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


wd = getcwd()
for image_set in sets:
    if not os.path.exists('/home/ubuntu/yolov7/trainData/labels/'):
        os.makedirs('/home/ubuntu/yolov7/trainData/labels/')
    image_ids = open('/home/ubuntu/yolov7/trainData/ImageSets/Main/%s.txt' % (image_set)).read().strip().split()
    list_file = open('/home/ubuntu/yolov7/trainData/%s.txt' % (image_set), 'w')
    for image_id in image_ids:
        list_file.write('/home/ubuntu/yolov7/trainData/images/%s.jpg\n' % (image_id))
        convert_annotation(image_id)
    list_file.close()
print("voc_scrip finish")

4. Modify coco.yaml

# COCO 2017 dataset http://cocodataset.org

# download command/URL (optional)
#download: bash ./scripts/get_coco.sh

# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: /home/ubuntu/yolov7/trainData/train.txt
val: /home/ubuntu/yolov7/trainData/val.txt
#test: ./coco/test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

# number of classes
nc: 1

# class names
names: [ 'person']

Modify yolov7.yaml as well (only the class count needs changing):

nc: 1  # number of classes

5. Start training

ubuntu@ubuntu:~/yolov7$ python3 train.py --weights weights/yolov7.pt --cfg cfg/training/yolov7.yaml --data data/coco.yaml --device 0 --batch-size 8 --epoch 1000 --hyp data/hyp.scratch.p5.yaml
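
After training finishes, mAP on the val split can be checked with the repo's test.py (the exp folder index will differ per run):

ubuntu@ubuntu:~/yolov7$ python3 test.py --data data/coco.yaml --img-size 640 --batch-size 8 --weights runs/train/exp3/weights/best.pt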

Test:

ubuntu@ubuntu:~/yolov7$ python3 detect.py --weights runs/train/exp3/weights/best.pt 
Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', exist_ok=False, img_size=640, iou_thres=0.45, name='exp', no_trace=False, nosave=False, project='runs/detect', save_conf=False, save_txt=False, source='inference/images', update=False, view_img=False, weights=['runs/train/exp3/weights/best.pt'])
YOLOR  v0.1-116-g8c0bf3f torch 1.9.0+cu111 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 3910.5MB)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Model Summary: 314 layers, 36481772 parameters, 6194944 gradients, 103.2 GFLOPS
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

3 persons, Done. (22.3ms) Inference, (5.9ms) NMS
 The image with the result is saved in: runs/detect/exp3/bus.jpg
Done. (18.4ms) Inference, (0.2ms) NMS
 The image with the result is saved in: runs/detect/exp3/horses.jpg
4 persons, Done. (21.1ms) Inference, (0.6ms) NMS
 The image with the result is saved in: runs/detect/exp3/image1.jpg
2 persons, Done. (18.7ms) Inference, (0.6ms) NMS
 The image with the result is saved in: runs/detect/exp3/image2.jpg
2 persons, Done. (17.9ms) Inference, (0.6ms) NMS
 The image with the result is saved in: runs/detect/exp3/image3.jpg
2 persons, Done. (15.5ms) Inference, (0.6ms) NMS
 The image with the result is saved in: runs/detect/exp3/zidane.jpg
Done. (0.550s)

Test results (embarrassingly, it detects the horse's face and the dog's face as human heads...)

6. Export the model to ONNX (do not add NMS to the graph)

ubuntu@ubuntu:~/yolov7$ python3 export.py --weights runs/train/exp3/weights/best.pt --simplify --img-size 640 640 
Import onnx_graphsurgeon failure: No module named 'onnx_graphsurgeon'
Namespace(batch_size=1, conf_thres=0.25, device='cpu', dynamic=False, dynamic_batch=False, end2end=False, fp16=False, grid=False, img_size=[640, 640], include_nms=False, int8=False, iou_thres=0.45, max_wh=None, simplify=True, topk_all=100, weights='runs/train/exp3/weights/best.pt')
YOLOR  v0.1-116-g8c0bf3f torch 1.9.0+cu111 CPU

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Model Summary: 314 layers, 36481772 parameters, 6194944 gradients, 103.2 GFLOPS

Starting TorchScript export with torch 1.9.0+cu111...
TorchScript export success, saved as runs/train/exp3/weights/best.torchscript.pt
CoreML export failure: No module named 'coremltools'

Starting TorchScript-Lite export with torch 1.9.0+cu111...
TorchScript-Lite export success, saved as runs/train/exp3/weights/best.torchscript.ptl

Starting ONNX export with onnx 1.12.0...

Starting to simplify ONNX...
ONNX export success, saved as runs/train/exp3/weights/best.onnx

Export complete (11.29s). Visualize with https://github.com/lutzroeder/netron.
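
As an optional sanity check (not part of the repo; pip3 install onnxruntime), the exported graph should expose the 'images' input and the three raw heads that the ncnn/MNN/RKNN code below consumes:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('runs/train/exp3/weights/best.onnx')
print([i.name for i in sess.get_inputs()])   # ['images']
print([o.name for o in sess.get_outputs()])  # 'output', '534', '554' (order may vary)

dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)
for out in sess.run(None, {'images': dummy}):
    print(out.shape)  # (1, 3, 80, 80, 6) / (1, 3, 40, 40, 6) / (1, 3, 20, 20, 6)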

7. Convert to ncnn; the official ncnn example can be called almost directly

ubuntu@ubuntu:~/ncnn/build/install/bin$ ./onnx2ncnn /home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx /home/ubuntu/yolov7/runs/train/exp3/weights/best.param /home/ubuntu/yolov7/runs/train/exp3/weights/best.bin

Reference source: https://github.com/Tencent/ncnn/tree/master/examples

In the generated .param file, the three values marked by the red boxes (the fixed first dims of the Reshape layers feeding the three heads) must be changed to -1, otherwise the output boxes come out as garbage.
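
Concretely, each head ends in a Reshape whose first dim is baked to the grid size. For a 640×640, single-class model the trailing parameters of those three Reshape lines look like the following (layer and blob names omitted; verify the exact values in your own .param), and each 0= value becomes -1:

0=6400 1=6 2=3   ->   0=-1 1=6 2=3    (stride-8 head, 80x80 grid)
0=1600 1=6 2=3   ->   0=-1 1=6 2=3    (stride-16 head, 40x40 grid)
0=400  1=6 2=3   ->   0=-1 1=6 2=3    (stride-32 head, 20x20 grid)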

[Figure 1: best.param with the three Reshape dims changed to -1]

CMakeLists.txt

cmake_minimum_required(VERSION 3.16)
project(untitled22)
set(CMAKE_CXX_FLAGS "-std=c++11")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(CMAKE_CXX_STANDARD 11)
include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/include/ncnn)
find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
#add OpenCV headers
include_directories(${OpenCV_INCLUDE_DIRS})
#link OpenCV libs

add_library(libncnn STATIC IMPORTED)
set_target_properties(libncnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libncnn.a)


add_executable(untitled22 main.cpp)
target_link_libraries(untitled22 ${OpenCV_LIBS} libncnn )

main.cpp

// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "layer.h"
#include "net.h"

#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <float.h>
#include <stdio.h>
#include <vector>

#define MAX_STRIDE 32

struct Object
{
    cv::Rect_<float> rect;
    int label;
    float prob;
};

static inline float intersection_area(const Object& a, const Object& b)
{
    cv::Rect_<float> inter = a.rect & b.rect;
    return inter.area();
}

static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
    int i = left;
    int j = right;
    float p = objects[(left + right) / 2].prob;

    while (i <= j)
    {
        while (objects[i].prob > p)
            i++;

        while (objects[j].prob < p)
            j--;

        if (i <= j)
        {
            // swap
            std::swap(objects[i], objects[j]);

            i++;
            j--;
        }
    }

#pragma omp parallel sections
    {
#pragma omp section
        {
            if (left < j) qsort_descent_inplace(objects, left, j);
        }
#pragma omp section
        {
            if (i < right) qsort_descent_inplace(objects, i, right);
        }
    }
}

static void qsort_descent_inplace(std::vector<Object>& objects)
{
    if (objects.empty())
        return;

    qsort_descent_inplace(objects, 0, objects.size() - 1);
}

static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
{
    picked.clear();

    const int n = faceobjects.size();

    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = faceobjects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = faceobjects[picked[j]];

            if (!agnostic && a.label != b.label)
                continue;

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
                keep = 0;
        }

        if (keep)
            picked.push_back(i);
    }
}

static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (1.f + exp(-x)));
}

static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    const int num_grid = feat_blob.h;

    int num_grid_x;
    int num_grid_y;
    if (in_pad.w > in_pad.h)
    {
        num_grid_x = in_pad.w / stride;
        num_grid_y = num_grid / num_grid_x;
    }
    else
    {
        num_grid_y = in_pad.h / stride;
        num_grid_x = num_grid / num_grid_y;
    }

    const int num_class = feat_blob.w - 5;

    const int num_anchors = anchors.w / 2;

    for (int q = 0; q < num_anchors; q++)
    {
        const float anchor_w = anchors[q * 2];
        const float anchor_h = anchors[q * 2 + 1];

        const ncnn::Mat feat = feat_blob.channel(q);

        for (int i = 0; i < num_grid_y; i++)
        {
            for (int j = 0; j < num_grid_x; j++)
            {
                const float* featptr = feat.row(i * num_grid_x + j);
                float box_confidence = sigmoid(featptr[4]);
                if (box_confidence >= prob_threshold)
                {
                    // find class index with max class score
                    int class_index = 0;
                    float class_score = -FLT_MAX;
                    for (int k = 0; k < num_class; k++)
                    {
                        float score = featptr[5 + k];
                        if (score > class_score)
                        {
                            class_index = k;
                            class_score = score;
                        }
                    }
                    float confidence = box_confidence * sigmoid(class_score);
                    if (confidence >= prob_threshold)
                    {
                        float dx = sigmoid(featptr[0]);
                        float dy = sigmoid(featptr[1]);
                        float dw = sigmoid(featptr[2]);
                        float dh = sigmoid(featptr[3]);

                        float pb_cx = (dx * 2.f - 0.5f + j) * stride;
                        float pb_cy = (dy * 2.f - 0.5f + i) * stride;

                        float pb_w = pow(dw * 2.f, 2) * anchor_w;
                        float pb_h = pow(dh * 2.f, 2) * anchor_h;

                        float x0 = pb_cx - pb_w * 0.5f;
                        float y0 = pb_cy - pb_h * 0.5f;
                        float x1 = pb_cx + pb_w * 0.5f;
                        float y1 = pb_cy + pb_h * 0.5f;

                        Object obj;
                        obj.rect.x = x0;
                        obj.rect.y = y0;
                        obj.rect.width = x1 - x0;
                        obj.rect.height = y1 - y0;
                        obj.label = class_index;
                        obj.prob = confidence;

                        objects.push_back(obj);
                    }
                }
            }
        }
    }
}

static int detect_yolov7(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net yolov7;

    yolov7.opt.use_vulkan_compute = true;
    // yolov7.opt.use_bf16_storage = true;

    // original pretrained model from https://github.com/WongKinYiu/yolov7
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    yolov7.load_param("/home/ubuntu/yolov7/runs/train/exp3/weights/best.param");
    yolov7.load_model("/home/ubuntu/yolov7/runs/train/exp3/weights/best.bin");

    const int target_size = 640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    // letterbox pad to multiple of MAX_STRIDE
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);

    int wpad = (w + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w;
    int hpad = (h + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov7.create_extractor();

    ex.input("images", in_pad);

    std::vector<Object> proposals;

    // stride 8
    {
        ncnn::Mat out;
        ex.extract("output", out);

        ncnn::Mat anchors(6);
        anchors[0] = 12.f;
        anchors[1] = 16.f;
        anchors[2] = 19.f;
        anchors[3] = 36.f;
        anchors[4] = 40.f;
        anchors[5] = 28.f;

        std::vector<Object> objects8;
        generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8);

        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
    }

    // stride 16
    {
        ncnn::Mat out;

        ex.extract("534", out);

        ncnn::Mat anchors(6);
        anchors[0] = 36.f;
        anchors[1] = 75.f;
        anchors[2] = 76.f;
        anchors[3] = 55.f;
        anchors[4] = 72.f;
        anchors[5] = 146.f;

        std::vector<Object> objects16;
        generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16);

        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
    }

    // stride 32
    {
        ncnn::Mat out;

        ex.extract("554", out);

        ncnn::Mat anchors(6);
        anchors[0] = 142.f;
        anchors[1] = 110.f;
        anchors[2] = 192.f;
        anchors[3] = 243.f;
        anchors[4] = 459.f;
        anchors[5] = 401.f;

        std::vector<Object> objects32;
        generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32);

        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}

static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
            "person"
    };

    static const unsigned char colors[19][3] = {
            {54, 67, 244},
            {99, 30, 233},
            {176, 39, 156},
            {183, 58, 103},
            {181, 81, 63},
            {243, 150, 33},
            {244, 169, 3},
            {212, 188, 0},
            {136, 150, 0},
            {80, 175, 76},
            {74, 195, 139},
            {57, 220, 205},
            {59, 235, 255},
            {7, 193, 255},
            {0, 152, 255},
            {34, 87, 255},
            {72, 85, 121},
            {158, 158, 158},
            {139, 125, 96}
    };

    int color_index = 0;

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        const unsigned char* color = colors[color_index % 19];
        color_index++;

        cv::Scalar cc(color[0], color[1], color[2]);

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cc, 2);

        char text[256];
        sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cc, -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}

int main(int argc, char** argv)
{


    cv::Mat m = cv::imread("/home/ubuntu/yolov7/inference/images/bus.jpg");
    if (m.empty())
    {
        fprintf(stderr, "cv::imread failed\n");
        return -1;
    }

    std::vector<Object> objects;
    detect_yolov7(m, objects);

    draw_objects(m, objects);

    return 0;
}
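
Assuming the project layout the CMakeLists expects (ncnn headers under include/, libncnn.a under lib/, both copied from ncnn/build/install), building and running is the standard CMake flow:

ubuntu@ubuntu:~/untitled22$ mkdir -p build && cd build
ubuntu@ubuntu:~/untitled22/build$ cmake .. && make -j$(nproc)
ubuntu@ubuntu:~/untitled22/build$ ./untitled22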

Test results

8. MNN conversion and test

ubuntu@ubuntu:~/MNN/build$ ./MNNConvert -f ONNX --modelFile /home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx --MNNModel /home/ubuntu/yolov7/runs/train/exp3/weights/best.mnn --bizCode MNN
Start to Convert Other Model Format To MNN Model...
[10:36:41] /home/ubuntu/MNN/tools/converter/source/onnx/onnxConverter.cpp:40: ONNX Model ir version: 6
Start to Optimize the MNN Net...
inputTensors : [ images, ]
outputTensors: [ 534, 554, output, ]
Converted Success!

CMakeLists.txt

cmake_minimum_required(VERSION 3.16)
project(untitled22)
set(CMAKE_CXX_FLAGS "-std=c++11")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(CMAKE_CXX_STANDARD 11)
include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/include/MNN)
find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
#add OpenCV headers
include_directories(${OpenCV_INCLUDE_DIRS})
#link OpenCV libs

add_library(libmnn SHARED IMPORTED)
set_target_properties(libmnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libMNN.so)


add_executable(untitled22 main.cpp)
target_link_libraries(untitled22 ${OpenCV_LIBS} libmnn )

main.cpp


#include <MNN/Interpreter.hpp>
#include <MNN/ImageProcess.hpp>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <memory>
#include <chrono>
#include <cfloat>
#include <algorithm>
using namespace std;
using namespace cv;

typedef struct {
        int width;
        int height;
} YoloSize;


typedef struct {
    std::string name;
    int stride;
    std::vector<YoloSize> anchors;
} YoloLayerData;

class BoxInfo
{
public:
    int x1,y1,x2,y2,label,id;
    float score;
};

static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (1.f + exp(-x)));
}
double GetIOU(cv::Rect_<float> bb_test, cv::Rect_<float> bb_gt)
{
    float in = (bb_test & bb_gt).area();
    float un = bb_test.area() + bb_gt.area() - in;

    if (un < DBL_EPSILON)
        return 0;

    return (double)(in / un);
}
std::vector<BoxInfo> decode_infer(MNN::Tensor & data, int stride,  int net_size, int num_classes,
             const std::vector<YoloSize> &anchors, float threshold)
{
    std::vector<BoxInfo> result;
    int batchs, channels, height, width, pred_item ;
    batchs = data.shape()[0];
    channels = data.shape()[1];
    height = data.shape()[2];
    width = data.shape()[3];
    pred_item = data.shape()[4];

    auto data_ptr = data.host<float>();
    for(int bi=0; bi<batchs; bi++)
    {
        auto batch_ptr = data_ptr + bi * (channels * height * width * pred_item);
        for(int ci=0; ci<channels; ci++)
        {
            auto channel_ptr = batch_ptr + ci * (height * width * pred_item);
            for(int hi=0; hi<height; hi++)
            {
                auto height_ptr = channel_ptr + hi * (width * pred_item);
                for(int wi=0; wi<width; wi++)
                {
                    auto width_ptr = height_ptr + wi * pred_item;
                    auto cls_ptr = width_ptr + 5;
                    // score = class probability * objectness, both after sigmoid
                    for(int cls_id=0; cls_id<num_classes; cls_id++)
                    {
                        float score = sigmoid(cls_ptr[cls_id]) * sigmoid(width_ptr[4]);
                        if(score > threshold)
                        {
                            float cx = (sigmoid(width_ptr[0]) * 2.f - 0.5f + wi) * (float) stride;
                            float cy = (sigmoid(width_ptr[1]) * 2.f - 0.5f + hi) * (float) stride;
                            float w = pow(sigmoid(width_ptr[2]) * 2.f, 2) * anchors[ci].width;
                            float h = pow(sigmoid(width_ptr[3]) * 2.f, 2) * anchors[ci].height;

                            BoxInfo box;

                            box.x1 = std::max(0, std::min(net_size, int((cx - w / 2.f) )));
                            box.y1 = std::max(0, std::min(net_size, int((cy - h / 2.f) )));
                            box.x2 = std::max(0, std::min(net_size, int((cx + w / 2.f) )));
                            box.y2 = std::max(0, std::min(net_size, int((cy + h / 2.f) )));
                            box.score = score;
                            box.label = cls_id;
                            result.push_back(box);
                        }
                    }
                }
            }
        }
    }

    return result;
}

void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
    std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
    std::vector<float> vArea(input_boxes.size());
    for (int i = 0; i < int(input_boxes.size()); ++i) {
        vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
                   * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
    }
    for (int i = 0; i < int(input_boxes.size()); ++i) {
        for (int j = i + 1; j < int(input_boxes.size());) {
            float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);
            float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
            float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);
            float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
            float w = std::max(float(0), xx2 - xx1 + 1);
            float h = std::max(float(0), yy2 - yy1 + 1);
            float inter = w * h;
            float ovr = inter / (vArea[i] + vArea[j] - inter);
            if (ovr >= NMS_THRESH) {
                input_boxes.erase(input_boxes.begin() + j);
                vArea.erase(vArea.begin() + j);
            } else {
                j++;
            }
        }
    }
}
void scale_coords(std::vector<BoxInfo> &boxes, int w_from, int h_from, int w_to, int h_to)
{
    float w_ratio = float(w_to)/float(w_from);
    float h_ratio = float(h_to)/float(h_from);


    for(auto &box: boxes)
    {
        box.x1 *= w_ratio;
        box.x2 *= w_ratio;
        box.y1 *= h_ratio;
        box.y2 *= h_ratio;
    }
    return ;
}

cv::Mat draw_box(cv::Mat & cv_mat, std::vector<BoxInfo> &boxes, const std::vector<std::string> &labels, unsigned char colors[][3])
{

    for(auto box : boxes)
    {
        int width = box.x2-box.x1;
        int height = box.y2-box.y1;
        cv::Point p = cv::Point(box.x1, box.y1);
        cv::Rect rect = cv::Rect(box.x1, box.y1, width, height);
        cv::rectangle(cv_mat, rect, cv::Scalar(colors[box.label][0],colors[box.label][1],colors[box.label][2]));
        string text = labels[box.label] + ":" + std::to_string(box.score) ;
        cv::putText(cv_mat, text, p, cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(colors[box.label][0],colors[box.label][1],colors[box.label][2]));
    }
    return cv_mat;
}

int main(int argc, char **argv) {


    std::vector<std::string> labels = {
            "person"
    };
     unsigned char colors[][3] = {
            {255, 0, 0}
    };

    cv::Mat bgr = cv::imread("/home/ubuntu/yolov7/inference/images/bus.jpg"); // NOTE: plain resize below differs from the repo's letterbox preprocessing, which affects the results

    int target_size = 640;

    cv::Mat resize_img;
    cv::resize(bgr, resize_img, cv::Size(target_size, target_size));
    float cls_threshold = 0.25;

    // MNN inference
    auto mnnNet = std::shared_ptr<MNN::Interpreter>(
            MNN::Interpreter::createFromFile("/home/ubuntu/yolov7/runs/train/exp3/weights/best.mnn"));
    auto t1 = std::chrono::steady_clock::now();
    MNN::ScheduleConfig netConfig;
    netConfig.type = MNN_FORWARD_CPU;
    netConfig.numThread = 4;

    auto session = mnnNet->createSession(netConfig);
    auto input = mnnNet->getSessionInput(session, nullptr);

    mnnNet->resizeTensor(input, {1, 3, (int) target_size, (int) target_size});
    mnnNet->resizeSession(session);
    MNN::CV::ImageProcess::Config config;

    const float mean_vals[3] = {0, 0, 0};

    const float norm_255[3] = {1.f / 255, 1.f / 255.f, 1.f / 255};

    std::shared_ptr<MNN::CV::ImageProcess> pretreat(
            MNN::CV::ImageProcess::create(MNN::CV::BGR, MNN::CV::RGB, mean_vals, 3,
                                          norm_255, 3));

    pretreat->convert(resize_img.data, (int) target_size, (int) target_size, resize_img.step[0], input);


    mnnNet->runSession(session);

    std::vector<YoloLayerData> yolov5s_layers{
            {"554",    32, {{142, 110}, {192, 243}, {459, 401}}},
            {"534",    16, {{36,  75}, {76,  55},  {72,  146}}},
            {"output", 8,  {{12,  16}, {19,  36},  {40,  28}}},
    };

    auto output = mnnNet->getSessionOutput(session, yolov5s_layers[2].name.c_str());

    MNN::Tensor outputHost(output, output->getDimensionType());
    output->copyToHostTensor(&outputHost);

    // scratch vectors left over from the original demo (unused below)
    std::vector<float> vec_scores;
    std::vector<float> vec_new_scores;
    std::vector<int> vec_labels;
    int outputHost_shape_c = outputHost.channel();
    int outputHost_shape_d = outputHost.dimensions();
    int outputHost_shape_w = outputHost.width();
    int outputHost_shape_h = outputHost.height();

    printf("shape_d=%d shape_c=%d shape_h=%d shape_w=%d outputHost.elementSize()=%d\n", outputHost_shape_d,
           outputHost_shape_c, outputHost_shape_h, outputHost_shape_w, outputHost.elementSize());
    auto yolov7_534 = mnnNet->getSessionOutput(session, yolov5s_layers[1].name.c_str());

    MNN::Tensor output_534_Host(yolov7_534, yolov7_534->getDimensionType());
    yolov7_534->copyToHostTensor(&output_534_Host);


    outputHost_shape_c = output_534_Host.channel();
    outputHost_shape_d = output_534_Host.dimensions();
    outputHost_shape_w = output_534_Host.width();
    outputHost_shape_h = output_534_Host.height();
    printf("shape_d=%d shape_c=%d shape_h=%d shape_w=%d output_534_Host.elementSize()=%d\n", outputHost_shape_d,
           outputHost_shape_c, outputHost_shape_h, outputHost_shape_w, output_534_Host.elementSize());

    auto yolov7_554 = mnnNet->getSessionOutput(session, yolov5s_layers[0].name.c_str());

    MNN::Tensor output_544_Host(yolov7_554, yolov7_554->getDimensionType());
    yolov7_554->copyToHostTensor(&output_544_Host);


    outputHost_shape_c = output_544_Host.channel();
    outputHost_shape_d = output_544_Host.dimensions();
    outputHost_shape_w = output_544_Host.width();
    outputHost_shape_h = output_544_Host.height();
    printf("shape_d=%d shape_c=%d shape_h=%d shape_w=%d output_544_Host.elementSize()=%d\n", outputHost_shape_d,
           outputHost_shape_c, outputHost_shape_h, outputHost_shape_w, output_544_Host.elementSize());


    std::vector<YoloLayerData> & layers = yolov5s_layers;

    std::vector<BoxInfo> result;
    std::vector<BoxInfo> boxes;
    float threshold = 0.3;
    float nms_threshold = 0.7;



    boxes = decode_infer(outputHost, layers[2].stride, target_size, labels.size(), layers[2].anchors, threshold);
    result.insert(result.begin(), boxes.begin(), boxes.end());

    boxes = decode_infer(output_534_Host, layers[1].stride, target_size, labels.size(), layers[1].anchors, threshold);
    result.insert(result.begin(), boxes.begin(), boxes.end());

    boxes = decode_infer(output_544_Host, layers[0].stride, target_size, labels.size(), layers[0].anchors, threshold);
    result.insert(result.begin(), boxes.begin(), boxes.end());

    nms(result, nms_threshold);
    scale_coords(result, target_size, target_size, bgr.cols, bgr.rows);
    cv::Mat frame_show = draw_box(bgr, result, labels,colors);
    cv::imshow("out",bgr);
    cv::imwrite("dp.jpg",bgr);
    cv::waitKey(0);
    mnnNet->releaseModel();
    mnnNet->releaseSession(session);
    return 0;
}

Test results

9. RK3399 Pro

Convert the model:

from rknn.api import RKNN

ONNX_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx'
RKNN_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.rknn'

if __name__ == '__main__':

    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], reorder_channel='0 1 2',
                target_platform='rk3399pro',
                quantized_dtype='asymmetric_affine-u8', optimization_level=3, output_optimize=1)
    print('done')

    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL)
    if ret != 0:
        print('Load model  failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset='dataset.txt')  # ,pre_compile=True
    if ret != 0:
        print('Build yolov5s failed!')
        exit(ret)
    print('done')

    # Export rknn model
    print('--> Export RKNN model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export yolov5s.rknn failed!')
        exit(ret)
    print('done')

    rknn.release()
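
dataset.txt here is the calibration list used for quantization: a plain-text file with one image path per line, ideally a few dozen representative images from the training set. For example (file names are placeholders):

./bus.jpg
./train_0001.jpg
./train_0002.jpg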

Python test:

import os
import numpy as np
import cv2
from rknn.api import RKNN


ONNX_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx'
RKNN_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.rknn'
IMG_PATH = './bus.jpg'
DATASET = './dataset.txt'

QUANTIZE_ON = True

BOX_THRESH = 0.5
NMS_THRESH = 0.6
IMG_SIZE = 640

CLASSES = ("person")
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def xywh2xyxy(x):
    # Convert [x, y, w, h] to [x1, y1, x2, y2]
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y

def process(input, mask, anchors):

    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = sigmoid(input[..., 4])
    box_confidence = np.expand_dims(box_confidence, axis=-1)

    box_class_probs = sigmoid(input[..., 5:])

    box_xy = sigmoid(input[..., :2])*2 - 0.5

    col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    box_xy *= int(IMG_SIZE/grid_h)

    box_wh = pow(sigmoid(input[..., 2:4])*2, 2)
    box_wh = box_wh * anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs

def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes with box threshold. It's a bit different with origin yolov5 post process!

    # Arguments
        boxes: ndarray, boxes of objects.
        box_confidences: ndarray, confidences of objects.
        box_class_probs: ndarray, class_probs of objects.

    # Returns
        boxes: ndarray, filtered boxes.
        classes: ndarray, classes for boxes.
        scores: ndarray, scores for boxes.
    """
    box_classes = np.argmax(box_class_probs, axis=-1)
    box_class_scores = np.max(box_class_probs, axis=-1)
    pos = np.where(box_confidences[...,0] >= BOX_THRESH)


    boxes = boxes[pos]
    classes = box_classes[pos]
    scores = box_class_scores[pos]

    return boxes, classes, scores

def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.

    # Arguments
        boxes: ndarray, boxes of objects.
        scores: ndarray, scores of objects.

    # Returns
        keep: ndarray, index of effective boxes.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= NMS_THRESH)[0]
        order = order[inds + 1]
    keep = np.array(keep)
    return keep


def yolov7_post_process(input_data):
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[12,16], [ 19,36], [40,28],
               [36,75], [76,55],[72,146],
               [142,110], [192,243], [459,401]]

    boxes, classes, scores = [], [], []
    for input,mask in zip(input_data, masks):
        b, c, s = process(input, mask, anchors)
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)

    boxes = np.concatenate(boxes)
    boxes = xywh2xyxy(boxes)
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)

    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]

        keep = nms_boxes(b, s)

        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)

    return boxes, classes, scores

def draw(image, boxes, scores, classes):
    """Draw the boxes on the image.

    # Argument:
        image: original image.
        boxes: ndarray, boxes of objects.
        classes: ndarray, classes of objects.
        scores: ndarray, scores of objects.
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        top, left, right, bottom = box
        print('class: {}, score: {}'.format(CLASSES[cl], score))
        print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(top, left, right, bottom))
        top = int(top)
        left = int(left)
        right = int(right)
        bottom = int(bottom)

        cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (top, left - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)


def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


if __name__ == '__main__':

    # Create RKNN object
    rknn = RKNN()

    if not os.path.exists(RKNN_MODEL):
        print("model not exist")
        exit(-1)

    # Load ONNX model
    print("--> Loading model")
    ret = rknn.load_rknn(RKNN_MODEL)
    if ret != 0:
        print("Load rknn model failed!")
        exit(ret)
    print("done")
    # init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    # ret = rknn.init_runtime('rk1808', device_id='1808')
    if ret != 0:
        print('Init runtime environment failed')
        exit(ret)
    print('done')

    # Set inputs
    img = cv2.imread(IMG_PATH)
    # img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img,(IMG_SIZE, IMG_SIZE))

    # Inference
    print('--> Running model')
    outputs = rknn.inference(inputs=[img])

    # post process
    input0_data = outputs[0]
    input1_data = outputs[1]
    input2_data = outputs[2]

    input0_data=np.transpose(input0_data, (0, 1, 4, 2,3))
    input1_data=np.transpose(input1_data, (0, 1, 4, 2,3))
    input2_data =np.transpose(input2_data, (0, 1, 4, 2,3))

    input0_data = input0_data.reshape([3,-1]+list(input0_data.shape[-2:]))
    input1_data = input1_data.reshape([3,-1]+list(input1_data.shape[-2:]))
    input2_data = input2_data.reshape([3,-1]+list(input2_data.shape[-2:]))

    input_data = list()
    input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))

    boxes, classes, scores = yolov7_post_process(input_data)

    img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(img_1, boxes, scores, classes)
    cv2.imshow("post process result", img_1)
    cv2.imwrite("post.jpg",img_1)
    cv2.waitKeyEx(0)

    rknn.release()

Test results:

[Figure 2: RKNN Python detection result]

Surprisingly, inspecting the ONNX model with Netron reveals:

[Figure 3: Netron view of the output heads]

Right after the Reshape the tensor is already 1×3×6×20×20, so the outputs can be taken there and the official demo's post-processing can be used without modifying the model.

There are three ways to do this: (1) take the outputs directly at those node IDs (works from Python, but only there); (2) patch the PyTorch source so the Transpose is never generated; (3) edit the ONNX graph and remove the last two nodes of each head.
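
A minimal sketch of option 3 with the onnx package. The tensor IDs '513'/'533'/'553' follow the load_onnx call below; treat them and the old output names as placeholders and verify them in Netron first, since they differ per export:

import onnx

model = onnx.load('runs/train/exp3/weights/best.onnx')
graph = model.graph

keep = ['513', '533', '553']      # Reshape outputs, per the Netron screenshot
old = {'output', '534', '554'}    # post-Transpose graph outputs to drop

# Remove the trailing Transpose nodes that produce the old outputs.
for node in list(graph.node):
    if any(o in old for o in node.output):
        graph.node.remove(node)

# Re-declare the Reshape outputs as the graph outputs.
del graph.output[:]
graph.output.extend(onnx.helper.make_empty_tensor_value_info(n) for n in keep)

onnx.save(model, 'runs/train/exp3/weights/best_remove_transpose.onnx')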

Alternatively, option 1 is simply passing those node IDs to load_onnx:

from rknn.api import RKNN

ONNX_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx'
RKNN_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best_remove_transpose.rknn'

if __name__ == '__main__':

    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], reorder_channel='0 1 2',
                target_platform='rk3399pro',
                quantized_dtype='asymmetric_affine-u8', optimization_level=3, output_optimize=1)
    print('done')

    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL, outputs=['513', '533', '553'])
    if ret != 0:
        print('Load model  failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset='dataset.txt')  # ,pre_compile=True
    if ret != 0:
        print('Build yolov5s failed!')
        exit(ret)
    print('done')

    # Export rknn model
    print('--> Export RKNN model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export yolov5s.rknn failed!')
        exit(ret)
    print('done')

    rknn.release()

For the test, the YOLOv5 test code can be reused directly:

import os
import numpy as np
import cv2
from rknn.api import RKNN


ONNX_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx'
RKNN_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best_remove_transpose.rknn'
IMG_PATH = './bus.jpg'
DATASET = './dataset.txt'

QUANTIZE_ON = True

BOX_THRESH = 0.5
NMS_THRESH = 0.6
IMG_SIZE = 640

CLASSES = ("person")
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def xywh2xyxy(x):
    # Convert [x, y, w, h] to [x1, y1, x2, y2]
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y

def process(input, mask, anchors):

    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = sigmoid(input[..., 4])
    box_confidence = np.expand_dims(box_confidence, axis=-1)

    box_class_probs = sigmoid(input[..., 5:])

    box_xy = sigmoid(input[..., :2])*2 - 0.5

    col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    box_xy *= int(IMG_SIZE/grid_h)

    box_wh = pow(sigmoid(input[..., 2:4])*2, 2)
    box_wh = box_wh * anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs

def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes with box threshold. It's a bit different with origin yolov5 post process!

    # Arguments
        boxes: ndarray, boxes of objects.
        box_confidences: ndarray, confidences of objects.
        box_class_probs: ndarray, class_probs of objects.

    # Returns
        boxes: ndarray, filtered boxes.
        classes: ndarray, classes for boxes.
        scores: ndarray, scores for boxes.
    """
    box_classes = np.argmax(box_class_probs, axis=-1)
    box_class_scores = np.max(box_class_probs, axis=-1)
    pos = np.where(box_confidences[...,0] >= BOX_THRESH)


    boxes = boxes[pos]
    classes = box_classes[pos]
    scores = box_class_scores[pos]

    return boxes, classes, scores

def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.

    # Arguments
        boxes: ndarray, boxes of objects.
        scores: ndarray, scores of objects.

    # Returns
        keep: ndarray, index of effective boxes.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= NMS_THRESH)[0]
        order = order[inds + 1]
    keep = np.array(keep)
    return keep


def yolov7_post_process(input_data):
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[12,16], [ 19,36], [40,28],
               [36,75], [76,55],[72,146],
               [142,110], [192,243], [459,401]]

    boxes, classes, scores = [], [], []
    for input,mask in zip(input_data, masks):
        b, c, s = process(input, mask, anchors)
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)

    boxes = np.concatenate(boxes)
    boxes = xywh2xyxy(boxes)
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)

    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]

        keep = nms_boxes(b, s)

        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)

    return boxes, classes, scores

def draw(image, boxes, scores, classes):
    """Draw the boxes on the image.

    # Argument:
        image: original image.
        boxes: ndarray, boxes of objects.
        classes: ndarray, classes of objects.
        scores: ndarray, scores of objects.
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        top, left, right, bottom = box
        print('class: {}, score: {}'.format(CLASSES[cl], score))
        print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(top, left, right, bottom))
        top = int(top)
        left = int(left)
        right = int(right)
        bottom = int(bottom)

        cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (top, left - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)


def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


if __name__ == '__main__':

    # Create RKNN object
    rknn = RKNN()

    if not os.path.exists(RKNN_MODEL):
        print("model not exist")
        exit(-1)

    # Load ONNX model
    print("--> Loading model")
    ret = rknn.load_rknn(RKNN_MODEL)
    if ret != 0:
        print("Load rknn model failed!")
        exit(ret)
    print("done")
    # init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    # ret = rknn.init_runtime('rk1808', device_id='1808')
    if ret != 0:
        print('Init runtime environment failed')
        exit(ret)
    print('done')

    # Set inputs
    img = cv2.imread(IMG_PATH)
    # img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img,(IMG_SIZE, IMG_SIZE))

    # Inference
    print('--> Running model')
    outputs = rknn.inference(inputs=[img])

    # post process
    input0_data = outputs[0]
    input1_data = outputs[1]
    input2_data = outputs[2]

    # input0_data=np.transpose(input0_data, (0, 1, 4, 2,3))
    # input1_data=np.transpose(input1_data, (0, 1, 4, 2,3))
    # input2_data =np.transpose(input2_data, (0, 1, 4, 2,3))

    input0_data = input0_data.reshape([3,-1]+list(input0_data.shape[-2:]))
    input1_data = input1_data.reshape([3,-1]+list(input1_data.shape[-2:]))
    input2_data = input2_data.reshape([3,-1]+list(input2_data.shape[-2:]))

    print(input0_data.shape)
    print(input1_data.shape)
    print(input2_data.shape)

    input_data = list()
    input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))

    print(np.transpose(input0_data, (2, 3, 0, 1)).shape)
    print(np.transpose(input1_data, (2, 3, 0, 1)).shape)
    print(np.transpose(input2_data, (2, 3, 0, 1)).shape)

    boxes, classes, scores = yolov7_post_process(input_data)

    img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(img_1, boxes, scores, classes)
    cv2.imshow("post process result", img_1)
    cv2.waitKeyEx(0)

    rknn.release()

The result is the same.

[Figure 4: detection result from the transpose-free model]

C++ test on RK3399 Pro

CMakeLists.txt

cmake_minimum_required(VERSION 3.16)
project(untitled10)
set(CMAKE_CXX_FLAGS "-std=c++11")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")

include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR}/include)
find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
#add OpenCV headers
include_directories(${OpenCV_INCLUDE_DIRS})
#link OpenCV libs
add_library(librknn_api SHARED IMPORTED)
set_target_properties(librknn_api PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib64/librknn_api.so)


add_executable(untitled10 main.cpp)
target_link_libraries(untitled10 ${OpenCV_LIBS} librknn_api )

main.cpp

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <iostream>
#include "rknn_api.h"
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <vector>
#define OBJ_NAME_MAX_SIZE 16
#define OBJ_NUMB_MAX_SIZE 200
#define OBJ_CLASS_NUM     1
#define PROP_BOX_SIZE     (5+OBJ_CLASS_NUM)
using namespace std;

typedef struct _BOX_RECT {
    int left;
    int right;
    int top;
    int bottom;
} BOX_RECT;

typedef struct __detect_result_t {
    char name[OBJ_NAME_MAX_SIZE];
    int class_index;
    BOX_RECT box;
    float prop;
} detect_result_t;

typedef struct _detect_result_group_t {
    int id;
    int count;
    detect_result_t results[OBJ_NUMB_MAX_SIZE];
} detect_result_group_t;

const int anchor0[6] = {12,16, 19,36, 40,28};
const int anchor1[6] = {36,75, 76,55, 72,146};
const int anchor2[6] = {142,110, 192,243, 459,401};

void printRKNNTensor(rknn_tensor_attr *attr) {
    printf("index=%d name=%s n_dims=%d dims=[%d %d %d %d] n_elems=%d size=%d "
           "fmt=%d type=%d qnt_type=%d fl=%d zp=%d scale=%f\n",
           attr->index, attr->name, attr->n_dims, attr->dims[3], attr->dims[2],
           attr->dims[1], attr->dims[0], attr->n_elems, attr->size, attr->fmt, attr->type,
           attr->qnt_type, attr->fl, attr->zp, attr->scale);
}

float sigmoid(float x) {
    return 1.0 / (1.0 + expf(-x));
}

float unsigmoid(float y) {
    return -1.0 * logf((1.0 / y) - 1.0);
}

int process_fp(float *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride,
               std::vector &boxes, std::vector &boxScores, std::vector &classId,
               float threshold) {

    int validCount = 0;
    int grid_len = grid_h * grid_w;
    float thres_sigmoid = unsigmoid(threshold);
    for (int a = 0; a < 3; a++) {
        for (int i = 0; i < grid_h; i++) {
            for (int j = 0; j < grid_w; j++) {
                float box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j];
                if (box_confidence >= thres_sigmoid) {
                    int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;
                    float *in_ptr = input + offset;
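                    // YOLOv5/v7-style decode: xy = (2*sigmoid(t) - 0.5 + grid) * stride,
                    // wh = (2*sigmoid(t))^2 * anchor. Channel c of anchor a for cell (i,j)
                    // lives at (PROP_BOX_SIZE*a + c)*grid_len + i*grid_w + j.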
                    float box_x = sigmoid(*in_ptr) * 2.0 - 0.5;
                    float box_y = sigmoid(in_ptr[grid_len]) * 2.0 - 0.5;
                    float box_w = sigmoid(in_ptr[2 * grid_len]) * 2.0;
                    float box_h = sigmoid(in_ptr[3 * grid_len]) * 2.0;
                    box_x = (box_x + j) * (float) stride;
                    box_y = (box_y + i) * (float) stride;
                    box_w = box_w * box_w * (float) anchor[a * 2];
                    box_h = box_h * box_h * (float) anchor[a * 2 + 1];
                    box_x -= (box_w / 2.0);
                    box_y -= (box_h / 2.0);
                    boxes.push_back(box_x);
                    boxes.push_back(box_y);
                    boxes.push_back(box_w);
                    boxes.push_back(box_h);

                    float maxClassProbs = in_ptr[5 * grid_len];
                    int maxClassId = 0;
                    for (int k = 1; k < OBJ_CLASS_NUM; ++k) {
                        float prob = in_ptr[(5 + k) * grid_len];
                        if (prob > maxClassProbs) {
                            maxClassId = k;
                            maxClassProbs = prob;
                        }
                    }
                    float box_conf_f32 = sigmoid(box_confidence);
                    float class_prob_f32 = sigmoid(maxClassProbs);
                    boxScores.push_back(box_conf_f32 * class_prob_f32);
                    classId.push_back(maxClassId);
                    validCount++;
                }
            }
        }
    }
    return validCount;
}

float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
                       float ymax1) {
    float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
    float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
    float i = w * h;
    float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
    return u <= 0.f ? 0.f : (i / u);
}
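// IoU with the +1 "inclusive pixel" convention; callers pass corner
// coordinates built from the (x, y, w, h) entries in the box vector.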

int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> &order, float threshold) {
    for (int i = 0; i < validCount; ++i) {
        if (order[i] == -1) {
            continue;
        }
        int n = order[i];
        for (int j = i + 1; j < validCount; ++j) {
            int m = order[j];
            if (m == -1) {
                continue;
            }
            float xmin0 = outputLocations[n * 4 + 0];
            float ymin0 = outputLocations[n * 4 + 1];
            float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
            float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];

            float xmin1 = outputLocations[m * 4 + 0];
            float ymin1 = outputLocations[m * 4 + 1];
            float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
            float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];

            float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);

            if (iou > threshold) {
                order[j] = -1;
            }
        }
    }
    return 0;
}

int quick_sort_indice_inverse(
        std::vector<float> &input,
        int left,
        int right,
        std::vector<int> &indices) {
    float key;
    int key_index;
    int low = left;
    int high = right;
    if (left < right) {
        key_index = indices[left];
        key = input[left];
        while (low < high) {
            while (low < high && input[high] <= key) {
                high--;
            }
            input[low] = input[high];
            indices[low] = indices[high];
            while (low < high && input[low] >= key) {
                low++;
            }
            input[high] = input[low];
            indices[high] = indices[low];
        }
        input[low] = key;
        indices[low] = key_index;
        quick_sort_indice_inverse(input, left, low - 1, indices);
        quick_sort_indice_inverse(input, low + 1, right, indices);
    }
    return low;
}
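// Sorts the scores in descending order while permuting the index array in
// step, so nms() can walk candidates best-first and suppress overlapping
// boxes by writing -1 into the suppressed entries of `order`.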

int clamp(float val, int min, int max) {
    return val > min ? (val < max ? val : max) : min;
}

int post_process_fp(float *input0, float *input1, float *input2, int model_in_h, int model_in_w,
                    int h_offset, int w_offset, float resize_scale, float conf_threshold, float nms_threshold,
                    detect_result_group_t *group, const char *labels[]) {
    memset(group, 0, sizeof(detect_result_group_t));
    std::vector<float> filterBoxes;
    std::vector<float> boxesScore;
    std::vector<int> classId;
    int stride0 = 8;
    int grid_h0 = model_in_h / stride0;
    int grid_w0 = model_in_w / stride0;
    int validCount0 = 0;
    validCount0 = process_fp(input0, (int *) anchor0, grid_h0, grid_w0, model_in_h, model_in_w,
                             stride0, filterBoxes, boxesScore, classId, conf_threshold);

    int stride1 = 16;
    int grid_h1 = model_in_h / stride1;
    int grid_w1 = model_in_w / stride1;
    int validCount1 = 0;
    validCount1 = process_fp(input1, (int *) anchor1, grid_h1, grid_w1, model_in_h, model_in_w,
                             stride1, filterBoxes, boxesScore, classId, conf_threshold);

    int stride2 = 32;
    int grid_h2 = model_in_h / stride2;
    int grid_w2 = model_in_w / stride2;
    int validCount2 = 0;
    validCount2 = process_fp(input2, (int *) anchor2, grid_h2, grid_w2, model_in_h, model_in_w,
                             stride2, filterBoxes, boxesScore, classId, conf_threshold);

    int validCount = validCount0 + validCount1 + validCount2;
    // no object detect
    if (validCount <= 0) {
        return 0;
    }

    std::vector<int> indexArray;
    for (int i = 0; i < validCount; ++i) {
        indexArray.push_back(i);
    }

    quick_sort_indice_inverse(boxesScore, 0, validCount - 1, indexArray);

    nms(validCount, filterBoxes, indexArray, nms_threshold);

    int last_count = 0;
    /* box valid detect target */
    for (int i = 0; i < validCount; ++i) {

        if (indexArray[i] == -1 || boxesScore[i] < conf_threshold || last_count >= OBJ_NUMB_MAX_SIZE) {
            continue;
        }
        int n = indexArray[i];

        float x1 = filterBoxes[n * 4 + 0];
        float y1 = filterBoxes[n * 4 + 1];
        float x2 = x1 + filterBoxes[n * 4 + 2];
        float y2 = y1 + filterBoxes[n * 4 + 3];
        int id = classId[n];

        group->results[last_count].box.left = (int) ((clamp(x1, 0, model_in_w) - w_offset) / resize_scale);
        group->results[last_count].box.top = (int) ((clamp(y1, 0, model_in_h) - h_offset) / resize_scale);
        group->results[last_count].box.right = (int) ((clamp(x2, 0, model_in_w) - w_offset) / resize_scale);
        group->results[last_count].box.bottom = (int) ((clamp(y2, 0, model_in_h) - h_offset) / resize_scale);
        group->results[last_count].prop = boxesScore[i];
        group->results[last_count].class_index = id;
        const char *label = labels[id];
        strncpy(group->results[last_count].name, label, OBJ_NAME_MAX_SIZE);

        // printf("result %2d: (%4d, %4d, %4d, %4d), %s\n", i, group->results[last_count].box.left, group->results[last_count].box.top,
        //        group->results[last_count].box.right, group->results[last_count].box.bottom, label);
        last_count++;
    }
    group->count = last_count;

    return 0;
}

float deqnt_affine_to_f32(uint8_t qnt, uint8_t zp, float scale) {
    return ((float) qnt - (float) zp) * scale;
}

int32_t __clip(float val, float min, float max) {
    float f = val <= min ? min : (val >= max ? max : val);
    return f;
}

uint8_t qnt_f32_to_affine(float f32, uint8_t zp, float scale) {
    float dst_val = (f32 / scale) + zp;
    uint8_t res = (uint8_t) __clip(dst_val, 0, 255);
    return res;
}
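// Asymmetric affine u8 quantization: f ~= (q - zp) * scale. Quantizing the
// confidence threshold once with qnt_f32_to_affine lets process_u8 compare
// raw u8 outputs directly and only dequantize the cells that survive.
// For example, with zp = 0 and scale = 0.0039, a raw value of 200 dequantizes to about 0.78.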

int process_u8(uint8_t *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride,
               std::vector<float> &boxes, std::vector<float> &boxScores, std::vector<int> &classId,
               float threshold, uint8_t zp, float scale) {

    int validCount = 0;
    int grid_len = grid_h * grid_w;
    float thres = unsigmoid(threshold);
    uint8_t thres_u8 = qnt_f32_to_affine(thres, zp, scale);
    for (int a = 0; a < 3; a++) {
        for (int i = 0; i < grid_h; i++) {
            for (int j = 0; j < grid_w; j++) {
                uint8_t box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j];
                if (box_confidence >= thres_u8) {
                    int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;
                    uint8_t *in_ptr = input + offset;
                    float box_x = sigmoid(deqnt_affine_to_f32(*in_ptr, zp, scale)) * 2.0 - 0.5;
                    float box_y = sigmoid(deqnt_affine_to_f32(in_ptr[grid_len], zp, scale)) * 2.0 - 0.5;
                    float box_w = sigmoid(deqnt_affine_to_f32(in_ptr[2 * grid_len], zp, scale)) * 2.0;
                    float box_h = sigmoid(deqnt_affine_to_f32(in_ptr[3 * grid_len], zp, scale)) * 2.0;
                    box_x = (box_x + j) * (float) stride;
                    box_y = (box_y + i) * (float) stride;
                    box_w = box_w * box_w * (float) anchor[a * 2];
                    box_h = box_h * box_h * (float) anchor[a * 2 + 1];
                    box_x -= (box_w / 2.0);
                    box_y -= (box_h / 2.0);
                    boxes.push_back(box_x);
                    boxes.push_back(box_y);
                    boxes.push_back(box_w);
                    boxes.push_back(box_h);

                    uint8_t maxClassProbs = in_ptr[5 * grid_len];
                    int maxClassId = 0;
                    for (int k = 1; k < OBJ_CLASS_NUM; ++k) {
                        uint8_t prob = in_ptr[(5 + k) * grid_len];
                        if (prob > maxClassProbs) {
                            maxClassId = k;
                            maxClassProbs = prob;
                        }
                    }
                    float box_conf_f32 = sigmoid(deqnt_affine_to_f32(box_confidence, zp, scale));
                    float class_prob_f32 = sigmoid(deqnt_affine_to_f32(maxClassProbs, zp, scale));
                    boxScores.push_back(box_conf_f32 * class_prob_f32);
                    classId.push_back(maxClassId);
                    validCount++;
                }
            }
        }
    }
    return validCount;
}

int post_process_u8(uint8_t *input0, uint8_t *input1, uint8_t *input2, int model_in_h, int model_in_w,
                    int h_offset, int w_offset, float resize_scale, float conf_threshold, float nms_threshold,
                    std::vector<uint8_t> &qnt_zps, std::vector<float> &qnt_scales,
                    detect_result_group_t *group, const char *labels[]) {

    memset(group, 0, sizeof(detect_result_group_t));

    std::vector<float> filterBoxes;
    std::vector<float> boxesScore;
    std::vector<int> classId;
    int stride0 = 8;
    int grid_h0 = model_in_h / stride0;
    int grid_w0 = model_in_w / stride0;
    int validCount0 = 0;
    validCount0 = process_u8(input0, (int *) anchor0, grid_h0, grid_w0, model_in_h, model_in_w,
                             stride0, filterBoxes, boxesScore, classId, conf_threshold, qnt_zps[0], qnt_scales[0]);

    int stride1 = 16;
    int grid_h1 = model_in_h / stride1;
    int grid_w1 = model_in_w / stride1;
    int validCount1 = 0;
    validCount1 = process_u8(input1, (int *) anchor1, grid_h1, grid_w1, model_in_h, model_in_w,
                             stride1, filterBoxes, boxesScore, classId, conf_threshold, qnt_zps[1], qnt_scales[1]);

    int stride2 = 32;
    int grid_h2 = model_in_h / stride2;
    int grid_w2 = model_in_w / stride2;
    int validCount2 = 0;
    validCount2 = process_u8(input2, (int *) anchor2, grid_h2, grid_w2, model_in_h, model_in_w,
                             stride2, filterBoxes, boxesScore, classId, conf_threshold, qnt_zps[2], qnt_scales[2]);

    int validCount = validCount0 + validCount1 + validCount2;
    // no object detect
    if (validCount <= 0) {
        return 0;
    }

    std::vector<int> indexArray;
    for (int i = 0; i < validCount; ++i) {
        indexArray.push_back(i);
    }

    quick_sort_indice_inverse(boxesScore, 0, validCount - 1, indexArray);

    nms(validCount, filterBoxes, indexArray, nms_threshold);

    int last_count = 0;
    group->count = 0;
    /* box valid detect target */
    for (int i = 0; i < validCount; ++i) {

        if (indexArray[i] == -1 || boxesScore[i] < conf_threshold || last_count >= OBJ_NUMB_MAX_SIZE) {
            continue;
        }
        int n = indexArray[i];

        float x1 = filterBoxes[n * 4 + 0];
        float y1 = filterBoxes[n * 4 + 1];
        float x2 = x1 + filterBoxes[n * 4 + 2];
        float y2 = y1 + filterBoxes[n * 4 + 3];
        int id = classId[n];

        group->results[last_count].box.left = (int) ((clamp(x1, 0, model_in_w) - w_offset) / resize_scale);
        group->results[last_count].box.top = (int) ((clamp(y1, 0, model_in_h) - h_offset) / resize_scale);
        group->results[last_count].box.right = (int) ((clamp(x2, 0, model_in_w) - w_offset) / resize_scale);
        group->results[last_count].box.bottom = (int) ((clamp(y2, 0, model_in_h) - h_offset) / resize_scale);
        group->results[last_count].prop = boxesScore[i];
        group->results[last_count].class_index = id;
        const char *label = labels[id];
        strncpy(group->results[last_count].name, label, OBJ_NAME_MAX_SIZE);

        // printf("result %2d: (%4d, %4d, %4d, %4d), %s\n", i, group->results[last_count].box.left, group->results[last_count].box.top,
        //        group->results[last_count].box.right, group->results[last_count].box.bottom, label);
        last_count++;
    }
    group->count = last_count;

    return 0;
}
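// post_process_u8 mirrors post_process_fp; the only difference is that box
// decoding dequantizes on the fly with the per-output zp/scale pairs queried
// from the model, so it works directly on the quantized output buffers.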
void letterbox(cv::Mat rgb,cv::Mat &img_resize,int target_width,int target_height){

    float shape_0=rgb.rows;
    float shape_1=rgb.cols;
    float new_shape_0=target_height;
    float new_shape_1=target_width;
    float r=std::min(new_shape_0/shape_0,new_shape_1/shape_1);
    float new_unpad_0=int(round(shape_1*r));
    float new_unpad_1=int(round(shape_0*r));
    float dw=new_shape_1-new_unpad_0;
    float dh=new_shape_0-new_unpad_1;
    dw=dw/2;
    dh=dh/2;
    cv::Mat copy_rgb=rgb.clone();
    // resize only when the unpadded size differs from the original (width, height)
    if (int(shape_1) != int(new_unpad_0) || int(shape_0) != int(new_unpad_1)) {
        cv::resize(copy_rgb, img_resize, cv::Size(new_unpad_0, new_unpad_1));
        copy_rgb=img_resize;
    }
    int top=int(round(dh-0.1));
    int bottom=int(round(dh+0.1));
    int left=int(round(dw-0.1));
    int right=int(round(dw+0.1));
    cv::copyMakeBorder(copy_rgb, img_resize,top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(0,0,0));

}
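// letterbox keeps the aspect ratio: scale by r = min(target_w/w, target_h/h),
// then pad symmetrically to target_width x target_height. main() recomputes
// the same r (resize_scale) and pads (w_pad/h_pad) so post_process_* can map
// detections back to the original image via (coord - pad) / resize_scale.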
int main(int argc, char **argv) {
    const char *img_path = "../bus.jpg";
    const char *model_path = "../best_remove_transpose.rknn";
    const char *post_process_type = "fp"; // "fp" (float outputs) or "u8" (quantized outputs)
    const int target_width = 640;
    const int target_height = 640;
    const char *image_process_mode = "letter_box";
    float resize_scale = 0;
    int h_pad=0;
    int w_pad=0;
    const float nms_threshold = 0.6;
    const float conf_threshold = 0.3;
    const char *labels[] = {"person"};
    // Load image
    cv::Mat bgr = cv::imread(img_path);
    if (!bgr.data) {
        printf("cv::imread %s fail!\n", img_path);
        return -1;
    }
    cv::Mat rgb;
    //BGR->RGB
    cv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGB);

    cv::Mat img_resize;
    float correction[2] = {0, 0};
    float scale_factor[] = {0, 0};
    int width=rgb.cols;
    int height=rgb.rows;
    // Letter box resize
    float img_wh_ratio = (float) width / (float) height;
    float input_wh_ratio = (float) target_width / (float) target_height;
    int resize_width;
    int resize_height;
    if (img_wh_ratio >= input_wh_ratio) {
        //pad height dim
        resize_scale = (float) target_width / (float) width;
        resize_width = target_width;
        resize_height = (int) ((float) height * resize_scale);
        w_pad = 0;
        h_pad = (target_height - resize_height) / 2;
    } else {
        //pad width dim
        resize_scale = (float) target_height / (float) height;
        resize_width = (int) ((float) width * resize_scale);
        resize_height = target_height;
        w_pad = (target_width - resize_width) / 2;
        h_pad = 0;
    }
    if (strcmp(image_process_mode, "letter_box") == 0) {
        letterbox(rgb, img_resize, target_width, target_height);
    } else {
        cv::resize(rgb, img_resize, cv::Size(target_width, target_height));
    }
    // Load model
    FILE *fp = fopen(model_path, "rb");
    if (fp == NULL) {
        printf("fopen %s fail!\n", model_path);
        return -1;
    }
    fseek(fp, 0, SEEK_END);
    int model_len = ftell(fp);
    void *model = malloc(model_len);
    fseek(fp, 0, SEEK_SET);
    if (model_len != fread(model, 1, model_len, fp)) {
        printf("fread %s fail!\n", model_path);
        free(model);
        return -1;
    }


    rknn_context ctx = 0;

    int ret = rknn_init(&ctx, model, model_len, 0);
    if (ret < 0) {
        printf("rknn_init fail! ret=%d\n", ret);
        return -1;
    }

    /* Query sdk version */
    rknn_sdk_version version;
    ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &version,
                     sizeof(rknn_sdk_version));
    if (ret < 0) {
        printf("rknn_init error ret=%d\n", ret);
        return -1;
    }
    printf("sdk version: %s driver version: %s\n", version.api_version,
           version.drv_version);


    /* Get input,output attr */
    rknn_input_output_num io_num;
    ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
    if (ret < 0) {
        printf("rknn_init error ret=%d\n", ret);
        return -1;
    }
    printf("model input num: %d, output num: %d\n", io_num.n_input,
           io_num.n_output);

    rknn_tensor_attr input_attrs[io_num.n_input];
    memset(input_attrs, 0, sizeof(input_attrs));
    for (int i = 0; i < io_num.n_input; i++) {
        input_attrs[i].index = i;
        ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]),
                         sizeof(rknn_tensor_attr));
        if (ret < 0) {
            printf("rknn_init error ret=%d\n", ret);
            return -1;
        }
        printRKNNTensor(&(input_attrs[i]));
    }

    rknn_tensor_attr output_attrs[io_num.n_output];
    memset(output_attrs, 0, sizeof(output_attrs));
    for (int i = 0; i < io_num.n_output; i++) {
        output_attrs[i].index = i;
        ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]),
                         sizeof(rknn_tensor_attr));
        printRKNNTensor(&(output_attrs[i]));
    }

    int input_channel = 3;
    int input_width = 0;
    int input_height = 0;
    if (input_attrs[0].fmt == RKNN_TENSOR_NCHW) {
        printf("model is NCHW input fmt\n");
        input_width = input_attrs[0].dims[0];
        input_height = input_attrs[0].dims[1];
        printf("input_width=%d input_height=%d\n", input_width, input_height);
    } else {
        printf("model is NHWC input fmt\n");
        input_width = input_attrs[0].dims[1];
        input_height = input_attrs[0].dims[2];
        printf("input_width=%d input_height=%d\n", input_width, input_height);
    }

    printf("model input height=%d, width=%d, channel=%d\n", input_height, input_width,
           input_channel);


/* Init input tensor */
    rknn_input inputs[1];
    memset(inputs, 0, sizeof(inputs));
    inputs[0].index = 0;
    inputs[0].buf = img_resize.data;
    inputs[0].type = RKNN_TENSOR_UINT8;
    inputs[0].size = input_width * input_height * input_channel;
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].pass_through = 0;
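    // pass_through = 0 lets the runtime apply the normalization/quantization
    // configured at model-conversion time; the buffer is the raw NHWC RGB u8
    // image produced by the resize above.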

    /* Init output tensor */
    rknn_output outputs[io_num.n_output];
    memset(outputs, 0, sizeof(outputs));
    for (int i = 0; i < io_num.n_output; i++) {
        if (strcmp(post_process_type, "fp") == 0) {
            outputs[i].want_float = 1;
        } else if (strcmp(post_process_type, "u8") == 0) {
            outputs[i].want_float = 0;
        }
    }
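    // want_float = 1 asks the runtime to dequantize outputs to float32 so the
    // "fp" post-process can run unchanged; want_float = 0 returns raw u8
    // buffers that post_process_u8 dequantizes itself using the queried zp/scale.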
    printf("img.cols: %d, img.rows: %d\n", img_resize.cols, img_resize.rows);
    auto t1=std::chrono::steady_clock::now();
    rknn_inputs_set(ctx, io_num.n_input, inputs);
    ret = rknn_run(ctx, NULL);
    if (ret < 0) {
        printf("ctx error ret=%d\n", ret);
        return -1;
    }
    ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);
    if (ret < 0) {
        printf("outputs error ret=%d\n", ret);
        return -1;
    }
    /* Post process */
    std::vector<float> out_scales;
    std::vector<uint8_t> out_zps;
    for (int i = 0; i < io_num.n_output; ++i) {
        out_scales.push_back(output_attrs[i].scale);
        out_zps.push_back(output_attrs[i].zp);
    }

    detect_result_group_t detect_result_group;
    if (strcmp(post_process_type, "u8") == 0) {
        post_process_u8((uint8_t *) outputs[0].buf, (uint8_t *) outputs[1].buf, (uint8_t *) outputs[2].buf,
                        input_height, input_width,
                        h_pad, w_pad, resize_scale, conf_threshold, nms_threshold, out_zps, out_scales,
                        &detect_result_group, labels);
    } else if (strcmp(post_process_type, "fp") == 0) {
        post_process_fp((float *) outputs[0].buf, (float *) outputs[1].buf, (float *) outputs[2].buf, input_height,
                        input_width,
                        h_pad, w_pad, resize_scale, conf_threshold, nms_threshold, &detect_result_group, labels);
    }
    // elapsed time in milliseconds (input set + inference + post-process)
    auto t2 = std::chrono::steady_clock::now();
    double dr_ms = std::chrono::duration<double, std::milli>(t2 - t1).count();
    printf("%lf ms\n", dr_ms);


    for (int i = 0; i < detect_result_group.count; i++) {
        detect_result_t *det_result = &(detect_result_group.results[i]);
        printf("%s @ (%d %d %d %d) %f\n",
               det_result->name,
               det_result->box.left, det_result->box.top, det_result->box.right, det_result->box.bottom,
               det_result->prop);
        int bx1 = det_result->box.left;
        int by1 = det_result->box.top;
        int bx2 = det_result->box.right;
        int by2 = det_result->box.bottom;
        cv::rectangle(bgr, cv::Point(bx1, by1), cv::Point(bx2, by2), cv::Scalar(231, 232, 143));  // draw the box from its two corner points
        char text[256];
        sprintf(text, "%s %.1f%% ", det_result->name, det_result->prop * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        int x = bx1;
        int y = by1 - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > bgr.cols)
            x = bgr.cols - label_size.width;


        cv::rectangle(bgr, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(0, 0, 255), -1);

        cv::putText(bgr, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_DUPLEX, 0.4, cv::Scalar(255, 255, 255), 1, cv::LINE_AA);

    }
    // write the annotated image once, after all boxes are drawn
    cv::imwrite("../bgr9.jpg", bgr);


    ret = rknn_outputs_release(ctx, io_num.n_output, outputs);

    if (ret < 0) {
        printf("rknn_query fail! ret=%d\n", ret);
        goto Error;
    }


    Error:
    if (ctx > 0)
        rknn_destroy(ctx);
    if (model)
        free(model);
    if (fp)
        fclose(fp);
    return 0;
}

Test result

[test result image]

 

十、OpenVINO deployment

To be completed (evening of 2022-12-16).

References

mnn-yolov5: Deploy YOLOv5 with MNN.
