Running PyTorch YOLOv5 Model Inference with OpenVINO

Download the YOLOv5 model and convert it to ONNX format

YOLOv5 v6.1 merges the three detection outputs into a single output node, which makes the results much easier to parse.

Follow https://docs.ultralytics.com/quick-start/ to set up the YOLOv5 development environment.

Convert the YOLOv5 PyTorch model to an ONNX model with:

python export.py --weights yolov5s.pt --include onnx

[Figure: conversion result of the export command]

If a network issue prevents export.py from downloading the yolov5s.pt weights from GitHub, copy the weights download link, fetch the file with a download manager such as Thunder (迅雷), and place it in the YOLOv5 directory. Then run the export command again; it should now complete successfully.
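
Before moving on, you can optionally run a quick structural check on the exported file. This is a minimal sketch; it assumes the onnx Python package is available (the ONNX export step itself requires it):

import onnx

# Load the exported model and run ONNX's structural checker
onnx_model = onnx.load("./yolov5s.onnx")
onnx.checker.check_model(onnx_model)
print("yolov5s.onnx is a valid ONNX model")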

Inspect the yolov5s.onnx v6.1 model with Netron

[Figure: yolov5s.onnx v6.1 model viewed in Netron]

From the graph you can see:

  • Model input node: name: images; shape (NCHW): [1, 3, 640, 640]; type: float32
  • Model output node: name: output; shape: [1, 25200, 85]; type: float32

output consolidates the raw outputs of the three detection layers (versions before v6.1 exposed three separate output nodes). Each of the 25,200 rows holds 85 values: the first five are cx, cy, w, h and the objectness score, and the remaining 80 are the class scores for the MSCOCO categories.
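
If you prefer to confirm this layout programmatically rather than in Netron, the following minimal sketch runs the model on a dummy input; it assumes the onnxruntime package is installed (it is not otherwise required by this article):

import numpy as np
import onnxruntime as ort  # assumption: pip install onnxruntime

# Run yolov5s.onnx on an all-zero input to inspect the output layout
session = ort.InferenceSession("./yolov5s.onnx")
dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)    # NCHW, float32
preds = session.run(["output"], {"images": dummy})[0]   # shape: (1, 25200, 85)

row = preds[0][0]              # one of the 25200 candidate predictions
cx, cy, w, h, score = row[:5]  # box center, size and objectness score
class_scores = row[5:]         # 80 MSCOCO class scores
print(preds.shape, len(class_scores))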

Running inference with the OpenCV DNN API

Full source code repository: https://gitee.com/ppov-nuc/yolov5_infer

import cv2
import numpy as np
import time
import yaml

# Load the COCO class labels from coco.yaml
with open('./coco.yaml','r', encoding='utf-8') as f:
    result = yaml.load(f.read(),Loader=yaml.FullLoader)
class_list = result['names']

# YOLOv5s input size
INPUT_WIDTH = 640
INPUT_HEIGHT = 640

# Object detection function: preprocess the image, run the network and return the raw predictions
def detect(image, net):
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (INPUT_WIDTH, INPUT_HEIGHT), swapRB=True, crop=False)
    net.setInput(blob)
    preds = net.forward()
    return preds

# YOLOv5 post-processing: parse the model output into class ids, confidences and boxes
def wrap_detection(input_image, output_data):
    class_ids = []
    confidences = []
    boxes = []
    #print(output_data.shape)
    rows = output_data.shape[0]

    image_height, image_width, _ = input_image.shape  # input_image is already square after format_yolov5

    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT

    for r in range(rows):
        row = output_data[r]
        confidence = row[4]
        if confidence >= 0.4:

            classes_scores = row[5:]
            # Locate the class with the highest score
            _, _, _, max_indx = cv2.minMaxLoc(classes_scores)
            class_id = max_indx[1]
            if (classes_scores[class_id] > .25):
                confidences.append(confidence)

                class_ids.append(class_id)

                x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
                left = int((x - 0.5 * w) * x_factor)
                top = int((y - 0.5 * h) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                box = np.array([left, top, width, height])
                boxes.append(box)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.25, 0.45)

    result_class_ids = []
    result_confidences = []
    result_boxes = []

    for i in indexes:
        result_confidences.append(confidences[i])
        result_class_ids.append(class_ids[i])
        result_boxes.append(boxes[i])

    return result_class_ids, result_confidences, result_boxes

# Pad the image to a square (1:1 aspect ratio) with black borders, as YOLOv5 expects
def format_yolov5(frame):
    row, col, _ = frame.shape
    _max = max(col, row)
    result = np.zeros((_max, _max, 3), np.uint8)
    result[0:row, 0:col] = frame
    return result


# Load the yolov5s.onnx model
model_path = "./yolov5s.onnx"
# Read yolov5s.onnx model with OpenCV API
net = cv2.dnn.readNetFromONNX(model_path)

# Open the webcam and set the resolution to 1280x720
cap = cv2.VideoCapture(0,cv2.CAP_DSHOW)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

# Color palette for drawing detection boxes
colors = [(255, 255, 0), (0, 255, 0), (0, 255, 255), (255, 0, 0)]

# Start the detection loop
while True:
    start = time.time()
    _, frame = cap.read()
    if frame is None:
        print("End of stream")
        break
    # Pad the frame to a square
    inputImage = format_yolov5(frame)
    # Run inference
    outs = detect(inputImage, net)
    # Parse the inference results
    class_ids, confidences, boxes = wrap_detection(inputImage, outs[0])

    # Draw the detection bounding boxes and labels
    for (classid, confidence, box) in zip(class_ids, confidences, boxes):
        color = colors[int(classid) % len(colors)]
        cv2.rectangle(frame, box, color, 2)
        cv2.rectangle(frame, (box[0], box[1] - 20), (box[0] + box[2], box[1]), color, -1)
        cv2.putText(frame, class_list[classid], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 0))
    
    # Compute and display the inference speed (FPS)
    end = time.time()
    inf_end = end - start
    fps = 1 / inf_end
    fps_label = "FPS: %.2f" % fps
    cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    print(fps_label+ "; Detections: " + str(len(class_ids)))
    cv2.imshow("output", frame)

    if cv2.waitKey(1) > -1:
        print("finished by user")
        break
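
The script above reads from a webcam. If you only want to test on a single image, the same functions can be reused; a minimal sketch, assuming a local test image such as bus.jpg (any image path will do):

# Single-image variant of the detection loop above
frame = cv2.imread("./bus.jpg")
inputImage = format_yolov5(frame)
outs = detect(inputImage, net)
class_ids, confidences, boxes = wrap_detection(inputImage, outs[0])
for (classid, confidence, box) in zip(class_ids, confidences, boxes):
    color = colors[int(classid) % len(colors)]
    cv2.rectangle(frame, box, color, 2)
    cv2.putText(frame, class_list[classid], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 0))
cv2.imwrite("./result.jpg", frame)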

Running inference with the OpenVINO API

import cv2
import numpy as np
import time
import yaml
from openvino.inference_engine import IECore  # legacy Inference Engine API, OpenVINO <= 2021.4.2

# Load the COCO class labels from coco.yaml
with open('./coco.yaml','r', encoding='utf-8') as f:
    result = yaml.load(f.read(),Loader=yaml.FullLoader)
class_list = result['names']

# YOLOv5s input size
INPUT_WIDTH = 640
INPUT_HEIGHT = 640

# Object detection function: preprocess the image, run inference and return the raw predictions
def detect(image, net):
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (INPUT_WIDTH, INPUT_HEIGHT), swapRB=True, crop=False)
    result = net.infer({"images": blob})
    preds = result["output"]
    return preds

# YOLOv5 post-processing: parse the model output into class ids, confidences and boxes
def wrap_detection(input_image, output_data):
    class_ids = []
    confidences = []
    boxes = []
    #print(output_data.shape)
    rows = output_data.shape[0]

    image_height, image_width, _ = input_image.shape  # input_image is already square after format_yolov5

    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT

    for r in range(rows):
        row = output_data[r]
        confidence = row[4]
        if confidence >= 0.4:

            classes_scores = row[5:]
            # Locate the class with the highest score
            _, _, _, max_indx = cv2.minMaxLoc(classes_scores)
            class_id = max_indx[1]
            if (classes_scores[class_id] > .25):
                confidences.append(confidence)

                class_ids.append(class_id)

                x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
                left = int((x - 0.5 * w) * x_factor)
                top = int((y - 0.5 * h) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                box = np.array([left, top, width, height])
                boxes.append(box)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.25, 0.45)

    result_class_ids = []
    result_confidences = []
    result_boxes = []

    for i in indexes:
        result_confidences.append(confidences[i])
        result_class_ids.append(class_ids[i])
        result_boxes.append(boxes[i])

    return result_class_ids, result_confidences, result_boxes

# Pad the image to a square (1:1 aspect ratio) with black borders, as YOLOv5 expects
def format_yolov5(frame):
    row, col, _ = frame.shape
    _max = max(col, row)
    result = np.zeros((_max, _max, 3), np.uint8)
    result[0:row, 0:col] = frame
    return result


# Load the yolov5s.onnx model
model_path = "./yolov5s.onnx"
# Read yolov5s.onnx model with OpenVINO API
ie = IECore()
exec_net = ie.load_network(network=model_path, device_name="CPU")

# Open the webcam and set the resolution to 1280x720
cap = cv2.VideoCapture(0,cv2.CAP_DSHOW)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

# Color palette for drawing detection boxes
colors = [(255, 255, 0), (0, 255, 0), (0, 255, 255), (255, 0, 0)]

# Start the detection loop
while True:
    start = time.time()
    _, frame = cap.read()
    if frame is None:
        print("End of stream")
        break
    # Pad the frame to a square
    inputImage = format_yolov5(frame)
    # Run inference
    outs = detect(inputImage, exec_net)
    # Parse the inference results
    class_ids, confidences, boxes = wrap_detection(inputImage, outs[0])

    # Draw the detection bounding boxes and labels
    for (classid, confidence, box) in zip(class_ids, confidences, boxes):
        color = colors[int(classid) % len(colors)]
        cv2.rectangle(frame, box, color, 2)
        cv2.rectangle(frame, (box[0], box[1] - 20), (box[0] + box[2], box[1]), color, -1)
        cv2.putText(frame, class_list[classid], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 0))
    
    # Compute and display the inference speed (FPS)
    end = time.time()
    inf_end = end - start
    fps = 1 / inf_end
    fps_label = "FPS: %.2f" % fps
    cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    print(fps_label+ "; Detections: " + str(len(class_ids)))
    cv2.imshow("output", frame)

    if cv2.waitKey(1) > -1:
        print("finished by user")
        break
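
The IECore / Inference Engine API used above is the legacy (pre-2022) Python API. On OpenVINO 2022.1 and later, the recommended way is the openvino.runtime API; a minimal sketch that keeps the preprocessing and post-processing functions unchanged:

from openvino.runtime import Core  # OpenVINO 2022.1+ API

core = Core()
model = core.read_model("./yolov5s.onnx")
compiled_model = core.compile_model(model, "CPU")
output_node = compiled_model.output(0)

def detect_2022(image):
    # Same blobFromImage preprocessing as before, inference via the compiled model
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (INPUT_WIDTH, INPUT_HEIGHT), swapRB=True, crop=False)
    preds = compiled_model([blob])[output_node]  # shape: (1, 25200, 85)
    return preds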
