ONNX model inference and getting each layer's output

As the title says, the code below runs inference and automatically handles whether to use the GPU or the CPU; all it takes is choosing the provider. If the installed onnxruntime is the GPU build, the GPU is used; otherwise the CPU is used. When running on the CPU, multithreading is enabled, which speeds up inference.
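As a quick aside, if you want to check explicitly which providers the installed onnxruntime build supports rather than relying on ort.get_device(), a minimal alternative is get_available_providers():

import onnxruntime as ort

# Lists the execution providers compiled into this onnxruntime build,
# e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider'] for the GPU build.
print(ort.get_available_providers())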

import copy
import os
import subprocess
from collections import OrderedDict

import cv2
import numpy as np
from tqdm import tqdm
import onnx
import onnxruntime as ort

def normalize(img, scale=None, mean=None, std=None):
    # Standard ImageNet normalization: scale to [0, 1], then subtract the
    # per-channel mean and divide by the per-channel std.
    if isinstance(scale, str):
        # scale may be passed as a string expression such as "1/255"
        scale = eval(scale)
    scale = np.float32(scale if scale is not None else 1.0 / 255.0)

    mean = mean if mean is not None else [0.485, 0.456, 0.406]
    std = std if std is not None else [0.229, 0.224, 0.225]

    shape = (1, 1, 3)
    mean = np.array(mean).reshape(shape).astype('float32')
    std = np.array(std).reshape(shape).astype('float32')

    assert isinstance(img, np.ndarray), "invalid input 'img' in normalize"
    img = (img.astype('float32') * scale - mean) / std
    return img
model = onnx.load("model.onnx")

# Expose every node's outputs as graph outputs so intermediate tensors
# can be fetched at inference time.
ori_output = copy.deepcopy(model.graph.output)
ori_output_names = [x.name for x in ori_output]
for node in model.graph.node:
    for output in node.output:
        if output not in ori_output_names:
            model.graph.output.extend([onnx.ValueInfoProto(name=output)])

# Configure the session: on CPU, set the intra-op thread count to the
# number of physical cores.
if ort.get_device() == "CPU":
    config = ort.SessionOptions()
    ret, val = subprocess.getstatusoutput("cat /proc/cpuinfo | grep 'core id' | sort | uniq | wc -l")
    if ret == 0:
        cpu_num_thread = int(val)
    else:
        cpu_num_thread = 4  # fallback when /proc/cpuinfo is unavailable (non-Linux)
    config.intra_op_num_threads = cpu_num_thread
    config.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    providers = ["CPUExecutionProvider"]
    ort_session = ort.InferenceSession(model.SerializeToString(), providers=providers, sess_options=config)
elif ort.get_device() == "GPU":
    providers = ["CUDAExecutionProvider"]
    ort_session = ort.InferenceSession(model.SerializeToString(), providers=providers)

image_list=["test.jpg"]
# for root,dir,files in os.walk('need_test/crop/'):
#     if len(files):
#         for ff in files:
#             n = os.path.join(root,ff)
#             image_list.append(n)

for img_path in tqdm(image_list):
    img = cv2.imread(img_path)
    if img is None:
        continue
    img = cv2.resize(img, (224, 224))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR -> RGB
    img = normalize(img)
    img = img.transpose((2, 0, 1))  # HWC -> CHW
    image = np.expand_dims(img, axis=0)  # add the batch dimension
    ort_inputs = {ort_session.get_inputs()[0].name: image}
    # Fetch every node's output (get_outputs()[0].name alone would be the
    # model's original single output).
    outputs = [x.name for x in ort_session.get_outputs()]
    ort_outs = ort_session.run(output_names=outputs, input_feed=ort_inputs)
    # Build a dict so each layer's output can be looked up by name.
    ort_outs = OrderedDict(zip(outputs, ort_outs))
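
With ort_outs built, every intermediate tensor can be inspected by name; for example, printing the shape of each layer's output:

for name, tensor in ort_outs.items():
    print(name, tensor.shape)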

There are several use cases:

  • 1. Selecting different hardware for inference and getting the results.
  • 2. Multithreaded inference on the CPU.
  • 3. Getting the output of every layer in the network. One common scenario: we often convert a model with onnx-simplifier or onnx-optimizer, and this lets us compare the per-layer outputs before and after the conversion (see the sketch after this list).
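
For use case 3, here is a minimal sketch of that comparison, assuming the script above has been run twice, once on the original model and once on the converted one, producing the two dicts outs_before and outs_after (hypothetical names):

import numpy as np

# Hypothetical: outs_before / outs_after are the OrderedDicts produced by
# running the script above on the original and on the converted model.
for name in outs_before:
    if name in outs_after:
        diff = np.max(np.abs(outs_before[name] - outs_after[name]))
        print(f"{name}: max abs diff = {diff}")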
