【代码片段】ONNX 模型转成 TensorRT 及推理测试

注意:无需从 NVIDIA 官网下载并安装 TensorRT;直接使用 pip 即可安装成功。

模型转换: 

"""
nvidia-pyindex            1.0.9
nvidia-tensorrt           8.0.1.6
"""

import tensorrt as trt

'''
Build a TensorRT engine from an ONNX model file and serialize it to disk.
'''
onnx_file_path = "model.onnx"  # path of the ONNX model to convert

G_LOGGER = trt.Logger(trt.Logger.WARNING)

# 1. EXPLICIT_BATCH is mandatory for ONNX-parsed networks and for
#    dynamic input shapes (TensorRT >= 7).
explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

batch_size = 1  # maximum batch size supported at inference time

with trt.Builder(G_LOGGER) as builder, builder.create_network(explicit_batch) as network, \
        trt.OnnxParser(network, G_LOGGER) as parser:

    # NOTE(review): max_batch_size is deprecated and ignored for
    # EXPLICIT_BATCH networks — the batch size comes from the network
    # and the optimization profile. Kept only for older-API compatibility.
    builder.max_batch_size = batch_size

    config = builder.create_builder_config()
    config.max_workspace_size = 1 << 32  # 4 GiB scratch space for tactic selection
    config.set_flag(trt.BuilderFlag.TF32)
    print('Loading ONNX file from path {}...'.format(onnx_file_path))

    with open(onnx_file_path, 'rb') as model:
        print('Beginning ONNX file parsing')
        # parse() returns False on failure; surface the parser errors
        # instead of silently building from a broken/empty network.
        if not parser.parse(model.read()):
            for error_index in range(parser.num_errors):
                print(parser.get_error(error_index))
            raise RuntimeError(
                'Failed to parse ONNX file: {}'.format(onnx_file_path))
    print('Completed parsing of ONNX file')
    print('Building an engine from file {}; this may take a while...'.format(
        onnx_file_path))

    # Optimization profile: required when the network has dynamic inputs.
    # Here min / opt / max are all (1, 32, 128, 3), i.e. a fixed shape.
    profile = builder.create_optimization_profile()
    profile.set_shape("input_1", (1, 32, 128, 3),
                      (1, 32, 128, 3), (1, 32, 128, 3))
    config.add_optimization_profile(profile)

    engine = builder.build_engine(network, config)
    # build_engine() returns None on failure rather than raising.
    if engine is None:
        raise RuntimeError('Engine build failed (see TensorRT log above)')
    print("Completed creating Engine")

    # Serialize the engine to disk under a custom file name.
    engine_file_path = 'face_rec.engine'
    with open(engine_file_path, "wb") as f:
        f.write(engine.serialize())

推理测试(反序列化 engine 并执行一次前向推理):

import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
import torch
from collections import OrderedDict, namedtuple
import numpy as np


# One record per engine binding (input or output tensor).
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
logger = trt.Logger(trt.Logger.INFO)
with open('face_rec.engine', 'rb') as f, trt.Runtime(logger) as runtime:
    model = runtime.deserialize_cuda_engine(f.read())
# deserialize_cuda_engine() returns None (instead of raising) on a corrupt
# or version-incompatible engine file — fail loudly here rather than with
# an opaque AttributeError below.
if model is None:
    raise RuntimeError('Failed to deserialize engine file: face_rec.engine')
bindings = OrderedDict()
fp16 = False  # flipped to True below if any input binding is half precision
for index in range(model.num_bindings):
    name = model.get_binding_name(index)
    dtype = trt.nptype(model.get_binding_dtype(index))
    shape = tuple(model.get_binding_shape(index))
    # Pre-allocate one GPU tensor per binding; its device pointer is what
    # TensorRT reads from / writes into during execute_v2().
    data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).cuda()
    bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
    if model.binding_is_input(index) and dtype == np.float16:
        fp16 = True
# execute_v2 expects pointers in binding-index order; the OrderedDict was
# filled in index order, so its value order is correct.
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
context = model.create_execution_context()
# Binding names mirror the ONNX graph's input/output node names —
# analogous to session.run(['703', '702'], {'input.1': input}) in onnxruntime.
print(bindings.keys())
batch_size = bindings['input.1'].shape[0]
# 'input.1' is the ONNX model's input node name

if __name__ == '__main__':
    img = torch.randn(1, 3, 32, 128).cuda()
    # Point the input binding at our own tensor instead of the placeholder.
    binding_addrs['input.1'] = int(img.data_ptr())
    context.execute_v2(list(binding_addrs.values()))
    y = bindings['703'].data  # '703' is the ONNX output node name
    print(y)

 

你可能感兴趣的:(pyTorch,深度学习,batch,pytorch)