注意:不需要从NVIDIA官网下载TensorRT安装包;直接使用 pip 即可成功安装。
模型转换:
"""
nvidia-pyindex 1.0.9
nvidia-tensorrt 8.0.1.6
"""
import tensorrt as trt
'''
Build a TensorRT engine by parsing an ONNX model file, then serialize
the engine to disk.
'''
onnx_file_path = "model.onnx"  # path of the ONNX model to convert
G_LOGGER = trt.Logger(trt.Logger.WARNING)
# 1. The explicit-batch flag is mandatory for dynamic-input networks.
explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
batch_size = 1  # maximum batch size supported at inference time
with trt.Builder(G_LOGGER) as builder, builder.create_network(explicit_batch) as network, \
        trt.OnnxParser(network, G_LOGGER) as parser:
    builder.max_batch_size = batch_size
    config = builder.create_builder_config()
    config.max_workspace_size = 1 << 32  # 4 GiB scratch space for tactic selection
    config.set_flag(trt.BuilderFlag.TF32)
    print('Loading ONNX file from path {}...'.format(onnx_file_path))
    with open(onnx_file_path, 'rb') as model:
        print('Beginning ONNX file parsing')
        # BUG FIX: parser.parse() returns False on failure; the original code
        # ignored the result and would silently build from a broken network.
        if not parser.parse(model.read()):
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            raise RuntimeError('Failed to parse ONNX file: {}'.format(onnx_file_path))
    print('Completed parsing of ONNX file')
    print('Building an engine from file {}; this may take a while...'.format(
        onnx_file_path))
    # Dynamic-shape (explicit batch) networks require an optimization profile;
    # min/opt/max are all the same fixed shape here.
    profile = builder.create_optimization_profile()
    profile.set_shape("input_1", (1, 32, 128, 3),
                      (1, 32, 128, 3), (1, 32, 128, 3))
    config.add_optimization_profile(profile)
    engine = builder.build_engine(network, config)
    # BUG FIX: build_engine returns None when the build fails; fail fast with
    # a clear message instead of an AttributeError on engine.serialize().
    if engine is None:
        raise RuntimeError('Failed to build the TensorRT engine')
    print("Completed creating Engine")
    # Save the serialized engine under a user-chosen file name.
    engine_file_path = 'face_rec.engine'
    with open(engine_file_path, "wb") as f:
        f.write(engine.serialize())
推理测试:
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
import torch
from collections import OrderedDict, namedtuple
import numpy as np

# One record per engine binding: name, numpy dtype, shape, device tensor, raw pointer.
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
logger = trt.Logger(trt.Logger.INFO)
with open('face_rec.engine', 'rb') as f, trt.Runtime(logger) as runtime:
    model = runtime.deserialize_cuda_engine(f.read())
# BUG FIX: deserialize_cuda_engine returns None on failure (e.g. the engine
# was built with an incompatible TensorRT version); fail fast with a clear
# message instead of crashing later on model.num_bindings.
if model is None:
    raise RuntimeError('Failed to deserialize engine file face_rec.engine')
bindings = OrderedDict()
fp16 = False  # default; updated below if any input binding is half precision
for index in range(model.num_bindings):
    name = model.get_binding_name(index)
    dtype = trt.nptype(model.get_binding_dtype(index))
    shape = tuple(model.get_binding_shape(index))
    # Pre-allocate a CUDA tensor for each binding and record its device address.
    data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).cuda()
    bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
    if model.binding_is_input(index) and dtype == np.float16:
        fp16 = True
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
context = model.create_execution_context()
# Input/output node names come from the ONNX graph, equivalent to
# session.run(['703','702'], {'input.1': input})
print(bindings.keys())
batch_size = bindings['input.1'].shape[0]
# 'input.1' is the input node name of the ONNX model
if __name__ == '__main__':
    img = torch.randn(1, 3, 32, 128).cuda()
    binding_addrs['input.1'] = int(img.data_ptr())
    context.execute_v2(list(binding_addrs.values()))
    y = bindings['703'].data
    print(y)