def onnx_2_trt(onnx_model_name, trt_model_name):
    """Parse an ONNX model and build a serialized TensorRT engine (TRT 5/6 style).

    Args:
        onnx_model_name: path of the ONNX model file to load.
        trt_model_name: path where the serialized engine is written.

    Returns:
        The built ICudaEngine on success; exits the process if parsing fails.
    """
    # NOTE(review): relies on a module-level G_LOGGER (trt.Logger) defined
    # elsewhere in this file — confirm it exists at call time.
    with trt.Builder(G_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, G_LOGGER) as parser:
        builder.max_batch_size = 1024
        builder.max_workspace_size = 2 << 30  # 2 GiB workspace
        print('Loading ONNX file from path {}...'.format(onnx_model_name))
        with open(onnx_model_name, 'rb') as model:
            print('Beginning ONNX file parsing')
            parsed_ok = parser.parse(model.read())
        # Fix: was `if 1:`, which took the success path even when parsing failed.
        if parsed_ok:
            print('Completed parsing of ONNX file')
            # Fix: was args.onnx_model — `args` is not in scope here; use the parameter.
            print('Building an engine from file {}; this may take a while...'.format(onnx_model_name))
            ####
            #builder.int8_mode = True
            #builder.int8_calibrator = calib
            builder.fp16_mode = True
            ####
            print("layers:", network.num_layers)
            # Some models need the output marked explicitly; models whose outputs
            # were already marked during ONNX export do not need this line.
            network.mark_output(network.get_layer(network.num_layers - 1).get_output(0))
            engine = builder.build_cuda_engine(network)
            print(engine)
            print("Completed creating Engine")
            with open(trt_model_name, "wb") as f:
                f.write(engine.serialize())
            return engine
        else:
            print('Number of errors: {}'.format(parser.num_errors))
            # Only the first error is reported; extend if more than one is needed.
            error = parser.get_error(0)
            # Read the error fields before dropping the parser reference
            # (the original read them after `del parser`).
            desc = error.desc()
            line = error.line()
            code = error.code()
            del parser
            print('Description of the error: {}'.format(desc))
            print('Line where the error occurred: {}'.format(line))
            print('Error code: {}'.format(code))
            print("Model was not parsed successfully")
            exit(0)
TensorRT7转换onnx的方式与5/6不太一样,
def ONNX_build_engine(trt_model_name, onnx_model_name):
    """Parse an ONNX model and build a serialized TensorRT engine (TRT 7 style).

    TRT 7's ONNX parser only supports networks created with the
    EXPLICIT_BATCH flag, and without an optimization profile the input
    shape must be fully specified, hence the hard-coded shape below.

    Args:
        trt_model_name: path where the serialized engine is written.
        onnx_model_name: path of the ONNX model file to load.

    Returns:
        The built ICudaEngine on success; exits the process if parsing fails.
    """
    # trt7: explicit batch is mandatory for the ONNX parser.
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(G_LOGGER) as builder, \
            builder.create_network(explicit_batch) as network, \
            trt.OnnxParser(network, G_LOGGER) as parser:
        # NOTE(review): batch_size and max_workspace are module-level globals
        # defined elsewhere — confirm they are set before calling.
        builder.max_batch_size = batch_size
        builder.max_workspace_size = max_workspace << 30
        print('Loading ONNX file from path {}...'.format(onnx_model_name))
        with open(onnx_model_name, 'rb') as model:
            print('Beginning ONNX file parsing')
            parsed_ok = parser.parse(model.read())
            if not parsed_ok:
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
        # Fix: was `if 1:`, which went on to build even when parsing failed.
        if parsed_ok:
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_model_name))
            ####
            #builder.int8_mode = True
            #builder.int8_calibrator = calib
            builder.fp16_mode = True
            ####
            print("num layers:", network.num_layers)
            #last_layer = network.get_layer(network.num_layers - 1)
            #if not last_layer.get_output(0):
            # Some models need the output marked explicitly; models whose outputs
            # were already marked during ONNX export do not need this:
            #network.mark_output(network.get_layer(network.num_layers - 1).get_output(0))
            # trt7: no optimization profile is defined, so the input shape
            # must be fixed or the builder reports "dynamic or shape inputs".
            network.get_input(0).shape = [batch_size, 3, 224, 224]
            engine = builder.build_cuda_engine(network)
            print("engine:", engine)
            print("Completed creating Engine")
            with open(trt_model_name, "wb") as f:
                f.write(engine.serialize())
            return engine
        else:
            print('Number of errors: {}'.format(parser.num_errors))
            # Only the first error is reported; extend if more than one is needed.
            error = parser.get_error(0)
            # Read the error fields before dropping the parser reference.
            desc = error.desc()
            line = error.line()
            code = error.code()
            del parser
            print('Description of the error: {}'.format(desc))
            print('Line where the error occurred: {}'.format(line))
            print('Error code: {}'.format(code))
            print("Model was not parsed successfully")
            exit(0)
其中,trt7若缺少explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH),报错:
In node -1 (importModel): INVALID_VALUE: Assertion failed: !_importer_ctx.network()->hasImplicitBatchDimension() && "This version of the ONNX parser only supports TensorRT INetworkDefinitions with an explicit batch dimension. Please ensure the network was created using the EXPLICIT_BATCH NetworkDefinitionCreationFlag."
or :
python3: ../builder/Network.cpp:863: virtual nvinfer1::ILayer* nvinfer1::Network::getLayer(int) const: Assertion `layerIndex >= 0' failed. 无法解析onnx model
并且 trt7需要指定input size:network.get_input(0).shape = [1, 3, 224, 224],不然报错:
[TensorRT] ERROR: Network has dynamic or shape inputs, but no optimization profile has been defined.
[TensorRT] ERROR: Network validation failed.
tensorrt镜像中 /workspace/tensorrt/samples/python/ 有示例
Tensorrt7对onnx的要求:
用以上trt7代码对onnx模型做转换,仍不能成功,测得的gpu latency值会非常小,
torch模型转onnx模型时需要设置动态尺寸:
def torch2onnx_dynamic():
    """Export a ResNet50 checkpoint to ONNX with a dynamic batch dimension.

    Loads "torch_model.pth" from the working directory and writes
    "onnx_model.onnx". Marking axis 0 of both input and output as dynamic
    lets TensorRT 7 rebind the batch size later.
    """
    from resnet50 import ResNet50  # project-local model definition
    net = ResNet50()
    torch_model = "torch_model.pth"
    onnx_model = "onnx_model.onnx"
    checkpoint = torch.load(torch_model, map_location=torch.device('cpu'))
    batch_size = 1024  # any value works for tracing — axis 0 is dynamic
    dummy_input = torch.randn(batch_size, 3, 224, 224, requires_grad=False)
    input_names = ['input']
    output_names = ['output']
    # Key point: declare the batch axis (0) dynamic on both ends.
    dynamic_axes = {'input': {0: 'batch_size'},
                    'output': {0: 'batch_size'}}
    #net.cuda()
    #net.eval()
    net.load_state_dict(checkpoint['state_dict'], False)
    # Fix: the original snippet prepared all export arguments but never
    # called the exporter, so nothing was written.
    torch.onnx.export(net, dummy_input, onnx_model,
                      input_names=input_names,
                      output_names=output_names,
                      dynamic_axes=dynamic_axes)
更新:
又发现一个问题:
int8量化时,当builder.max_batch_size >1时,不管onnx dynamic的batch_size多大,会出现如下错误:
[TensorRT] ERROR: ../builder/cudnnCalibrator.cpp (724) - Cuda Error in add: 700 (an illegal memory access was encountered)
[TensorRT] ERROR: ../builder/cudnnCalibrator.cpp (724) - Cuda Error in add: 700 (an illegal memory access was encountered)
[TensorRT] ERROR: FAILED_ALLOCATION: std::exception
[TensorRT] ERROR: ../rtSafe/cuda/caskConvolutionRunner.cpp (351) - Cuda Error in allocateContextResources: 700 (an illegal memory access was encountered)
[TensorRT] ERROR: FAILED_EXECUTION: std::exception
[TensorRT] INTERNAL ERROR: Assertion failed: context->executeV2(&bindings[0])
../builder/cudnnCalibrator.cpp:1103
Aborting...
[TensorRT] ERROR: ../rtSafe/safeRuntime.cpp (32) - Cuda Error in free: 700 (an illegal memory access was encountered)
terminate called after throwing an instance of 'nvinfer1::CudaError'
what(): std::exception
Aborted (core dumped)
解决方法:
首先builder.max_batch_size=1,构建engine,会生成一个calibration.cache,然后再设置不同的builder.max_batch_size构建engine,读取=1时生成的cache,不会出错。