TensorRT 5/6/7 加载onnx模型

TensorRT5/6加载并转换onnx:

def onnx_2_trt(onnx_model_name, trt_model_name):
    """Parse an ONNX model and build + serialize a TensorRT 5/6 engine.

    Args:
        onnx_model_name: path to the input .onnx file.
        trt_model_name: path where the serialized engine is written.

    Returns:
        The built ICudaEngine on success; exits the process on parse failure.
    """
    # NOTE(review): relies on a module-level G_LOGGER (trt.Logger) that is
    # defined elsewhere — confirm it exists before calling.
    with trt.Builder(G_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.OnnxParser(network, G_LOGGER) as parser:
        builder.max_batch_size = 1024
        builder.max_workspace_size = 2 << 30  # 2 GiB

        print('Loading ONNX file from path {}...'.format(onnx_model_name))
        with open(onnx_model_name, 'rb') as model:
            print('Beginning ONNX file parsing')
            parsed_ok = parser.parse(model.read())

        # Was `if 1:` — the error-reporting branch below was unreachable and
        # the parse result was silently ignored.
        if parsed_ok:
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_model_name))

            ####
            #builder.int8_mode = True
            #builder.int8_calibrator = calib
            builder.fp16_mode = True
            ####
            print("layers:", network.num_layers)
            # Some models need the last layer marked as output explicitly;
            # models that already declared outputs during ONNX export do not
            # need this line.
            network.mark_output(network.get_layer(network.num_layers - 1).get_output(0))

            engine = builder.build_cuda_engine(network)
            print(engine)
            print("Completed creating Engine")
            with open(trt_model_name, "wb") as f:
                f.write(engine.serialize())
            return engine

        else:
            print('Number of errors: {}'.format(parser.num_errors))
            # Report only the first error; extend if more than one matters.
            error = parser.get_error(0)
            desc = error.desc()
            line = error.line()
            code = error.code()
            del parser  # read error fields first, then release the parser
            print('Description of the error: {}'.format(desc))
            print('Line where the error occurred: {}'.format(line))
            print('Error code: {}'.format(code))
            print("Model was not parsed successfully")
            exit(1)  # nonzero exit: signal failure to the caller

TensorRT7加载并转换onnx:

TensorRT7转换onnx的方式与5/6不太一样,

def ONNX_build_engine(trt_model_name, onnx_model_name):
    """Parse an ONNX model and build + serialize a TensorRT 7 engine.

    TRT7's ONNX parser only accepts networks created with the EXPLICIT_BATCH
    flag, and the input shape must be pinned because no optimization profile
    is defined here.

    Args:
        trt_model_name: path where the serialized engine is written.
        onnx_model_name: path to the input .onnx file.

    Returns:
        The built ICudaEngine on success; exits the process on parse failure.
    """
    # NOTE(review): relies on module-level G_LOGGER, batch_size and
    # max_workspace globals defined elsewhere — confirm before calling.
    # TRT7 requirement: without EXPLICIT_BATCH the ONNX parser rejects the
    # network ("only supports ... explicit batch dimension").
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(G_LOGGER) as builder, \
         builder.create_network(explicit_batch) as network, \
         trt.OnnxParser(network, G_LOGGER) as parser:
        builder.max_batch_size = batch_size
        builder.max_workspace_size = max_workspace << 30
        print('Loading ONNX file from path {}...'.format(onnx_model_name))
        with open(onnx_model_name, 'rb') as model:
            print('Beginning ONNX file parsing')
            parsed_ok = parser.parse(model.read())
            if not parsed_ok:
                for err_idx in range(parser.num_errors):
                    print(parser.get_error(err_idx))

        # Was `if 1:` — the error-reporting branch below was unreachable.
        if parsed_ok:
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_model_name))

            ####
            #builder.int8_mode = True
            #builder.int8_calibrator = calib
            builder.fp16_mode = True
            ####
            print("num layers:", network.num_layers)
            #last_layer = network.get_layer(network.num_layers - 1)
            #if not last_layer.get_output(0):
            # Some models need mark_output here; models that already declared
            # outputs during ONNX export do not:
            #network.mark_output(network.get_layer(network.num_layers - 1).get_output(0))
            # TRT7: pin the input shape, otherwise "Network has dynamic or
            # shape inputs, but no optimization profile has been defined."
            network.get_input(0).shape = [batch_size, 3, 224, 224]
            engine = builder.build_cuda_engine(network)
            print("engine:", engine)
            print("Completed creating Engine")
            with open(trt_model_name, "wb") as f:
                f.write(engine.serialize())
            return engine

        else:
            print('Number of errors: {}'.format(parser.num_errors))
            # Report only the first error; extend if more than one matters.
            error = parser.get_error(0)
            desc = error.desc()
            line = error.line()
            code = error.code()
            del parser  # read error fields first, then release the parser
            print('Description of the error: {}'.format(desc))
            print('Line where the error occurred: {}'.format(line))
            print('Error code: {}'.format(code))
            print("Model was not parsed successfully")
            exit(1)  # nonzero exit: signal failure to the caller

其中,trt7若缺少explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH),报错:

In node -1 (importModel): INVALID_VALUE: Assertion failed: !_importer_ctx.network()->hasImplicitBatchDimension() && "This version of the ONNX parser only supports TensorRT INetworkDefinitions with an explicit batch dimension. Please ensure the network was created using the EXPLICIT_BATCH NetworkDefinitionCreationFlag."

or :

python3: ../builder/Network.cpp:863: virtual nvinfer1::ILayer* nvinfer1::Network::getLayer(int) const: Assertion `layerIndex >= 0' failed.      无法解析onnx model

并且 trt7需要指定input size:network.get_input(0).shape = [1, 3, 224, 224],不然报错:

[TensorRT] ERROR: Network has dynamic or shape inputs, but no optimization profile has been defined.
[TensorRT] ERROR: Network validation failed.

 

tensorrt镜像中 /workspace/tensorrt/samples/python/ 有示例

Tensorrt7对onnx的要求:

用以上trt7代码对onnx模型做转换,仍不能成功,测得的gpu latency值会非常小,

torch模型转onnx模型时需要设置动态尺寸:

def torch2onnx_dynamic():
    """Export a ResNet50 checkpoint to ONNX with a dynamic batch dimension.

    The `dynamic_axes` mapping is the key point: without it the exported
    model has a fixed batch size and TensorRT 7 cannot rebatch it.
    Reads "torch_model.pth" and writes "onnx_model.onnx" in the CWD.
    """
    from resnet50 import ResNet50  # project-local model definition
    net = ResNet50()
    torch_model = "torch_model.pth"
    onnx_model = "onnx_model.onnx"
    checkpoint = torch.load(torch_model, map_location=torch.device('cpu'))

    batch_size = 1024  # any value works: the batch axis is exported as dynamic
    dummy_input = torch.randn(batch_size, 3, 224, 224, requires_grad=False)
    input_names = ['input']
    output_names = ['output']
    dynamic_axes = {'input': {0: 'batch_size'},   # the crucial part
                    'output': {0: 'batch_size'}}  # the crucial part
    #net.cuda()
    #net.eval()
    net.load_state_dict(checkpoint['state_dict'], False)
    # BUG FIX: the original snippet built all the export arguments above but
    # never performed the export, so the function produced no ONNX file.
    torch.onnx.export(net, dummy_input, onnx_model,
                      input_names=input_names,
                      output_names=output_names,
                      dynamic_axes=dynamic_axes)

更新:

又发现一个问题:

int8量化时,当builder.max_batch_size >1时,不管onnx dynamic的batch_size多大,会出现如下错误:

[TensorRT] ERROR: ../builder/cudnnCalibrator.cpp (724) - Cuda Error in add: 700 (an illegal memory access was encountered)
[TensorRT] ERROR: ../builder/cudnnCalibrator.cpp (724) - Cuda Error in add: 700 (an illegal memory access was encountered)
[TensorRT] ERROR: FAILED_ALLOCATION: std::exception
[TensorRT] ERROR: ../rtSafe/cuda/caskConvolutionRunner.cpp (351) - Cuda Error in allocateContextResources: 700 (an illegal memory access was encountered)
[TensorRT] ERROR: FAILED_EXECUTION: std::exception
[TensorRT] INTERNAL ERROR: Assertion failed: context->executeV2(&bindings[0])
../builder/cudnnCalibrator.cpp:1103
Aborting...
[TensorRT] ERROR: ../rtSafe/safeRuntime.cpp (32) - Cuda Error in free: 700 (an illegal memory access was encountered)
terminate called after throwing an instance of 'nvinfer1::CudaError'
  what():  std::exception
Aborted (core dumped)

解决方法:

首先builder.max_batch_size=1,构建engine,会生成一个calibration.cache,然后再设置不同的builder.max_batch_size构建engine,读取=1时生成的cache,不会出错。

你可能感兴趣的:(深度学习,tensorrt,深度学习)