[TensorRT in Practice] (1) Converting a PyTorch Model: Some Hands-on Notes and an Example

       Lately I have been responsible for converting models to TensorRT. Since the models in our team are essentially all PyTorch now, other frameworks are not considered here. This post summarizes the experience and the workflow, and includes an example that rebuilds ResNet18 with the TensorRT network-definition API. The example is the backbone of another network I converted at work; the other parts are similar, but since they are work content I cannot publish them, so ResNet18 serves as the example here.

       For a PyTorch model there are generally two routes: export it to ONNX with the official exporter and convert that with TensorRT's trtexec tool, or rewrite the network with the network-definition API and copy the weights over. A few observations first:

  • If the official trtexec can convert the ONNX model exported from PyTorch directly into a TensorRT engine, that is by far the fastest path, so test this route first (make sure the ONNX opset version is compatible with your TensorRT version); a minimal sketch of this path follows the list.
  • If trtexec cannot handle the ONNX directly (for example, the upsampling ops nn.Upsample / torch.nn.functional.interpolate in LINEAR mode with align_corners set are a frequent source of trouble), you can try onnx2trt. I tried it on one of my models, but after loading my own weights the accuracy no longer matched the original PyTorch model, while without loading them it did match. In short, when strange problems pile up, fall back to the network-definition API.
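For reference, a minimal sketch of the ONNX-export route; the model, input shape, file names and opset here are placeholders, adjust them to your own model and TensorRT version:

```python
import torch
import torchvision

# Export a PyTorch model to ONNX (torchvision's resnet18 as a stand-in).
model = torchvision.models.resnet18(pretrained=False).eval()
dummy = torch.randn(1, 3, 256, 256)                      # assumed input shape
torch.onnx.export(model, dummy, "resnet18.onnx",
                  opset_version=11,                      # pick an opset your TensorRT supports
                  input_names=["input"], output_names=["output"])

# Then on the command line (trtexec ships with TensorRT):
#   trtexec --onnx=resnet18.onnx --saveEngine=resnet18.trt
```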

Debugging tips

     As long as your environment is set up correctly, you can debug the build script like any ordinary program. You cannot step into the engine build itself, but TensorRT's error messages are quite clear: as long as you name every layer, you will know which layer failed, and you can then check against the original code whether parameters such as kernel size were set incorrectly. Mistakes like that usually surface as shape-mismatch errors.

     Key points:

  1.  Give every layer a name.
  2.  If some layer is wrong, comment out layers from the back towards the front to narrow down where the problem is.
  3.  You can also check the outputs layer by layer. It is simple: mark an intermediate layer as an output node, feed the same data to both the original PyTorch code and the generated network, and compare the results (a debugger on the PyTorch side already shows you the intermediate tensors). For example, feed both sides a tensor of shape (1, 3, 256, 256) filled with ones; the shapes on both sides must match exactly, otherwise it is perfectly normal for the outputs to differ. A sketch of this trick follows the list.
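A minimal sketch of the layer-by-layer comparison; `bn1` stands for a TensorRT layer object built with the network-definition API and `net` for the original PyTorch module, both are placeholders here:

```python
import numpy as np
import torch

# --- TensorRT side: mark the intermediate tensor as an extra output ---
mid = bn1.get_output(0)                  # any intermediate layer's output
mid.name = "debug_bn1"
network.mark_output(tensor=mid)          # it now shows up among the engine outputs

# --- PyTorch side: reproduce the same intermediate result ---
x = torch.ones(1, 3, 256, 256)           # same data on both sides
with torch.no_grad():
    ref = net.bn1(net.conv1(x)).numpy()  # replay the same prefix of the network

# After running the engine, compare the two arrays, e.g.:
# np.testing.assert_allclose(trt_out, ref, rtol=1e-3, atol=1e-3)
```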

Layers you do not know how to implement:

         Browse the official API docs (https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Graph/Network.html#); pretty much everything is there. For example, I wanted to add two tensors and did not notice at first that the method is add_elementwise. Opening the description of IElementWiseLayer makes it clear: it performs various element-wise arithmetic operations on two tensors.
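For example, adding two tensors (the residual connection in the blocks below) is add_elementwise with SUM; tensor_a, tensor_b and network here are placeholders, and this is presumably what the tensor_add helper used later wraps:

```python
import tensorrt as trt

# Element-wise sum of two ITensors with identical shapes.
add_layer = network.add_elementwise(tensor_a, tensor_b, trt.ElementWiseOperation.SUM)
out = add_layer.get_output(0)
```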

 

Converting ResNet18

       First, take a look at the core code of the PyTorch ResNet18; the full version is on my GitHub, see: RESNET18.

 The core ResNet18 code in PyTorch:


class ResNet(nn.Module):
    def __init__(self, block, layers, in_channels=3, dcn=None):
        self.dcn = dcn
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.out_channels = []
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dcn=dcn)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dcn=dcn)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dcn=dcn)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        if self.dcn is not None:
            for m in self.modules():
                if isinstance(m, Bottleneck) or isinstance(m, BasicBlock):
                    if hasattr(m, 'conv2_offset'):
                        constant_init(m.conv2_offset, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dcn=None):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, dcn=dcn))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, dcn=dcn))
        self.out_channels.append(planes * block.expansion)
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x2 = self.layer1(x)
        x3 = self.layer2(x2)
        x4 = self.layer3(x3)
        x5 = self.layer4(x4)

        return x2, x3, x4, x5

I suggest first saving a weight file from this network code. Whether the weights are randomly initialized or trained does not matter, as long as you can debug with them. Then print the model to get a feel for its structure, and print all the weight keys to see how the state dict is laid out:

# torch.save(net.state_dict(), model_path)
weights = torch.load(model_path)
for k in weights:
    print(k, weights[k].size())
"""
layer1.0.conv1.weight torch.Size([64, 64, 3, 3])
layer1.0.bn1.weight torch.Size([64])
layer1.0.bn1.bias torch.Size([64])
layer1.0.bn1.running_mean torch.Size([64])
layer1.0.bn1.running_var torch.Size([64])
layer1.0.bn1.num_batches_tracked torch.Size([])
layer1.0.conv2.weight torch.Size([64, 64, 3, 3])
"""

Basic block:

The ResNet basic block, wrapped in a function for reuse. Pay attention to how the BN layers are handled (a sketch of the helper functions conv3x3, conv2d, batchnorm, relu and tensor_add follows the block):


def basic_block_network(input_tensor, network, weights, out_channel, stride=1,
                        layerweight_id=1, layerweight_sub_id=0, downsample=None):
    """Build one ResNet BasicBlock: conv-bn-relu-conv-bn + residual add + relu."""
    residual = input_tensor

    # layerweight_id = 1-4, layerweight_sub_id = 0 or 1
    weight_key_prefix_first = "layer" + str(layerweight_id) + "." + str(layerweight_sub_id) + "."

    print("---> weight_key_prefix_first:", weight_key_prefix_first)
    print("input tensor shape ", input_tensor.shape)

    # conv1 + bn1 + relu
    conv1_w = weights[weight_key_prefix_first + 'conv1.weight'].cpu().numpy()
    conv1 = conv3x3(input_tensor, out_channel, network, conv1_w, stride)
    conv1.name = weight_key_prefix_first + 'conv1'
    print("-------------", conv1.name)
    print(weight_key_prefix_first + "conv1.shape:", conv1.get_output(0).shape)

    bn_gamma1 = weights[weight_key_prefix_first + 'bn1.weight'].cpu().numpy()        # bn gamma
    bn_bias1 = weights[weight_key_prefix_first + 'bn1.bias'].cpu().numpy()           # bn beta
    bn_mean1 = weights[weight_key_prefix_first + 'bn1.running_mean'].cpu().numpy()   # bn running mean
    bn_var1 = weights[weight_key_prefix_first + 'bn1.running_var'].cpu().numpy()     # bn running var
    bn1 = batchnorm(conv1.get_output(0), network, bn_gamma1, bn_bias1, bn_mean1, bn_var1)
    bn1.name = weight_key_prefix_first + 'bn1'
    print("-------------", bn1.name)

    relu1 = relu(bn1.get_output(0), network)
    relu1.name = weight_key_prefix_first + "relu1"
    print("-------------", relu1.name)

    # conv2 + bn2
    conv2_w = weights[weight_key_prefix_first + 'conv2.weight'].cpu().numpy()
    conv2 = conv3x3(relu1.get_output(0), out_channel, network, conv2_w)
    conv2.name = weight_key_prefix_first + "conv2"
    print("-------------", conv2.name)

    bn_gamma2 = weights[weight_key_prefix_first + 'bn2.weight'].cpu().numpy()        # bn gamma
    bn_bias2 = weights[weight_key_prefix_first + 'bn2.bias'].cpu().numpy()           # bn beta
    bn_mean2 = weights[weight_key_prefix_first + 'bn2.running_mean'].cpu().numpy()   # bn running mean
    bn_var2 = weights[weight_key_prefix_first + 'bn2.running_var'].cpu().numpy()     # bn running var
    bn2 = batchnorm(conv2.get_output(0), network,
                    bn_gamma2, bn_bias2, bn_mean2, bn_var2)
    bn2.name = weight_key_prefix_first + "bn2"
    print("----------------bn2.shape: ", bn2.get_output(0).shape)
    print("-------------", bn2.name)

    # Shortcut branch: 1x1 conv + bn when the spatial size or channel count changes.
    if downsample:
        conv_downsample_weight = weights[weight_key_prefix_first + "downsample." + "0.weight"].cpu().numpy()
        conv_downsample = conv2d(input_tensor, out_channel, network, conv_downsample_weight, 1)
        conv_downsample.stride = (stride, stride)
        conv_downsample.name = weight_key_prefix_first + "downsample." + "conv0"
        print("-------------", conv_downsample.name)

        bn_downsample_gamma = weights[weight_key_prefix_first + 'downsample.' + '1.weight'].cpu().numpy()       # bn gamma
        bn_downsample_bias = weights[weight_key_prefix_first + "downsample." + '1.bias'].cpu().numpy()          # bn beta
        bn_downsample_mean = weights[weight_key_prefix_first + "downsample." + '1.running_mean'].cpu().numpy()  # bn running mean
        bn_downsample_var = weights[weight_key_prefix_first + "downsample." + '1.running_var'].cpu().numpy()    # bn running var
        bn_downsample = batchnorm(conv_downsample.get_output(0), network,
                                  bn_downsample_gamma, bn_downsample_bias, bn_downsample_mean, bn_downsample_var)
        bn_downsample.name = weight_key_prefix_first + 'downsample.' + '1'
        print("-------------", bn_downsample.name)

        residual = bn_downsample.get_output(0)
        residual.name = weight_key_prefix_first + "residual"
        print("downsample residual shape:", residual.shape)

    # Residual add (bn2 output + shortcut) followed by the final ReLU.
    residual_tensor = tensor_add(bn2.get_output(0), residual, network)

    relu_out = relu(residual_tensor.get_output(0), network)
    relu_out.name = weight_key_prefix_first + "relu_out"
    print("-------------", relu_out.name)

    return relu_out
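The helpers conv3x3, conv2d, batchnorm, relu and tensor_add used above come from my repo; below is a minimal sketch of how they can be written on top of the TensorRT Python API. The BN epsilon and the exact wrapping are assumptions, so check them against the actual code on GitHub:

```python
import numpy as np
import tensorrt as trt

_keep_alive = []  # TensorRT keeps raw pointers to these arrays until the engine is built


def conv2d(input_tensor, out_channel, network, weight, kernel_size, stride=1, padding=0):
    # Plain 2D convolution without bias; weight is a numpy array of shape (out, in, k, k).
    conv = network.add_convolution(input=input_tensor, num_output_maps=out_channel,
                                   kernel_shape=(kernel_size, kernel_size), kernel=weight)
    conv.stride = (stride, stride)
    conv.padding = (padding, padding)
    return conv


def conv3x3(input_tensor, out_channel, network, weight, stride=1):
    # 3x3 convolution with padding 1, matching PyTorch's conv3x3 in resnet.
    return conv2d(input_tensor, out_channel, network, weight, 3, stride=stride, padding=1)


def relu(input_tensor, network):
    return network.add_activation(input=input_tensor, type=trt.ActivationType.RELU)


def tensor_add(lhs, rhs, network):
    return network.add_elementwise(lhs, rhs, trt.ElementWiseOperation.SUM)


def batchnorm(input_tensor, network, gamma, beta, mean, var, eps=1e-5):
    # Fold BatchNorm into a per-channel scale layer:
    #   y = gamma * (x - mean) / sqrt(var + eps) + beta  ==  scale * x + shift
    scale = (gamma / np.sqrt(var + eps)).astype(np.float32)
    shift = (beta - mean * scale).astype(np.float32)
    power = np.ones_like(scale)
    _keep_alive.extend([scale, shift, power])  # keep buffers alive until build time
    return network.add_scale(input_tensor, trt.ScaleMode.CHANNEL, shift, scale, power)
```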

Emulating _make_layer from the original code:

def make_layer(network, weights, input_tensor, out_channel, layer_id, block_sizes=2, stride=1):
    # For ResNet18 (BasicBlock, expansion = 1) the shortcut needs a projection whenever the
    # stride is not 1 or the channel count changes; hard-coding 64 works here because only
    # layer1 keeps 64 input channels.
    downsample = None
    if stride != 1 or 64 != out_channel:
        downsample = True
    sub1 = basic_block_network(input_tensor, network, weights, out_channel, stride=stride, layerweight_id=layer_id, layerweight_sub_id=0, downsample=downsample)
    sub2 = basic_block_network(sub1.get_output(0), network, weights, out_channel, stride=1, layerweight_id=layer_id, layerweight_sub_id=1)
    return sub2

Finally, assemble the whole network:

"""
conv1.weight torch.Size([64, 3, 7, 7])
bn1.weight torch.Size([64])
bn1.bias torch.Size([64])
bn1.running_mean torch.Size([64])
bn1.running_var torch.Size([64])
bn1.num_batches_tracked torch.Size([])
"""
def populate_network(network, weights):
    input_tensor = network.add_input(name=ModelData.INPUT_NAME, dtype=ModelData.DTYPE, shape=ModelData.INPUT_SHAPE)
    out_planes = 64
    
    # conv1: 7x7, stride 2 (called conv0 here), no bias
    conv0_w = weights['conv1.weight'].cpu().numpy()
    conv0 = network.add_convolution(input=input_tensor, num_output_maps=out_planes, kernel_shape=(7, 7), kernel=conv0_w)
    conv0.stride = (2, 2)
    conv0.padding = (3, 3)

    #bn1
    bn1_gamma1 = weights['bn1.weight'].cpu().numpy()        # bn gamma
    bn1_bias1 = weights['bn1.bias'].cpu().numpy()          # bn beta
    bn1_mean1 = weights['bn1.running_mean'].cpu().numpy()  # bn mean
    bn1_var1 = weights['bn1.running_var'].cpu().numpy()   # bn running var
    bn1 = batchnorm(conv0.get_output(0), network, bn1_gamma1, bn1_bias1, bn1_mean1, bn1_var1)
    bn1.name =  'bn1'

    #relu0
    relu0 = network.add_activation(input=bn1.get_output(0), type=trt.ActivationType.RELU)
    # pooling1: 3x3 max pool, stride 2
    pooling1 = network.add_pooling(relu0.get_output(0), trt.PoolingType.MAX, (3, 3))
    pooling1.stride = (2, 2)
    pooling1.padding = (1, 1)

    
    # print("----------------pooling1.shape: ", pooling1.get_output(0).shape)


    layer1_out = make_layer(network, weights, pooling1.get_output(0), 64, layer_id=1, stride=1)
    print("----------------layer1_out.shape: ", layer1_out.get_output(0).shape)

    layer2_out = make_layer(network, weights, layer1_out.get_output(0), 128, layer_id=2, stride=2)
    print("----------------layer2_out.shape: ", layer2_out.get_output(0).shape)

    layer3_out = make_layer(network, weights, layer2_out.get_output(0), 256, layer_id=3, stride=2)
    print("----------------layer3_out.shape: ", layer3_out.get_output(0).shape)

    layer4_out = make_layer(network, weights, layer3_out.get_output(0), 512, layer_id=4, stride=2)
    print("----------------layer4_out.shape: ", layer4_out.get_output(0).shape)

    layer1_out.get_output(0).name = "out1"
    layer2_out.get_output(0).name = "out2"
    layer3_out.get_output(0).name = "out3"
    layer4_out.get_output(0).name = "out4"
    network.mark_output(tensor=layer1_out.get_output(0))  # (1, 64, 80, 80)
    network.mark_output(tensor=layer2_out.get_output(0))  # (1, 128, 40, 40)
    network.mark_output(tensor=layer3_out.get_output(0))  # (1, 256, 20, 20)
    network.mark_output(tensor=layer4_out.get_output(0))  # (1, 512, 10, 10)
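populate_network only defines the graph; to actually get an engine you still need a builder. A minimal sketch, assuming the TensorRT 7.x implicit-batch Python API that the code above uses; the weight path, file names and workspace size are placeholders:

```python
import tensorrt as trt
import torch

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


def build_engine(model_path):
    weights = torch.load(model_path)
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()           # implicit-batch network, as used above
    config = builder.create_builder_config()
    config.max_workspace_size = 1 << 30          # 1 GiB of builder workspace
    populate_network(network, weights)
    return builder.build_engine(network, config)


engine = build_engine("resnet18.pth")
with open("resnet18.trt", "wb") as f:
    f.write(engine.serialize())
```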

 

I am short on time today, so I am just pasting the code; for the full details see the ResNet example on GitHub, it is fairly straightforward.
