For a while now I have been responsible for converting our models to TensorRT. Since our team's models are essentially all PyTorch now, I am not considering other frameworks for the moment. This post summarizes the experience and the method, along with an example that uses the network definition API to convert ResNet18. This example is the backbone of another network I converted; the remaining parts are not much different, but since that is work content I cannot publish it, so ResNet18 serves as the example here.
For a PyTorch model there are usually two routes: export it to ONNX with the official exporter and convert that into an engine with TensorRT's trtexec tool, or rewrite the network with the network definition API and copy the weights in. A minimal sketch of the first route follows; the rest of this post walks through the second.
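For reference, a minimal sketch of the ONNX route. The model, file names, and input shape here are placeholders for illustration, not the exact ones from my project:

import torch
import torchvision

# A placeholder model; substitute your own network and real input shape.
model = torchvision.models.resnet18(pretrained=True).eval()
dummy_input = torch.randn(1, 3, 224, 224)

# Export to ONNX with the official exporter.
torch.onnx.export(model, dummy_input, "resnet18.onnx",
                  input_names=["input"], output_names=["output"],
                  opset_version=11)

# Then convert on the command line, e.g.:
#   trtexec --onnx=resnet18.onnx --saveEngine=resnet18.trt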
A few lessons learned first: if your environment is set up properly, you can debug this like any ordinary program. You cannot step through the build itself, but TensorRT's error messages are clear. As long as you give every layer a name, you will know exactly which layer failed; then check against the original code whether a parameter such as kernel_size was set wrong. A wrong setting usually shows up as a shape-mismatch error.
Key points:
Search the official API reference (https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Graph/Network.html#); whatever you need should be there. For example, when I wanted to add two tensors, I had not noticed add_elementwise at first; opening the description of IElementWiseLayer made it obvious that this was the one, since it can perform all kinds of arithmetic operations on two tensors.
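As a minimal sketch of what that lookup turns into (assuming a and b are existing ITensor handles on a network under construction):

import tensorrt as trt

# Element-wise sum of two tensors with matching shapes; other operations
# (SUB, PROD, DIV, ...) are selected via the same enum.
add_layer = network.add_elementwise(a, b, trt.ElementWiseOperation.SUM)
add_layer.name = "my_add"  # naming layers makes build errors traceable
out = add_layer.get_output(0)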
First, a look at the core PyTorch code for ResNet18. The full code is on my GitHub; see: RESNET18.
class ResNet(nn.Module):
    def __init__(self, block, layers, in_channels=3, dcn=None):
        self.dcn = dcn
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.out_channels = []
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dcn=dcn)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dcn=dcn)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dcn=dcn)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        if self.dcn is not None:
            for m in self.modules():
                if isinstance(m, Bottleneck) or isinstance(m, BasicBlock):
                    if hasattr(m, 'conv2_offset'):
                        constant_init(m.conv2_offset, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dcn=None):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, dcn=dcn))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, dcn=dcn))
        self.out_channels.append(planes * block.expansion)
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x2 = self.layer1(x)
        x3 = self.layer2(x2)
        x4 = self.layer3(x3)
        x5 = self.layer4(x4)
        return x2, x3, x4, x5
I suggest first saving a weight file from this network. Whether the weights are randomly initialized or trained does not matter, as long as you can debug against them. Then print the model to get a feel for its structure, and print all the state-dict keys to understand how the weights are laid out:
# torch.save(net.state_dict(), model_path)
weights = torch.load(model_path)
for k in weights:
    print(k, weights[k].size())
"""
layer1.0.conv1.weight torch.Size([64, 64, 3, 3])
layer1.0.bn1.weight torch.Size([64])
layer1.0.bn1.bias torch.Size([64])
layer1.0.bn1.running_mean torch.Size([64])
layer1.0.bn1.running_var torch.Size([64])
layer1.0.bn1.num_batches_tracked torch.Size([])
layer1.0.conv2.weight torch.Size([64, 64, 3, 3])
"""
Here is the TensorRT implementation of ResNet's basic block, wrapped in a function for reuse. It calls several small helpers (conv3x3, conv2d, batchnorm, relu, tensor_add) that are defined elsewhere in the repo; a sketch of them follows the function. Pay particular attention to how the BN layer is implemented:
def basic_block_network(input_tensor, network, weights, out_channel, stride=1,
                        layerweight_id=1, layerweight_sub_id=0, downsample=None):
    residual = input_tensor
    # layerweight_id = 1-4, layerweight_sub_id = 0 or 1
    weight_key_prefix_first = "layer" + str(layerweight_id) + "." + str(layerweight_sub_id) + "."
    print("---> weight_key_prefix_first:", weight_key_prefix_first)
    print("input tensor shape ", input_tensor.shape)
    conv1_w = weights[weight_key_prefix_first + 'conv1.weight'].cpu().numpy()
    conv1 = conv3x3(input_tensor, out_channel, network, conv1_w, stride)
    conv1.name = weight_key_prefix_first + 'conv1'
    print("-------------", conv1.name)
    print(weight_key_prefix_first + "conv1 shape:", conv1.get_output(0).shape)
    bn_gamma1 = weights[weight_key_prefix_first + 'bn1.weight'].cpu().numpy()       # bn gamma
    bn_bias1 = weights[weight_key_prefix_first + 'bn1.bias'].cpu().numpy()          # bn beta
    bn_mean1 = weights[weight_key_prefix_first + 'bn1.running_mean'].cpu().numpy()  # bn running mean
    bn_var1 = weights[weight_key_prefix_first + 'bn1.running_var'].cpu().numpy()    # bn running var
    bn1 = batchnorm(conv1.get_output(0), network, bn_gamma1, bn_bias1, bn_mean1, bn_var1)
    bn1.name = weight_key_prefix_first + 'bn1'
    print("-------------", bn1.name)
    relu1 = relu(bn1.get_output(0), network)
    relu1.name = weight_key_prefix_first + "relu1"
    print("-------------", relu1.name)
    conv2_w = weights[weight_key_prefix_first + 'conv2.weight'].cpu().numpy()
    conv2 = conv3x3(relu1.get_output(0), out_channel, network, conv2_w)
    conv2.name = weight_key_prefix_first + "conv2"
    print("-------------", conv2.name)
    bn_gamma2 = weights[weight_key_prefix_first + 'bn2.weight'].cpu().numpy()       # bn gamma
    bn_bias2 = weights[weight_key_prefix_first + 'bn2.bias'].cpu().numpy()          # bn beta
    bn_mean2 = weights[weight_key_prefix_first + 'bn2.running_mean'].cpu().numpy()  # bn running mean
    bn_var2 = weights[weight_key_prefix_first + 'bn2.running_var'].cpu().numpy()    # bn running var
    bn2 = batchnorm(conv2.get_output(0), network,
                    bn_gamma2, bn_bias2, bn_mean2, bn_var2)
    bn2.name = weight_key_prefix_first + "bn2"
    print("----------------bn2 shape: ", bn2.get_output(0).shape)
    print("-------------", bn2.name)
    if downsample:
        # Shortcut branch: 1x1 conv + BN, matching nn.Sequential(conv, bn) in _make_layer.
        conv_downsample_weight = weights[weight_key_prefix_first + "downsample." + "0.weight"].cpu().numpy()
        conv_downsample = conv2d(input_tensor, out_channel, network, conv_downsample_weight, 1)
        conv_downsample.stride = (stride, stride)
        conv_downsample.name = weight_key_prefix_first + "downsample." + "conv0"
        print("-------------", conv_downsample.name)
        bn_downsample_gamma = weights[weight_key_prefix_first + 'downsample.' + '1.weight'].cpu().numpy()       # bn gamma
        bn_downsample_bias = weights[weight_key_prefix_first + "downsample." + '1.bias'].cpu().numpy()          # bn beta
        bn_downsample_mean = weights[weight_key_prefix_first + "downsample." + '1.running_mean'].cpu().numpy()  # bn running mean
        bn_downsample_var = weights[weight_key_prefix_first + "downsample." + '1.running_var'].cpu().numpy()    # bn running var
        bn_downsample = batchnorm(conv_downsample.get_output(0), network,
                                  bn_downsample_gamma, bn_downsample_bias, bn_downsample_mean, bn_downsample_var)
        bn_downsample.name = weight_key_prefix_first + 'downsample.' + '1'
        print("-------------", bn_downsample.name)
        residual = bn_downsample.get_output(0)
        residual.name = weight_key_prefix_first + "residual"
        print("downsample residual shape:", residual.shape)
    residual_tensor = tensor_add(bn2.get_output(0), residual, network)
    relu_out = relu(residual_tensor.get_output(0), network)
    relu_out.name = weight_key_prefix_first + "relu_out"
    print("-------------", relu_out.name)
    return relu_out
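The helpers used above are not shown here; the full versions are on GitHub. As a hedged sketch of how they can be written (assuming these signatures match the calls above): the key point is that TensorRT has no dedicated BatchNorm layer, so inference-time BN is folded into a per-channel IScaleLayer with scale = gamma / sqrt(var + eps) and shift = beta - mean * scale:

import numpy as np
import tensorrt as trt

# TensorRT does not copy weight buffers at definition time; keep every
# numpy array referenced until the engine is actually built.
_KEEP_ALIVE = []

def conv2d(input_tensor, out_channel, network, weight, kernel_size, stride=1, padding=0):
    # Generic 2D convolution without bias; weight is an OIHW numpy array.
    _KEEP_ALIVE.append(weight)
    layer = network.add_convolution(input=input_tensor, num_output_maps=out_channel,
                                    kernel_shape=(kernel_size, kernel_size), kernel=weight)
    layer.stride = (stride, stride)
    layer.padding = (padding, padding)
    return layer

def conv3x3(input_tensor, out_channel, network, weight, stride=1):
    # 3x3 convolution with padding 1, mirroring PyTorch's conv3x3.
    return conv2d(input_tensor, out_channel, network, weight, 3, stride, 1)

def batchnorm(input_tensor, network, gamma, beta, mean, var, eps=1e-5):
    # Inference-time BN folded into y = x * scale + shift, per channel.
    scale = (gamma / np.sqrt(var + eps)).astype(np.float32)
    shift = (beta - mean * scale).astype(np.float32)
    power = np.ones_like(scale)
    _KEEP_ALIVE.extend([shift, scale, power])
    return network.add_scale(input_tensor, trt.ScaleMode.CHANNEL, shift, scale, power)

def relu(input_tensor, network):
    return network.add_activation(input=input_tensor, type=trt.ActivationType.RELU)

def tensor_add(a, b, network):
    # Element-wise sum, used for the residual connection.
    return network.add_elementwise(a, b, trt.ElementWiseOperation.SUM)

eps=1e-5 matches nn.BatchNorm2d's default; the keep-alive list avoids a classic pitfall where numpy buffers get garbage-collected before the build and the engine silently picks up garbage weights.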
def make_layer(network, weights, input_tensor, out_channel, layer_id, block_sizes=2, stride=1):
    # resnet18: BasicBlock expansion is 1 and the stem outputs 64 channels,
    # so a downsample branch is needed whenever stride or width changes.
    downsample = None
    if stride != 1 or 64 != out_channel:
        downsample = True
    sub1 = basic_block_network(input_tensor, network, weights, out_channel, stride=stride,
                               layerweight_id=layer_id, layerweight_sub_id=0, downsample=downsample)
    sub2 = basic_block_network(sub1.get_output(0), network, weights, out_channel, stride=1,
                               layerweight_id=layer_id, layerweight_sub_id=1)
    return sub2
"""
conv1.weight torch.Size([64, 3, 7, 7])
bn1.weight torch.Size([64])
bn1.bias torch.Size([64])
bn1.running_mean torch.Size([64])
bn1.running_var torch.Size([64])
bn1.num_batches_tracked torch.Size([])
"""
def populate_network(network, weights):
    input_tensor = network.add_input(name=ModelData.INPUT_NAME, dtype=ModelData.DTYPE, shape=ModelData.INPUT_SHAPE)
    out_planes = 64
    # conv1: 7x7, stride 2, padding 3
    conv0_w = weights['conv1.weight'].cpu().numpy()
    conv0 = network.add_convolution(input=input_tensor, num_output_maps=out_planes,
                                    kernel_shape=(7, 7), kernel=conv0_w)
    conv0.stride = (2, 2)
    conv0.padding = (3, 3)
    # bn1
    bn1_gamma1 = weights['bn1.weight'].cpu().numpy()       # bn gamma
    bn1_bias1 = weights['bn1.bias'].cpu().numpy()          # bn beta
    bn1_mean1 = weights['bn1.running_mean'].cpu().numpy()  # bn running mean
    bn1_var1 = weights['bn1.running_var'].cpu().numpy()    # bn running var
    bn1 = batchnorm(conv0.get_output(0), network, bn1_gamma1, bn1_bias1, bn1_mean1, bn1_var1)
    bn1.name = 'bn1'
    # relu0
    relu0 = network.add_activation(input=bn1.get_output(0), type=trt.ActivationType.RELU)
    # maxpool: 3x3, stride 2, padding 1
    pooling1 = network.add_pooling(relu0.get_output(0), trt.PoolingType.MAX, (3, 3))
    pooling1.stride = (2, 2)
    pooling1.padding = (1, 1)
    layer1_out = make_layer(network, weights, pooling1.get_output(0), 64, layer_id=1, stride=1)
    print("----------------layer1_out.shape: ", layer1_out.get_output(0).shape)
    layer2_out = make_layer(network, weights, layer1_out.get_output(0), 128, layer_id=2, stride=2)
    print("----------------layer2_out.shape: ", layer2_out.get_output(0).shape)
    layer3_out = make_layer(network, weights, layer2_out.get_output(0), 256, layer_id=3, stride=2)
    print("----------------layer3_out.shape: ", layer3_out.get_output(0).shape)
    layer4_out = make_layer(network, weights, layer3_out.get_output(0), 512, layer_id=4, stride=2)
    print("----------------layer4_out.shape: ", layer4_out.get_output(0).shape)
    layer1_out.get_output(0).name = "out1"
    layer2_out.get_output(0).name = "out2"
    layer3_out.get_output(0).name = "out3"
    layer4_out.get_output(0).name = "out4"
    network.mark_output(tensor=layer1_out.get_output(0))  # (1, 64, 80, 80) for a 320x320 input
    network.mark_output(tensor=layer2_out.get_output(0))  # (1, 128, 40, 40)
    network.mark_output(tensor=layer3_out.get_output(0))  # (1, 256, 20, 20)
    network.mark_output(tensor=layer4_out.get_output(0))  # (1, 512, 10, 10)
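populate_network only defines the graph; a builder still has to turn it into an engine. Here is a minimal driver sketch in the style of the older TensorRT 7 Python samples. ModelData is my assumed definition, since the code above references it without showing it, and max_workspace_size / build_cuda_engine are deprecated on TensorRT 8+, where you would use create_builder_config() and build_serialized_network() instead:

import tensorrt as trt
import torch

class ModelData:
    # Assumed values; the shape comments above suggest a 320x320 input.
    INPUT_NAME = "input"
    INPUT_SHAPE = (1, 3, 320, 320)
    DTYPE = trt.float32

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine(weights):
    builder = trt.Builder(TRT_LOGGER)
    # Explicit-batch network, matching the 4-D shapes printed above.
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    populate_network(network, weights)
    builder.max_workspace_size = 1 << 30  # 1 GiB of scratch space for tactic selection
    return builder.build_cuda_engine(network)

weights = torch.load(model_path)
engine = build_engine(weights)
with open("resnet18.trt", "wb") as f:
    f.write(engine.serialize())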
I am short on time today, so this is mostly the code with brief notes; for the details, see the ResNet example on GitHub. It is fairly simple.