目录
环境安装:
pycuda安装:
开源项目信息
fasterRCNN的训练
faster rcnn onnx实践
第3步测试结果:
第4步测试结果:
单张图片测试代码:
多张图片预测代码:
第5步做了修改:
第7步测试:
第8步测试
测试报错The input tensor cannot be reshaped to the requested shape:
正确类别数量设置:
测试正确结果:
onnx转trt操作
c++ 转换trt:
Onnx转trt代码及操作:
c++ tensorrt推理部分
c++onnx转tensorrt
打印模型输入输出参数:
可以忽略的报错:
pip install onnxsim
pytorch安装:
cuda版本是11.0,没有cuda11.0对应的torchvision,所以安装了cpu版:
pip install torch==1.10.0+cpu torchvision==0.11.0+cpu torchaudio==0.10.0 -f https://download.pytorch.org/whl/torch_stable.html
pip install torch==1.10.0+cu113 torchvision==0.11.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html
tensorrt安装:
onnx转tensorrt 实战干货总结_AI视觉网奇的博客-CSDN博客_onnx转tensorrt
win10安装pycuda2022_AI视觉网奇的博客-CSDN博客
根据开源项目进行部署测试:
GitHub - thb1314/tensorrt-onnx-fasterrcnn-fpn-roialign
看了代码,感觉主要是把faster rcnn拆分为两个部分,
第1部分:
AnchorGenerator
RPNHead
RegionProposalNetwork
GeneralizedRCNNTransform
第2部分:
MultiScaleRoIAlign
TwoMLPHead
FastRCNNPredictor
RoIHeads,这个又依赖前面三个。
本部分基于repo
GitHub - shouxieai/tensorRT_Pro: C++ library based on tensorrt integration
安装部分请看该项目的readme部分,相关代码位于本项目的tensorrt_code目录下。
如果可以给该项目点个star的话,麻烦顺手给俺也点一个吧,谢谢。
demo: 使用pytorch训练自己的Faster-RCNN目标检测模型 - 野生鹅鹅 - 博客园
其他git code还请自行查找
导出onnx的时候,需要加载自己训练的权重。
重要的环节是第3步和 第5步,
作者提供了8个步骤:
x01export_FasterRCNN_onnx.py
x02test_FasterRCNN_onnx.py
x03extract_RPN.py
x04testRPNonnx.py
x05extract_ROIHeader.py
x06reduceRpnOnnx.py
x07reduce_header_onnx.py
x08test_header_onnx.py
第一个步骤转onnx警告和解决方法:
that if the size of dimension 1 of the input is not 1, the ONNX model will return an error_AI视觉网奇的博客-CSDN博客
输出特征名:
['rpn_boxes', 'feature_0', 'feature_1', 'feature_2', 'feature_3', 'feature_pool']
输出特征维度:
1. list 1000*5
2. list,长度4
0: 1 256 200 264
1: 1 256 100 132
2: 1 256 50 66
3: 1 256 25 33
pool: 256 13 17
3是图片
4是维度
原本代码batch_size为1时可以正确预测,但是batch_size为2时,预测结果都是第一张图片,
稍微修改了代码:
import torch
import os
import sys
sys.path.insert(0, os.path.abspath('..'))
from model import fasterrpn_resnet50_fpn
import glob
from torchvision import transforms
import cv2
if __name__ == '__main__':
    # Step 3: run the backbone + RPN half on the local JPEGs, preview the
    # proposals, then export that half to ONNX with a dynamic batch axis.
    model = fasterrpn_resnet50_fpn(pretrained=True)
    model.eval()

    transform_func = transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # Round 600 x 800 up to multiples of 32, which gives 608 x 800.
    input_height, input_width = (600 + 31) // 32 * 32, (800 + 31) // 32 * 32

    image_list = list()
    img_tensor_list = list()
    for item in glob.glob("./*.jpg"):
        raw = cv2.imread(item)
        if raw is None:
            # cv2.imread returns None (it does not raise) for unreadable files.
            print("skip unreadable image", item)
            continue
        # Read and resize once; the BGR copy is kept for drawing, the RGB
        # tensor is fed to the model.
        resized = cv2.resize(raw, dsize=(input_width, input_height))
        image_list.append(resized)
        img_tensor_list.append(transform_func(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)))

    with torch.no_grad():
        results = model(img_tensor_list, is_show=True)

    result = results[0]
    for i, item in enumerate(result):
        image = image_list[i].copy()
        for score_box in item:
            # The first column is dropped; the next four values are used as
            # the two corner points of the rectangle.
            box = score_box[1:].numpy()
            cv2.rectangle(image, tuple(map(int, box[0:2])), tuple(map(int, box[2:4])), (0, 255, 0))
        cv2.imshow("win", image)
        cv2.waitKey()
        cv2.destroyWindow("win")

    output_names = ["rpn_boxes"] + ['feature_' + key for key in results[1].keys()]
    print(output_names)
    # Only axis 0 (the batch axis) is exported as dynamic.
    dynamic_axes = {'input': {0: "N"}, 'rpn_boxes': {0: "N"}, 'feature_0': {0: "N"}, 'feature_1': {0: "N"},
                    'feature_2': {0: "N"}, 'feature_3': {0: "N"}, 'feature_pool': {0: "N"}}
    onnx_save_path = 'rpn_backbone_resnet50.onnx'
    torch.onnx.export(model, torch.rand(2, 3, input_height, input_width), onnx_save_path, verbose=False,
                      do_constant_folding=True,
                      input_names=["input"], output_names=output_names,
                      dynamic_axes=dynamic_axes,
                      opset_version=11)

    import onnxsim
    import onnx

    onnx_model = onnx.load(onnx_save_path)
    # Simplify the exported graph while keeping the batch dimension dynamic.
    model_simp, check = onnxsim.simplify(onnx_model, check_n=0,
                                         input_shapes={'input': [-1, 3, input_height, input_width]},
                                         dynamic_input_shape=True)
    # The simplified model overwrites the exported file; write to
    # onnx_save_path.replace(".onnx", "_simp.onnx") instead to keep both.
    with open(onnx_save_path, 'wb') as f:
        onnx.save(model_simp, f)
多图片可以批量预测了,但是预测结果都不对。
import onnxruntime as rt
import numpy as np
import torch
import torchvision
import cv2
from torchvision import transforms
def get_classes(filepath, encoding='gbk'):
    """Read a class-name file and return one stripped entry per line.

    Args:
        filepath: Path of the text file holding one class name per line.
        encoding: Text encoding of the file; defaults to ``'gbk'``.

    Returns:
        A list with the whitespace-stripped content of every line, in file
        order. Blank lines are kept as empty strings.
    """
    with open(filepath, 'r', encoding=encoding) as f:
        return [line.strip() for line in f]
if __name__ == '__main__':
    # Step 4: run the exported backbone/RPN ONNX model on a single image and
    # draw the raw proposals.
    onnx_save_path = "rpn_backbone_resnet50.onnx"
    bgr = cv2.imread('./car.jpg')
    if bgr is None:
        # cv2.imread returns None (it does not raise) when the file is missing.
        raise FileNotFoundError("cannot read ./car.jpg")
    bgr = cv2.resize(bgr, dsize=(800, 608))
    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    img_tensor = normalize(img).unsqueeze(dim=0)
    img_input = img_tensor.numpy().astype(np.float32)

    sess = rt.InferenceSession(onnx_save_path)
    input_name = sess.get_inputs()[0].name
    # Only the first graph output is fetched.
    label_names = [sess.get_outputs()[0].name]
    print("input_name",input_name)
    pred_onnx = sess.run(label_names, {input_name:img_input})
    print("label_names", label_names,"pred size",pred_onnx[0].shape)

    # output without nms
    pred_onnx = dict(zip(label_names, pred_onnx))
    image = bgr.copy()
    for box in pred_onnx['rpn_boxes'][0]:
        # The first column is dropped; the next four values are the corners.
        box = box[1:]
        cv2.rectangle(image, tuple(map(int,box[0:2])), tuple(map(int,box[2:4])), (0,255,0))
    # cv2.imshow expects BGR, so show the BGR copy rather than the RGB array.
    cv2.imshow("img", bgr)
    cv2.imshow("win", image)
    cv2.waitKey()
    cv2.destroyWindow("win")
步骤4测试可视化结果,与3是不一样的, 4390*6,步骤3的结果是1000*6
第四步backbone输入输出维度:
input_name input
label_names ['rpn_boxes'] pred size (1, 4390, 6)
import onnxruntime as rt
import numpy as np
import torch
import torchvision
import cv2
from torchvision import transforms
def get_classes(filepath, encoding='gbk'):
    """Read a class-name file and return one stripped entry per line.

    Args:
        filepath: Path of the text file holding one class name per line.
        encoding: Text encoding of the file; defaults to ``'gbk'``.

    Returns:
        A list with the whitespace-stripped content of every line, in file
        order. Blank lines are kept as empty strings.
    """
    with open(filepath, 'r', encoding=encoding) as f:
        return [line.strip() for line in f]
if __name__ == '__main__':
    # Step 4 (batch variant): feed the same image twice as a batch of two to
    # the exported backbone/RPN ONNX model and draw the first image's boxes.
    onnx_save_path = "rpn_backbone_resnet50.onnx"
    bgr = cv2.imread('./car.jpg')
    if bgr is None:
        # cv2.imread returns None (it does not raise) when the file is missing.
        raise FileNotFoundError("cannot read ./car.jpg")
    bgr = cv2.resize(bgr, dsize=(800, 608))
    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    print(img.shape)

    normalize = transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    img_tensor = normalize(img)  # no unsqueeze: the batch axis is added by stacking below
    img_input = img_tensor.numpy().astype(np.float32)
    img_input = np.stack([img_input, img_input])

    sess = rt.InferenceSession(onnx_save_path)
    input_name = sess.get_inputs()[0].name
    for data in sess.get_outputs():
        print("outname",data.name)
    # Only the first graph output is fetched.
    label_names = [sess.get_outputs()[0].name]
    print("input_name",input_name)
    pred_onnx = sess.run(label_names, {input_name:img_input})
    print("label_names", label_names,"pred size",pred_onnx[0].shape)

    # output without nms
    pred_onnx = dict(zip(label_names, pred_onnx))
    image = bgr.copy()
    # Only the boxes of the first batch element are drawn.
    for box in pred_onnx['rpn_boxes'][0]:
        box = box[1:]
        cv2.rectangle(image, tuple(map(int,box[0:2])), tuple(map(int,box[2:4])), (0,255,0))
    # cv2.imshow expects BGR, so show the BGR copy rather than the RGB array.
    cv2.imshow("img", bgr)
    cv2.imshow("win", image)
    cv2.waitKey()
    cv2.destroyWindow("win")
改完支持多batch_size,但是多张图片预测,结果都是第一张图片的。
import torch
import os
import sys
sys.path.insert(0, os.path.abspath('..'))
from model import fasterrpn_resnet50_fpn, fasterroiheader_resnet50_fpn
import math
import glob
from torchvision import transforms
import cv2
import os
if __name__ == '__main__':
    # Step 5: run RPN + ROI header in PyTorch on the local JPEGs, preview the
    # detections, then export the ROI header to "header.onnx".
    model = fasterrpn_resnet50_fpn(pretrained=True)
    model_header = fasterroiheader_resnet50_fpn(pretrained=True, transform=model.transform,
                                                box_score_thresh=0.5, box_nms_thresh=0.3)
    model.eval()
    model_header.eval()

    transform_func = transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # Round 600 x 800 up to multiples of 32, which gives 608 x 800.
    input_height, input_width = (600 + 31) // 32 * 32, (800 + 31) // 32 * 32

    image_list = list()
    img_tensor_list = list()
    for item in glob.glob("./*.jpg"):
        raw = cv2.imread(item)
        if raw is None:
            # cv2.imread returns None (it does not raise) for unreadable files.
            print("skip unreadable image", item)
            continue
        # Read and resize once; keep the BGR copy for drawing.
        resized = cv2.resize(raw, dsize=(input_width, input_height))
        image_list.append(resized)
        img_tensor_list.append(transform_func(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)))

    with torch.no_grad():
        proposals, features, images, original_image_sizes = model(img_tensor_list)
        # Cache the intermediate RPN results once for later offline debugging.
        if not os.path.exists('buffle.pkl'):
            with open('buffle.pkl', 'wb') as f:
                torch.save({
                    'proposals': proposals,
                    'features': features,
                    'image_sizes': images.image_sizes
                }, f)
        for feature in features.values():
            print('feature.shape',feature.shape)
        # Drop the first column of every proposal tensor before the ROI header.
        proposals = [item[:, 1:] for item in proposals]
        if not os.path.exists('roi_result.pkl'):
            roi_result = model_header.roi_heads.box_roi_pool(features, proposals, images.image_sizes)
            with open('roi_result.pkl', 'wb') as f:
                torch.save({
                    'roi_result': roi_result
                }, f)

        # Dummy inputs shared by the (disabled) RoIAlign export and the header export.
        dummy_image = torch.rand(1, 3, input_height, input_width)
        batch_size = int(dummy_image.size(0))
        dummy_proposals = [torch.rand((model.rpn.post_nms_top_n(), 4)) for _ in range(batch_size)]
        height, width = int(dummy_image.size(2)), int(dummy_image.size(3))
        # Feature level i is sized as the input divided by 2 ** (i + 2), rounded up.
        dummy_features = {
            key: torch.rand(batch_size, model.backbone.out_channels, math.ceil(height / (2 ** (i + 2))),
                            math.ceil(width / (2 ** (i + 2)))) for i, key in enumerate(features.keys())}
        print(dummy_features.keys())
        input_names = ['feature_' + key for key in dummy_features.keys()] + ['proposals']

        class Wrapper(torch.nn.Module):
            """Bind fixed image sizes so the wrapped module takes only (x, boxes)."""

            def __init__(self, image_sizes, model):
                super(Wrapper, self).__init__()
                self.image_sizes = image_sizes
                self.model = model

            def forward(self, x, boxes):
                return self.model(x, boxes, self.image_sizes)

        # Disabled: standalone export of the RoIAlign stage.
        # roialign_dynamic_axes = {'proposals': {0: "N"}}
        # roialign_dynamic_axes.update({'feature_' + key: {0: "B"} for key in dummy_features.keys()})
        # roialign_dynamic_axes.update({name: {0: "N"} for name in ['outputs']})
        # torch.onnx.export(Wrapper(images.image_sizes, model_header.roi_heads.box_roi_pool), (features, dummy_proposals),
        #                   "roialign.onnx", verbose=True,
        #                   do_constant_folding=True,
        #                   input_names=input_names, output_names=["outputs"],
        #                   dynamic_axes=roialign_dynamic_axes,
        #                   opset_version=11)
        # print(roi_result.shape)

        result = model_header(features, proposals, images, original_image_sizes)

    for i, item in enumerate(result):
        image = image_list[i].copy()
        for score_box in item['boxes']:
            box = score_box.numpy()
            cv2.rectangle(image, tuple(map(int, box[0:2])), tuple(map(int, box[2:4])), (0, 255, 0))
        cv2.imshow("win", image)
        cv2.waitKey()
        cv2.destroyWindow("win")

    output_names = ["boxes", "labels", "scores"]
    # Only the proposals and the three outputs get a dynamic first axis here.
    dynamic_axes = {'proposals': {0: "N"}}
    dynamic_axes.update({name: {0: "N"} for name in output_names})
    onnx_save_path = "header.onnx"
    torch.onnx.export(model_header, (dummy_features, dummy_proposals, dummy_image), onnx_save_path, verbose=True,
                      do_constant_folding=True,
                      input_names=input_names, output_names=output_names,
                      dynamic_axes=dynamic_axes,
                      opset_version=11)
第6步测试backbone输出维度:
import onnx_graphsurgeon as gs
import onnx
def cutOnnx(onnx_save_path="rpn_backbone_resnet50.onnx"):
    """Drop the last graph output of the backbone ONNX model and save it in place.

    Args:
        onnx_save_path: Path of the ONNX file to load and overwrite.
    """
    graph = gs.import_onnx(onnx.load(onnx_save_path))
    for output in graph.outputs:
        print(0,output)
    # remove feature pool (the last output)
    graph.outputs = graph.outputs[0:-1]
    for output in graph.outputs:
        print(1,output)
    # cleanup() prunes the nodes that no longer feed any remaining output.
    graph.cleanup()
    onnx.save(gs.export_onnx(graph), onnx_save_path)


if __name__ == '__main__':
    cutOnnx()
结果:
# 0 Variable (rpn_boxes): (shape=[1, 4390, 6], dtype=float32)
# 0 Variable (feature_0): (shape=[1, 256, 152, 200], dtype=float32)
# 0 Variable (feature_1): (shape=[1, 256, 76, 100], dtype=float32)
# 0 Variable (feature_2): (shape=[1, 256, 38, 50], dtype=float32)
# 0 Variable (feature_3): (shape=[1, 256, 19, 25], dtype=float32)
# 1 Variable (rpn_boxes): (shape=[1, 4390, 6], dtype=float32)
# 1 Variable (feature_0): (shape=[1, 256, 152, 200], dtype=float32)
# 1 Variable (feature_1): (shape=[1, 256, 76, 100], dtype=float32)
# 1 Variable (feature_2): (shape=[1, 256, 38, 50], dtype=float32)
x07reduce_header_onnx.py:精简header onnx,仅保留全连接层部分。具体细节:
1. 将网络的输入更改为roialigned_feature和proposals,去掉roi align和fpn_level的计算部分。
tensor = tensors["218"]
Line11 这段代码即对该reshape操作输入的替换。
218哪里来的?
netron软件查看的,右下角INPUTS的 data name218
这个层在整个结构的大概位置:
这里可以根据自己的生成的onnx改变名字,下面是局部放大点的图:
对输出的box和score节点的前面的reshape操作进行处理
对应以下几行代码:
这里需要根据自己的生成的onnx修改,切记
shape_score = gs.Constant(name="shape_score", values=np.array((-1, 90), dtype=np.int64))
shape_boxes = gs.Constant(name="shape_boxes", values=np.array((-1, 90, 4), dtype=np.int64))
shape_boxes_last_node = gs.Constant(name="shape_boxes_last_node", values=np.array((-1, 91, 4), dtype=np.int64))
# Reshape_320 and Reshape_322 are the reshape nodes directly before the box
# and score outputs; fill in the node names highlighted in your own graph.
for node in graph.nodes:
    if node.name == "Reshape_320":
        node.inputs[-1] = shape_boxes
    elif node.name == "Reshape_322":
        node.inputs[-1] = shape_score
    # the last second reshape node relative to box output
    elif node.name == "Reshape_308":
        node.inputs[-1] = shape_boxes_last_node
好像报错了:
[W] colored module is not installed, will not use colors when logging. To enable colors, please install the colored module: python3 -m pip install colored
[E] No function: shape registered for opset: 11
[W] colored module is not installed, will not use colors when logging. To enable colors, please install the colored module: python3 -m pip install colored
[E] No function: __len__ registered for opset: 11
但是没有红色显示
清华源好像不能安装了,用豆瓣的源可以安装:
pip install colored -i https://pypi.doubanio.com/simple
第7步按照作者的转换脚本,开始的时候报错,代码:
import onnx_graphsurgeon as gs
import onnx
import numpy as np
def cutOnnx(onnx_save_path="header.onnx", roi_tensor_name="218", n_class=90,
            box_reshape_name="Reshape_320", score_reshape_name="Reshape_322",
            box_prev_reshape_name="Reshape_308"):
    """Trim the header ONNX graph so it starts at the RoI-aligned feature tensor.

    The graph inputs become ``proposals`` and ``roialigned_feature``, only the
    first and last graph outputs are kept, and the target shapes of three
    Reshape nodes are replaced by constants derived from ``n_class``. The
    result is simplified with onnxsim and written to ``'new_' + onnx_save_path``.

    Tensor and node names depend on the exported graph (inspect it with
    netron), so they are parameters; the defaults are the values used by the
    original script.

    Args:
        onnx_save_path: Path of the exported header model.
        roi_tensor_name: Name of the tensor that becomes the new
            ``roialigned_feature`` input.
        n_class: Size of the class axis in the box/score outputs. The reshape
            before the box output uses ``n_class + 1``.
        box_reshape_name: Reshape node directly before the box output.
        score_reshape_name: Reshape node directly before the score output.
        box_prev_reshape_name: The second-to-last Reshape node on the box path.

    Raises:
        KeyError: If ``roi_tensor_name`` is not a tensor of the graph.
    """
    graph = gs.import_onnx(onnx.load(onnx_save_path))
    tensors = graph.tensors()
    try:
        tensor = tensors[roi_tensor_name]
    except KeyError:
        # Tensor names change between torch versions; point the user at the fix.
        raise KeyError("tensor {!r} not found in {}; look up the Reshape input name in netron "
                       "and pass it as roi_tensor_name".format(roi_tensor_name, onnx_save_path)) from None

    # New inputs: the original last input plus the chosen intermediate tensor.
    graph.inputs = [graph.inputs[-1], tensor.to_variable(dtype=np.float32, shape=('N', 256, 7, 7))]
    graph.inputs[-1].name = "roialigned_feature"
    graph.outputs = [graph.outputs[0], graph.outputs[-1]]

    shape_score = gs.Constant(name="shape_score", values=np.array((-1, n_class), dtype=np.int64))
    shape_boxes = gs.Constant(name="shape_boxes", values=np.array((-1, n_class, 4), dtype=np.int64))
    shape_boxes_last_node = gs.Constant(name="shape_boxes_last_node",
                                        values=np.array((-1, n_class + 1, 4), dtype=np.int64))
    # Replace the shape input of each named Reshape node with its constant.
    new_shapes = {
        box_reshape_name: shape_boxes,
        score_reshape_name: shape_score,
        box_prev_reshape_name: shape_boxes_last_node,
    }
    for node in graph.nodes:
        if node.name in new_shapes:
            node.inputs[-1] = new_shapes[node.name]

    # Declare the outputs as (N, n_class, ...): insert the class axis and
    # make the first axis symbolic.
    for graph_output in graph.outputs:
        graph_output.shape.insert(1, n_class)
        graph_output.shape[0] = 'N'

    graph.cleanup()
    new_onnx_filepath = 'new_'+onnx_save_path
    onnx.save(gs.export_onnx(graph), new_onnx_filepath)

    import onnxsim
    model = onnx.load(new_onnx_filepath)
    # convert model
    model_simp, check = onnxsim.simplify(model, check_n=0,
                                         input_shapes={'roialigned_feature': [1, 256, 7, 7], 'proposals': [1, 4]},
                                         dynamic_input_shape=True)
    onnx.save(model_simp, new_onnx_filepath)


if __name__ == '__main__':
    cutOnnx()
报错:
tensor = tensors["218"]
KeyError: '218'
torch换到作者的版本1.10,这个报错没有了。
修改了一下,支持批量预测:
import onnx_graphsurgeon as gs
import onnx
import numpy as np
def cutOnnx(onnx_save_path="header.onnx", roi_tensor_name="218", n_class=90,
            box_reshape_name="Reshape_320", score_reshape_name="Reshape_322",
            box_prev_reshape_name="Reshape_308"):
    """Trim the header ONNX graph for batched use, starting at the RoI-aligned tensor.

    The graph inputs become ``proposals`` and ``roialigned_feature`` (with a
    symbolic first axis), only the first and last graph outputs are kept, and
    the target shapes of three Reshape nodes are replaced by constants derived
    from ``n_class``. The result is simplified with onnxsim and written to
    ``'new_' + onnx_save_path``.

    Tensor and node names depend on the exported graph, so they are
    parameters; the defaults are the values used by the original script. All
    tensor names are printed first to help locate the right one (for example
    ``"onnx::Reshape_325"`` with newer torch versions).

    Args:
        onnx_save_path: Path of the exported header model.
        roi_tensor_name: Name of the tensor that becomes the new
            ``roialigned_feature`` input.
        n_class: Size of the class axis in the box/score outputs. The reshape
            before the box output uses ``n_class + 1``.
        box_reshape_name: Reshape node directly before the box output.
        score_reshape_name: Reshape node directly before the score output.
        box_prev_reshape_name: The second-to-last Reshape node on the box path.

    Raises:
        KeyError: If ``roi_tensor_name`` is not a tensor of the graph.
    """
    graph = gs.import_onnx(onnx.load(onnx_save_path))
    tensors = graph.tensors()
    for key, value in tensors.items():
        print(key , value)
    try:
        tensor = tensors[roi_tensor_name]
    except KeyError:
        # Tensor names change between torch versions; point the user at the fix.
        raise KeyError("tensor {!r} not found in {}; pick the Reshape input name from the list "
                       "printed above and pass it as roi_tensor_name".format(roi_tensor_name, onnx_save_path)) from None

    # New inputs: the original last input plus the chosen intermediate tensor.
    graph.inputs = [graph.inputs[-1], tensor.to_variable(dtype=np.float32, shape=('batch_size', 256, 7, 7))]
    graph.inputs[-1].name = "roialigned_feature"
    graph.outputs = [graph.outputs[0], graph.outputs[-1]]

    shape_score = gs.Constant(name="shape_score", values=np.array((-1, n_class), dtype=np.int64))
    shape_boxes = gs.Constant(name="shape_boxes", values=np.array((-1, n_class, 4), dtype=np.int64))
    shape_boxes_last_node = gs.Constant(name="shape_boxes_last_node",
                                        values=np.array((-1, n_class + 1, 4), dtype=np.int64))
    # Replace the shape input of each named Reshape node with its constant.
    new_shapes = {
        box_reshape_name: shape_boxes,
        score_reshape_name: shape_score,
        box_prev_reshape_name: shape_boxes_last_node,
    }
    for node in graph.nodes:
        if node.name in new_shapes:
            node.inputs[-1] = new_shapes[node.name]

    # Declare the outputs as (N, n_class, ...): insert the class axis and
    # make the first axis symbolic.
    for graph_output in graph.outputs:
        graph_output.shape.insert(1, n_class)
        graph_output.shape[0] = 'N'

    graph.cleanup()
    new_onnx_filepath = 'new_'+onnx_save_path
    onnx.save(gs.export_onnx(graph), new_onnx_filepath)

    import onnxsim
    model = onnx.load(new_onnx_filepath)
    # convert model; -1 keeps the first axis of both inputs dynamic
    model_simp, check = onnxsim.simplify(model, check_n=0,
                                         input_shapes={'roialigned_feature': [-1, 256, 7, 7], 'proposals': [-1, 4]},
                                         dynamic_input_shape=True)
    onnx.save(model_simp, new_onnx_filepath)


if __name__ == '__main__':
    cutOnnx()
import onnxruntime as rt
import numpy as np
if __name__ == '__main__':
    # Step 8: smoke-test the trimmed header model with random inputs and
    # print the shapes of its two outputs.
    sess = rt.InferenceSession('new_header.onnx')
    input_names = [item.name for item in sess.get_inputs()]
    output_names = [item.name for item in sess.get_outputs()]

    # proposal = np.array([1,1,10,10], dtype=np.float32).reshape(-1, 4)
    batch_size = 1
    input_dict = dict(
        proposals=np.random.randn(batch_size, 4).astype(dtype=np.float32),
        roialigned_feature=np.random.randn(batch_size, 256, 7, 7).astype(dtype=np.float32),
    )
    # Fail early with a readable message if the model's inputs were renamed.
    missing = [name for name in input_names if name not in input_dict]
    if missing:
        raise KeyError("model expects inputs {} but no data was provided for {}".format(input_names, missing))

    pred_onnx = sess.run(output_names, input_dict)
    pred_onnx = dict(zip(output_names, pred_onnx))
    print(pred_onnx['boxes'].shape)
    # print(pred_onnx['boxes'])
    print(pred_onnx['scores'].shape)
    # print(pred_onnx['scores'])
Traceback (most recent call last):
File "D:/work/faster_rcnn_trt/FasterRCNN-resnet50FPN-main/test/x08test_header_onnx.py", line 18, in
pred_onnx = sess.run(output_names, input_dict)
File "C:\Users\admin\AppData\Roaming\Python\Python38\site-packages\onnxruntime\capi\onnxruntime_inference_collection.py", line 200, in run
return self._sess.run(output_names, input_feed, run_options)
onnxruntime.capi.onnxruntime_pybind11_state.RuntimeException: [ONNXRuntimeError] : 6 : RUNTIME_EXCEPTION : Non-zero status code returned while running Reshape node.
Name:'Reshape_308' Status Message: D:\a\_work\1\s\onnxruntime\core\providers\cpu\tensor\reshape_helper.h:36 onnxruntime::ReshapeHelper::ReshapeHelper size != 0 && (input_shape.Size() % size) == 0 was false.
The input tensor cannot be reshaped to the requested shape. Input shape:{8,22,2,2}, requested shape:{-1,3,4}
导出时类别数设置不对,
1. train_res50_fpn.py 中
num_classes=2
2. x03extract_RPN.py中:
num_classes = 2+1
3.x05extract_ROIHeader.py 中:
num_classes = 2 + 1
model = fasterrpn_resnet50_fpn(pretrained=False,num_classes=num_classes)
# weights_dict = torch.load(r"../save_weights/resNetFpn-model-14.pth", map_location='cpu')
# model.load_state_dict(weights_dict['model'],strict=False)
model_header = fasterroiheader_resnet50_fpn(pretrained=False, transform=model.transform, box_score_thresh=0.5,box_nms_thresh=0.3,num_classes=num_classes)
4.x07reduce_header_onnx.py中:
n_class = 2
结果batch_size为1时正确,大于1就报错。
输入两个参数:proposals 和roialigned_feature
维度为 batch_size,4
和batch_size,256, 7, 7
输出:
(1, 90, 4)
(1, 90)
builder\trt_builder.cpp
报错代码:
// Setter for the reshape layer hook. The call that would forward `func` to
// register_layerhook_reshape is commented out, so this function currently
// does nothing with its argument.
void set_layer_hook_reshape(const LayerHookFuncReshape& func){
//register_layerhook_reshape(func);
}
register_layerhook_reshape函数是在NvOnnxParser.cpp中,
OnnxParser代码是在生成的时候用,需要protobuf,版本未知,
protobuf-cpp-3.11.4
pytorch转onnx
开源项目给了转换代码:
test/x01export_FasterRCNN_onnx.py
onnx测试代码:
test/x02test_FasterRCNN_onnx.py
onnx转tensorrt 实战干货总结_AI视觉网奇的博客-CSDN博客_onnx转tensorrt
博客的目录:onnx转tensorrt 分类成功
转trt报错了
[08/06/2022-11:35:29] [TRT] [E] [graphShapeAnalyzer.cpp::nvinfer1::builder::`anonymous-namespace'::ShapeNodeRemover::analyzeShapes::1285] Error Code 4: Miscellaneous (IShuffleLayer Reshape_1226: reshape changes volume. Reshaping [720588174] to [1,4507].)
Completed parsing of ONNX file
Building an engine from file F:\project\jushi\tensorrt-onnx-fasterrcnn-fpn-roialign-master\test\fasterrcnn_backbone_resnet50_fpn_roialign.onnx; this may take a while...
[08/06/2022-11:35:29] [TRT] [E] 4: [network.cpp::nvinfer1::Network::validate::2633] Error Code 4: Internal Error (Network must have at least one output)
解决方法,手动设置最后一层:
last_layer = network.get_layer(network.num_layers - 1)
network.mark_output(last_layer.get_output(0))
代码:
def ONNX_build_engine(onnx_file_path, write_engine=True, engine_file_path='efficientnet_b1.trt'):
    """Build a TensorRT engine from an ONNX file with a dynamic-shape profile.

    Args:
        onnx_file_path: Path of the ONNX model to parse.
        write_engine: When true, serialize the built engine to
            ``engine_file_path``.
        engine_file_path: Destination of the serialized engine; the default is
            the file name used by the original script.

    Returns:
        The built engine, or ``None`` when TensorRT fails to build one.
    """
    G_LOGGER = trt.Logger(trt.Logger.WARNING)
    # 1. Required for dynamic input: create the network in explicit-batch mode.
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    batch_size = 8  # maximum batch size supported at TensorRT inference time
    with trt.Builder(G_LOGGER) as builder, \
            builder.create_network(explicit_batch) as network, \
            trt.OnnxParser(network, G_LOGGER) as parser:
        builder.max_batch_size = batch_size
        config = builder.create_builder_config()
        config.max_workspace_size = GiB(2)
        config.set_flag(trt.BuilderFlag.FP16)

        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            parsed = parser.parse(model.read())
        if not parsed:
            # Report why parsing failed instead of silently continuing; the
            # build is still attempted, as in the original workflow.
            for index in range(parser.num_errors):
                print('ONNX parse error:', parser.get_error(index))
        print('Completed parsing of ONNX file')
        print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))

        # Key step: dynamic inputs need an optimization profile giving the
        # minimum, typical and maximum shape. One set_shape call is needed per
        # dynamic input, and the name must match the one used at ONNX export.
        # Shapes outside the min-max range are rejected at runtime.
        profile = builder.create_optimization_profile()
        profile.set_shape("inputs", (1, 3, 600, 600), (8, 3, 600, 600), (16, 3, 600, 600))
        config.add_optimization_profile(profile)

        # Workaround for "Network must have at least one output": mark the
        # last layer's first output manually.
        last_layer = network.get_layer(network.num_layers - 1)
        network.mark_output(last_layer.get_output(0))

        engine = builder.build_engine(network, config)
        if engine is None:
            # build_engine signals failure by returning None; avoid the
            # AttributeError that serialize() would raise below.
            print("Failed to create Engine")
            return None
        print("Completed creating Engine")

        # Save the engine file.
        if write_engine:
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
        return engine
但是维度不能对齐:
ShapeNodeRemover::analyzeShapes::1285] Error Code 4: Miscellaneous (IShuffleLayer Reshape_1226: reshape changes volume. Reshaping [720588174] to [1,4507].)
torch升级版本后,转trt报错变成了:
Error Code 9: Internal Error (Floor_45: IUnaryLayer cannot be used to compute a shape tensor)
backbone部分可以转trt: rpn_backbone_resnet50.onnx
也可以python转,
onnx生成tensorrt的时候,用的自带的代码,自带的,需要protobuf,
“google/protobuf/port_def.inc”:
官方的faster rcnn导出trt报错:
reshape changes volume. Reshaping [720588174] to [1,4507].)
fasterrcnn.cpp:
缩放尺寸,归一化:
virtual bool preprocess(Job& job, const Mat& image) override {
job.mono_tensor = tensor_allocator_->query();
if (job.mono_tensor == nullptr) {
INFOE("Tensor allocator query failed.");
return false;
}
CUDATools::AutoDevice auto_device(gpu_);
auto& tensor = job.mono_tensor->data();
if (tensor == nullptr) {
// not init
tensor = make_shared();
tensor->set_workspace(make_shared());
}
Size input_size(input_width_, input_height_);
job.additional.compute(image.size(), input_size);
tensor->set_stream(stream_);
tensor->resize(1, 3, input_height_, input_width_);
virtual bool preprocess(Job& job, const Mat& image) override{
void InferImpl::print(){
xxxxx
}
宽800,高608,宽高比和car图片相反。
推理报错:
sub_model推理报错:
[][error][trt_builder.cpp:30]:NVInfer: 3: [executionContext.cpp::nvinfer1::rt::ExecutionContext::setBindingDimensions::944] Error Code 3: API Usage Error (Parameter check failed at: executionContext.cpp::nvinfer1::rt::ExecutionContext::setBindingDimensions::944, condition: profileMaxDims.d[i] >= dimensions.d[i]. Supplied binding dimension [787,256,7,7] for bindings[1] exceed min ~ max range at index 0, maximum dimension in profile is 1, minimum dimension in profile is 1, but supplied dimension is 787.
)
787是anchors的数量:new_header需要的为1。
int number_anchors = roi_align_inputs_index / 6;
感觉就是backbone和new_header的维度没对上。
new_header.onnx
tensorrt推理:
app_fasterrcnn\fasterrcnn.cpp
//forward
engine->forward(false);
engine->synchronize();
infer\trt_infer.cpp
bool execute_result = context->context_->enqueueV2
N和batch_size是一样的。
WARNING: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function.
WARNING: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function.
TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results).