The complete Triton model-repository layout is assembled step by step below.
Running the script below generates a serialized image-processing pipeline with a .dali extension; for the details of each parameter, see the official DALI documentation.
import nvidia.dali as dali

@dali.pipeline_def(batch_size=128, num_threads=4, device_id=0)
def pipe():
    # Receive raw HWC uint8 images from the client under the name "IMG1"
    img0 = dali.fn.external_source(device="cpu", name="IMG1")
    # Resize to the network input resolution (640x384)
    images = dali.fn.resize(img0, resize_x=640, resize_y=384)
    # Scale pixel values to [0, 1] and convert HWC -> CHW float32
    images = dali.fn.crop_mirror_normalize(
        images, dtype=dali.types.FLOAT, output_layout="CHW", std=[255, 255, 255])
    return images

def main():
    # Serialize the pipeline definition; Triton's DALI backend loads this file
    pipe().serialize(filename='model.dali')
    # pipe.build()
    # o = pipe.run()

if __name__ == '__main__':
    main()
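Before serializing, the pipeline can be sanity-checked locally. The sketch below is not part of the original workflow and assumes a placeholder image test.jpg; it builds the pipeline, feeds one batch through external_source, and verifies the output shape.

import numpy as np
from PIL import Image

# Build the pipeline and feed one batch manually ("test.jpg" is a placeholder)
p = pipe()
p.build()
img = np.asarray(Image.open("test.jpg"))   # HWC uint8, e.g. (360, 640, 3)
p.feed_input("IMG1", [img] * 128)          # the decorator fixed batch_size=128
(out,) = p.run()
print(out.as_array().shape)                # expect (128, 3, 384, 640)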
Place the generated .dali file into the Triton model repository and define a config.pbtxt that declares its inputs and outputs.
config.pbtxt:
name: "dalli_preprocess"
backend: "dali"
max_batch_size: 128
input [
{
name: "IMG1"
data_type: TYPE_UINT8
dims: [ 360, 640, 3 ]
}
]
output [
{
name: "DALI_PRE_OUT"
data_type: TYPE_FP32
dims: [3, 384, 640 ]
}
]
instance_group [
{
count: 5
kind: KIND_CPU
}
]
The resulting directory structure should look like this:
models
└── dalli_preprocess
├── 1
│ └── model.dali
└── config.pbtxt
The TensorRT engine is built with the tensorrtx project: https://github.com/wang-xinyu/tensorrtx
Follow that project's documentation; the build produces a "yolov5.engine" and a "libmyplugins.so" plugin library.
Rename yolov5.engine to model.plan.
Define the config.pbtxt:
name: "detect_yolov5"
platform: "tensorrt_plan"
max_batch_size: 128
input [
{
name: "data"
data_type: TYPE_FP32
dims: [3, 384, 640 ]
}
]
output [
{
name: "prob"
data_type: TYPE_FP32
dims: [ 6001, 1, 1]
}
]
instance_group [
{
count: 2
kind: KIND_GPU
}
]
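Before wiring this model into the pipeline, it can help to confirm that the engine's binding names and shapes really are "data" and "prob" with the dimensions declared above. The sketch below is not from the original article; the paths assume the directory layout used here, and the tensorrtx plugin library must be loaded first.

import ctypes
import tensorrt as trt

# Load the custom plugin library so the YOLO decode layer can be deserialized
ctypes.CDLL("plugins/libmyplugins.so")
logger = trt.Logger(trt.Logger.WARNING)
with open("models/detect_yolov5/1/model.plan", "rb") as f:
    engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
for i in range(engine.num_bindings):
    print(engine.get_binding_name(i), engine.get_binding_shape(i))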
Arrange the files created so far in the following layout:
models
├── dalli_preprocess
│ ├── 1
│ │ └── model.dali
│ └── config.pbtxt
│
└── detect_yolov5 (new)
    ├── 1 (new)
    │   └── model.plan (new)
    └── config.pbtxt (new)
plugins (new)
└── libmyplugins.so (new)
The Python backend's execution environment has to be built by hand; the script below does so. Read it carefully and trim it to match your setup.
conda create -n yolov5_base64_input python=3.8
conda activate yolov5_base64_input
export PYTHONNOUSERSITE=True
conda install pytorch torchvision cudatoolkit=11.6 -c pytorch -c conda-forge
conda install numpy pillow conda-pack
pip install pybase64
conda-pack
# This packs the whole environment into a tar.gz archive (yolov5_base64_input.tar.gz)
conda deactivate
conda env remove -n yolov5_base64_input
The post-processing script (model.py):
import json

import numpy as np
import triton_python_backend_utils as pb_utils
from processing import postprocess


class TritonPythonModel:
    def initialize(self, args):
        self.model_config = json.loads(args['model_config'])

    def execute(self, requests):
        responses = []
        for request in requests:
            # Fetch the request inputs
            nms_threshold = pb_utils.get_input_tensor_by_name(request, "POST_INPUT_NMS")
            confidence = pb_utils.get_input_tensor_by_name(request, "POST_INPUT_CONF")
            tensor = pb_utils.get_input_tensor_by_name(request, "POST_INPUT_TENSOR")
            yolov5_result = tensor.as_numpy().astype(np.float32)
            nms_threshold = nms_threshold.as_numpy().astype(np.float32)[0]
            confidence = confidence.as_numpy().astype(np.float32)[0]
            out = {}
            for i in range(len(yolov5_result)):
                # Decode and NMS-filter the raw detections, following the
                # original yolov5 author's approach (network width 640, height 384)
                detected_objects = postprocess(np.array([yolov5_result[i]]), 640, 384, confidence, nms_threshold)
                pic_per = list(map(lambda x: x.out_str(), detected_objects))
                out['pic_' + str(i)] = pic_per
            out_tensor_0 = pb_utils.Tensor(
                "POST_OUTPUT",
                np.array([str(out)]).astype(np.bytes_))
            responses.append(pb_utils.InferenceResponse([out_tensor_0]))
        return responses

    def finalize(self):
        """`finalize` is called only once when the model is being unloaded.
        Implementing `finalize` is OPTIONAL. It lets the model perform any
        necessary clean-up before exit.
        """
        print('Cleaning up...')
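The article does not list processing.py or boundingbox.py. Below is a hypothetical sketch of what they might contain, assuming the tensorrtx yolov5 output layout: element 0 of the 6001-float buffer is the detection count, followed by up to 1000 detections of 6 floats each (cx, cy, w, h, confidence, class_id). The real files should follow the original yolov5/tensorrtx post-processing; this sketch uses a simple class-agnostic greedy NMS.

import numpy as np

class BoundingBox:
    def __init__(self, cls, conf, x1, y1, x2, y2):
        self.cls, self.conf = int(cls), float(conf)
        self.box = (float(x1), float(y1), float(x2), float(y2))

    def out_str(self):
        return f"class={self.cls} conf={self.conf:.3f} box={self.box}"

def postprocess(output, img_w, img_h, conf_th, nms_th):
    # img_w / img_h mirror the call in model.py; rescaling boxes back to the
    # original image size is omitted in this sketch.
    buf = output.reshape(-1)
    n = int(buf[0])                         # element 0: detection count
    dets = buf[1:1 + n * 6].reshape(n, 6)   # (cx, cy, w, h, conf, class_id)
    dets = dets[dets[:, 4] >= conf_th]
    # center format -> corner format
    boxes = np.empty((len(dets), 4), dtype=np.float32)
    boxes[:, 0] = dets[:, 0] - dets[:, 2] / 2
    boxes[:, 1] = dets[:, 1] - dets[:, 3] / 2
    boxes[:, 2] = dets[:, 0] + dets[:, 2] / 2
    boxes[:, 3] = dets[:, 1] + dets[:, 3] / 2
    # greedy IoU-based NMS over all classes
    keep = []
    order = dets[:, 4].argsort()[::-1]
    while order.size:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_r = (boxes[order[1:], 2] - boxes[order[1:], 0]) * \
                 (boxes[order[1:], 3] - boxes[order[1:], 1])
        iou = inter / (area_i + area_r - inter + 1e-9)
        order = order[1:][iou <= nms_th]
    return [BoundingBox(dets[i, 5], dets[i, 4], *boxes[i]) for i in keep]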
Define the input/output config.pbtxt (the execution environment must be specified here; the path below is the one inside the Docker container):
name: "detect_postprocess"
backend: "python"
max_batch_size: 128
input [
{
name: "POST_INPUT_TENSOR"
data_type: TYPE_FP32
dims: [ 6001, 1, 1 ]
},
{
name: "POST_INPUT_CONF"
data_type: TYPE_FP32
dims: [ 1]
},
{
name: "POST_INPUT_NMS"
data_type: TYPE_FP32
dims: [ 1]
}
]
output [
{
name: "POST_OUTPUT"
data_type: TYPE_STRING
dims: [ 1 ]
}
]
parameters: {
key: "EXECUTION_ENV_PATH",
value: {string_value: "/plugins/yolov5_base64_input.tar.gz"}
}
instance_group [
{
count: 5
kind: KIND_GPU
}
]
The directory structure at this point:
models
├── dalli_preprocess
│ ├── 1
│ │ └── model.dali
│ └── config.pbtxt
│
├── detect_postprocess (new)
│   ├── 1 (new)
│   │   ├── boundingbox.py (new)
│   │   ├── model.py (new)
│   │   └── processing.py (new)
│   └── config.pbtxt (new)
│
└── detect_yolov5
├── 1
│ └── model.plan
└── config.pbtxt
plugins
├── libmyplugins.so
└── yolov5_base64_input.tar.gz (new)
Define the ensemble's config.pbtxt:
name: "detect_yolov5_pipeline"
platform: "ensemble"
max_batch_size: 128
input [
{
name: "img1"
data_type: TYPE_UINT8
dims: [ 360, 640, 3 ]
},
{
name: "nms_threshold"
data_type: TYPE_FP32
dims: [ 1 ]
},
{
name: "confidence"
data_type: TYPE_FP32
dims: [ 1 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_STRING
dims: [ 1 ]
}
]
ensemble_scheduling {
step [
{
model_name: "detect_dalli_preprocess"
model_version: -1
input_map {
key: "IMG1"
value: "img1"
}
output_map {
key: "DALI_PRE_OUT"
value: "preprocessed_image"
}
},
{
model_name: "detect_yolov5"
model_version: -1
input_map {
key: "data"
value: "preprocessed_image"
},
output_map {
key: "prob"
value: "infer_out"
}
},
{
model_name: "detect_postprocess"
model_version: -1
input_map {
key: "POST_INPUT_TENSOR"
value: "infer_out"
},
input_map {
key: "POST_INPUT_CONF"
value: "confidence"
},
input_map {
key: "POST_INPUT_NMS"
value: "nms_threshold"
},
output_map {
key: "POST_OUTPUT"
value: "OUTPUT0"
}
}
]
}
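Note how the maps are wired: in each step, the key of an input_map/output_map is a tensor name declared in that composing model's own config, while the value is an ensemble-level tensor name. "preprocessed_image" and "infer_out" exist only inside the ensemble and carry data between the three steps.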
The final directory structure:
models
├── dalli_preprocess
│ ├── 1
│ │ └── model.dali
│ └── config.pbtxt
├── detect_postprocess
│ ├── 1
│ │ ├── __pycache__
│ │ │ ├── boundingbox.cpython-38.pyc
│ │ │ ├── model.cpython-38.pyc
│ │ │ └── processing.cpython-38.pyc
│ │ ├── boundingbox.py
│ │ ├── model.py
│ │ └── processing.py
│ └── config.pbtxt
├── detect_yolov5
│ ├── 1
│ │ └── model.plan
│ └── config.pbtxt
└── detect_yolov5_pipeline (new)
    ├── 1 (new, empty folder)
    └── config.pbtxt (new)
plugins
├── libmyplugins.so
└── yolov5_base64_input.tar.gz
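Finally, launch Triton. LD_PRELOAD loads the tensorrtx plugin library into the server process so the TensorRT engine can be deserialized: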
docker run \
--gpus 1 \
--shm-size=2g \
--rm \
-p8000:8000 -p8001:8001 -p8002:8002 \
-v /home/ubuntu/project/triton_deploy/models:/models -v /home/ubuntu/project/triton_deploy/plugins:/plugins \
--env LD_PRELOAD=/plugins/libmyplugins.so nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver \
--model-repository=/models
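Once the server reports all four models as READY, the whole pipeline can be exercised from Python. A minimal client sketch, assuming the tritonclient package (pip install tritonclient[http]) and a placeholder image test.jpg:

import numpy as np
import tritonclient.http as httpclient
from PIL import Image

client = httpclient.InferenceServerClient(url="localhost:8000")
# Placeholder input; the ensemble expects HWC uint8 at 360x640x3
img = np.asarray(Image.open("test.jpg").resize((640, 360)), dtype=np.uint8)

inputs = [
    httpclient.InferInput("img1", [1, 360, 640, 3], "UINT8"),
    httpclient.InferInput("nms_threshold", [1, 1], "FP32"),
    httpclient.InferInput("confidence", [1, 1], "FP32"),
]
inputs[0].set_data_from_numpy(img[np.newaxis])
inputs[1].set_data_from_numpy(np.array([[0.4]], dtype=np.float32))
inputs[2].set_data_from_numpy(np.array([[0.5]], dtype=np.float32))

result = client.infer("detect_yolov5_pipeline", inputs)
print(result.as_numpy("OUTPUT0"))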