Notes on Deploying YOLOv5 with Triton (Part 1)
Notes on Deploying YOLOv5 with Triton (Part 2)
Notes on Deploying YOLOv5 with Triton (Part 3)
GitHub link
In Notes on Deploying YOLOv5 with Triton (Part 3), the model was deployed to the server and inference was driven by client-side code. In this post, the inference logic moves into the Python backend, i.e., inference now runs on the server side. The client only calls an interface: what it sends is the path of the input image, and what it receives is the path of the output image. To reduce the network latency of transferring image data, the images folder is mounted into the container, so the server only needs the image path sent by the client to run inference on the local file and return the path of the annotated output image. The input and output directory paths are hard-coded.
tensorrt-server
├── client.py
├── images
│   ├── input_img
│   │   └── 4.jpg
│   └── output_img
│       ├── 20220816
│       │   ├── 48c5a660-0fbd-4e21-9667-f668d3b8148e.jpg
│       │   ├── 7aee8a1a-5599-4d3d-a5e4-bfda3f35443b.jpg
│       │   └── ae09a6a3-da83-4b50-85af-7be13f1d3a7d.jpg
│       └── 20220817
│           ├── 3f086bb3-7137-40f6-890a-7871ae43817b.jpg
│           ├── 573ae2d4-e6d8-4a61-8360-bebb3eb3252c.jpg
│           ├── 7563cb88-3be0-4248-be81-a7d379ff29a5.jpg
│           ├── ac01b9f3-0f8c-428d-886a-587a5b9c92fc.jpg
│           └── cacab491-67ee-4cfc-9790-983143b1283c.jpg
├── models
│   ├── custom_model
│   │   ├── 1
│   │   │   ├── model.py
│   │   │   └── __pycache__
│   │   │       ├── hat_utils.cpython-38.pyc
│   │   │       └── model.cpython-38.pyc
│   │   ├── config.pbtxt
│   │   └── triton_server.tar.gz
│   └── model_0
│       └── 1
│           └── model.plan
├── plugins
│   └── libmyplugins.so
└── readme.txt
The custom_model directory is the Python backend part; the model inference is implemented in the model.py file. The dependency libraries needed during inference have to be packaged under the custom_model folder as the triton_server.tar.gz archive, which is then declared at the end of the config.pbtxt configuration file.
For how to create triton_server.tar.gz, see the "using a local environment" section at the end of Notes on Deploying YOLOv5 with Triton (Part 2): install whatever dependencies you need into a conda virtual environment and then pack that environment into the tarball, as sketched below.
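A minimal sketch of the packing step, assuming the dependencies (opencv-python, numpy, torch, and so on) were installed into a conda environment named triton_server; the environment name and the use of conda-pack's Python API are illustrative, not taken from the original post:
# pack_env.py -- sketch: build triton_server.tar.gz from an existing conda environment
# requires the conda-pack package (pip install conda-pack)
import conda_pack
# "triton_server" is a placeholder name for the environment holding model.py's dependencies
conda_pack.pack(name="triton_server", output="triton_server.tar.gz")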
The config.pbtxt configuration file is as follows:
name: "custom_model"
backend: "python"
input [
{
name: "input0"
data_type: TYPE_STRING
dims: [1]
}
]
output [
{
name: "output0"
data_type: TYPE_STRING
dims: [1]
}
]
instance_group [
{
count: 1
kind: KIND_GPU
gpus: [ 0 ]
}
]
parameters: {
key: "EXECUTION_ENV_PATH",
value: {string_value: "$$TRITON_MODEL_DIRECTORY/triton_server.tar.gz"}
}
The model's input is a single string and its output is a single string, i.e. an image path.
The model.py file is as follows:
# model.py
import json
import numpy as np
import triton_python_backend_utils as pb_utils
import cv2
import random
import math
from enum import Enum
from torch.utils.dlpack import from_dlpack
import time
import uuid
import os
_LINE_THICKNESS_SCALING = 500.0
_TEXT_THICKNESS_SCALING = 700.0
_TEXT_SCALING = 520.0
np.random.seed(69)
RAND_COLORS = np.random.randint(10, 255, (80, 3), "int") # random colors used for class visualization
class COCOLabels(Enum):
hat = 0
no_hat = 1
class BoundingBox:
def __init__(self, classID, confidence, x1, x2, y1, y2, image_width, image_height):
self.classID = classID
self.confidence = confidence
self.x1 = x1
self.x2 = x2
self.y1 = y1
self.y2 = y2
self.u1 = x1 / image_width
self.u2 = x2 / image_width
self.v1 = y1 / image_height
self.v2 = y2 / image_height
def box(self):
return (self.x1, self.y1, self.x2, self.y2)
def width(self):
return self.x2 - self.x1
def height(self):
return self.y2 - self.y1
def center_absolute(self):
return (0.5 * (self.x1 + self.x2), 0.5 * (self.y1 + self.y2))
def center_normalized(self):
return (0.5 * (self.u1 + self.u2), 0.5 * (self.v1 + self.v2))
def size_absolute(self):
return (self.x2 - self.x1, self.y2 - self.y1)
def size_normalized(self):
return (self.u2 - self.u1, self.v2 - self.v1)
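# Triton's Python backend requires a class named TritonPythonModel in model.py:
# initialize() runs once when the model is loaded, execute() is called for every
# batch of requests, and finalize() runs when the model is unloaded.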
class TritonPythonModel:
def initialize(self, args):
self.model_config = model_config = json.loads(args['model_config'])
output0_config = pb_utils.get_output_config_by_name(model_config, "output0")
# output1_config = pb_utils.get_output_config_by_name(model_config, "output1")
self.output0_dtype = pb_utils.triton_string_to_numpy(output0_config['data_type'])
# self.output1_dtype = pb_utils.triton_string_to_numpy(output1_config['data_type'])
def execute(self, requests):
output0_dtype = self.output0_dtype
# output1_dtype = self.output1_dtype
responses = []
for request in requests:
in_0 = pb_utils.get_input_tensor_by_name(request, 'input0')
# in_1 = pb_utils.get_input_tensor_by_name(request, 'input1')
in_0 = in_0.as_numpy() # get the input string tensor as a numpy array
text = in_0[0].decode("utf-8") # element 0 is the UTF-8 encoded path sent by the client; decode it back to a string
# in_1 = in_1.as_numpy()
t1 = time.time()
img = self._recognize(text)
t2 = time.time()
print('inference time is: {}ms'.format(1000 * (t2 - t1)))
# out_1 = in_1
# directory management: create a folder per date under output_img and name output images with a UUID
time_now = time.strftime("%Y%m%d", time.localtime())
output_path = '/images/output_img/' + time_now
output_path_0 = '/' + time_now
if not os.path.exists(output_path):
os.makedirs(output_path)
# out_pic_name = output_path + '/' + time.strftime("%H_%M_%S", time.localtime()) + '.jpg'
uuidFour = str(uuid.uuid4())
out_pic_name = output_path + '/' + uuidFour + '.jpg'
out_pic_name_0 = output_path_0 + '/' + uuidFour + '.jpg'
cv2.imwrite(out_pic_name, img.astype(np.uint8))
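# the date-relative path (out_pic_name_0) is what gets returned to the client;
# the absolute /images/... path above is only needed for cv2.imwrite inside the container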
out_pic_name = np.array(out_pic_name_0)
out_tensor_0 = pb_utils.Tensor('output0', out_pic_name.astype(output0_dtype))
inference_response = pb_utils.InferenceResponse(output_tensors=[out_tensor_0])
responses.append(inference_response)
return responses
def finalize(self):
print('Cleaning up...')
def _recognize(self, draw_path):
con_thres = 0.25 # confidence threshold
iou_thres = 0.45 # IoU threshold for non-maximum suppression
img_size = [640, 640]
# input_image = draw.copy()
# input_image_buffer = self.preprocess(input_image, img_size)
# draw = draw.copy()
draw = cv2.imread(draw_path)
# src_size = draw.shape[:2]
# pad/letterbox the image and normalize
# img = self.letterbox(draw,img_size,stride=32)[0]
# img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
# img = np.ascontiguousarray(img)
# normalize
# img=img.astype(dtype=np.float32)
# img/=255.0
# expand dimensions
# input_image_buffer=np.expand_dims(img,axis=0).astype(np.float32)
input_image_buffer = self.preprocess(draw, img_size)
input_image_buffer = np.expand_dims(input_image_buffer, axis=0)
# print(input_image_buffer.shape)
# expand dimensions
# input_image_buffer = np.expand_dims(input_image_buffer, axis=0)
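# BLS (Business Logic Scripting): call the TensorRT model 'model_0' from inside this
# Python model; the tensor names 'data' and 'prob' must match the bindings of model.plan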
inference_request = pb_utils.InferenceRequest(
model_name = 'model_0',
requested_output_names=['prob'],
inputs=[pb_utils.Tensor('data', input_image_buffer)]
)
inference_response = inference_request.exec()
# result = pb_utils.get_output_tensor_by_name(inference_response, 'prob')
# result = result.as_numpy()
result = self.pb_tensor_to_numpy(pb_utils.get_output_tensor_by_name(inference_response, 'prob'))
# print(result.shape)
detected_objects = self.postprocess(result, draw.shape[1], draw.shape[0], img_size, con_thres, iou_thres)
# print(detected_objects)
for box in detected_objects:
# print(f"{COCOLabels(box.classID).name}: {box.confidence}")
draw = self.plot_one_box(box.box(), draw,color=tuple(RAND_COLORS[box.classID % 64].tolist()), label=f"{COCOLabels(box.classID).name}:{box.confidence:.2f}",)
return draw
def letterbox(self, img, new_shape=(640, 640), color=(114, 114, 114), auto=False, scaleFill=False, scaleup=True,
stride=32):
'''Letterbox: resize and pad the image to the target shape while keeping the aspect ratio'''
# Resize and pad image while meeting stride-multiple constraints
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better test mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return img, ratio, (dw, dh)
def pb_tensor_to_numpy(self,pb_tensor):
'''Convert a pb_utils.Tensor to a numpy array (copying back from GPU via DLPack if needed)'''
if pb_tensor.is_cpu():
return pb_tensor.as_numpy()
else:
pytorch_tensor = from_dlpack(pb_tensor.to_dlpack())
return pytorch_tensor.cpu().numpy()
def preprocess(self, raw_bgr_image, input_shape):
input_w, input_h = input_shape
image_raw = raw_bgr_image
h, w, c = image_raw.shape
image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
# Calculate widht and height and paddings
r_w = input_w / w
r_h = input_h / h
if r_h > r_w:
tw = input_w
th = int(r_w * h)
tx1 = tx2 = 0
ty1 = int((input_h - th) / 2)
ty2 = input_h - th - ty1
else:
tw = int(r_h * w)
th = input_h
tx1 = int((input_w - tw) / 2)
tx2 = input_w - tw - tx1
ty1 = ty2 = 0
# Resize the image with long side while maintaining ratio
image = cv2.resize(image, (tw, th))
# Pad the short side with (128,128,128)
image = cv2.copyMakeBorder(
image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, value=(128, 128, 128)
)
image = image.astype(np.float32)
# Normalize to [0,1]
image /= 255.0
# HWC to CHW format:
image = np.transpose(image, [2, 0, 1])
return image
def postprocess(self, output, origin_w, origin_h, input_shape, conf_th=0.5, nms_threshold=0.5, letter_box=False):
"""Postprocess TensorRT outputs.
# Args
output: list of detections with schema
[num_boxes,cx,cy,w,h,conf,cls_id, cx,cy,w,h,conf,cls_id, ...]
conf_th: confidence threshold
letter_box: boolean, referring to _preprocess_yolo()
# Returns
list of bounding boxes with all detections above threshold and after nms, see class BoundingBox
"""
# Get the num of boxes detected
# Here we use the first row of output in that batch_size = 1
output = output[0]
num = int(output[0])
# Reshape to a two dimentional ndarray
pred = np.reshape(output[1:], (-1, 6))[:num, :]
# Do nms
boxes = self.non_max_suppression(pred, origin_h, origin_w, input_shape[0], input_shape[1], conf_thres=conf_th, nms_thres=nms_threshold)
result_boxes = boxes[:, :4] if len(boxes) else np.array([])
result_scores = boxes[:, 4] if len(boxes) else np.array([])
result_classid = boxes[:, 5].astype(int) if len(boxes) else np.array([])
detected_objects = []
for box, score, label in zip(result_boxes, result_scores, result_classid):
# BoundingBox expects (classID, confidence, x1, x2, y1, y2, image_width, image_height)
detected_objects.append(BoundingBox(label, score, box[0], box[2], box[1], box[3], origin_w, origin_h))
return detected_objects
def non_max_suppression(self, prediction, origin_h, origin_w, input_w, input_h, conf_thres=0.5, nms_thres=0.4):
"""
description: Removes detections with lower object confidence score than 'conf_thres' and performs
Non-Maximum Suppression to further filter detections.
param:
prediction: detections, (x1, y1, x2, y2, conf, cls_id)
origin_h: original image height
origin_w: original image width
conf_thres: a confidence threshold to filter detections
nms_thres: a iou threshold to filter detections
return:
boxes: output after nms with the shape (x1, y1, x2, y2, conf, cls_id)
"""
# Get the boxes that score > CONF_THRESH
boxes = prediction[prediction[:, 4] >= conf_thres]
# print(boxes)
# Trandform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
boxes[:, :4] = self.xywh2xyxy(boxes[:, :4], origin_h, origin_w, input_w, input_h )
# clip the coordinates
boxes[:, 0] = np.clip(boxes[:, 0], 0, origin_w -1)
boxes[:, 2] = np.clip(boxes[:, 2], 0, origin_w -1)
boxes[:, 1] = np.clip(boxes[:, 1], 0, origin_h -1)
boxes[:, 3] = np.clip(boxes[:, 3], 0, origin_h -1)
# Object confidence
confs = boxes[:, 4]
# Sort by the confs
boxes = boxes[np.argsort(-confs)]
# Perform non-maximum suppression
keep_boxes = []
while boxes.shape[0]:
large_overlap = self.bbox_iou(np.expand_dims(boxes[0, :4], 0), boxes[:, :4]) > nms_thres
label_match = boxes[0, -1] == boxes[:, -1]
# Indices of boxes with lower confidence scores, large IOUs and matching labels
invalid = large_overlap & label_match
keep_boxes += [boxes[0]]
boxes = boxes[~invalid]
boxes = np.stack(keep_boxes, 0) if len(keep_boxes) else np.array([])
return boxes
def xywh2xyxy(self, x, origin_h, origin_w, input_w, input_h):
"""
description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
param:
origin_h: height of original image
origin_w: width of original image
x: A boxes numpy, each row is a box [center_x, center_y, w, h]
return:
y: A boxes numpy, each row is a box [x1, y1, x2, y2]
"""
y = np.zeros_like(x)
r_w = input_w / origin_w
r_h = input_h / origin_h
if r_h > r_w:
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2 - (input_h - r_w * origin_h) / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2 - (input_h - r_w * origin_h) / 2
y /= r_w
else:
y[:, 0] = x[:, 0] - x[:, 2] / 2 - (input_w - r_h * origin_w) / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2 - (input_w - r_h * origin_w) / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
y /= r_h
return y
def bbox_iou(self, box1, box2, x1y1x2y2=True):
"""
description: compute the IoU of two bounding boxes
param:
box1: A box coordinate (can be (x1, y1, x2, y2) or (x, y, w, h))
box2: A box coordinate (can be (x1, y1, x2, y2) or (x, y, w, h))
x1y1x2y2: select the coordinate format
return:
iou: computed iou
"""
if not x1y1x2y2:
# Transform from center and width to exact coordinates
b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
else:
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
# Get the coordinates of the intersection rectangle
inter_rect_x1 = np.maximum(b1_x1, b2_x1)
inter_rect_y1 = np.maximum(b1_y1, b2_y1)
inter_rect_x2 = np.minimum(b1_x2, b2_x2)
inter_rect_y2 = np.minimum(b1_y2, b2_y2)
# Intersection area
inter_area = np.clip(inter_rect_x2 - inter_rect_x1 + 1, 0, None) * \
np.clip(inter_rect_y2 - inter_rect_y1 + 1, 0, None)
# Union Area
b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
return iou
def render_box(self, img, box, color=(200, 200, 200)):
"""
Render a box. Calculates scaling and thickness automatically.
:param img: image to render into
:param box: (x1, y1, x2, y2) - box coordinates
:param color: (b, g, r) - box color
:return: updated image
"""
x1, y1, x2, y2 = box
thickness = int(
round(
(img.shape[0] * img.shape[1])
/ (_LINE_THICKNESS_SCALING * _LINE_THICKNESS_SCALING)
)
)
thickness = max(1, thickness)
img = cv2.rectangle(
img,
(int(x1), int(y1)),
(int(x2), int(y2)),
color,
thickness=thickness
)
return img
def render_filled_box(self, img, box, color=(200, 200, 200)):
"""
Render a box. Calculates scaling and thickness automatically.
:param img: image to render into
:param box: (x1, y1, x2, y2) - box coordinates
:param color: (b, g, r) - box color
:return: updated image
"""
x1, y1, x2, y2 = box
img = cv2.rectangle(
img,
(int(x1), int(y1)),
(int(x2), int(y2)),
color,
thickness=cv2.FILLED
)
return img
def get_text_size(self, img, text, normalised_scaling=1.0):
"""
Get calculated text size (as box width and height)
:param img: image reference, used to determine appropriate text scaling
:param text: text to display
:param normalised_scaling: additional normalised scaling. Default 1.0.
:return: (width, height) - width and height of text box
"""
thickness = int(
round(
(img.shape[0] * img.shape[1])
/ (_TEXT_THICKNESS_SCALING * _TEXT_THICKNESS_SCALING)
)
* normalised_scaling
)
thickness = max(1, thickness)
scaling = img.shape[0] / _TEXT_SCALING * normalised_scaling
return cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scaling, thickness)[0]
def render_text(self, img, text, pos, color=(200, 200, 200), normalised_scaling=1.0):
"""
Render a text into the image. Calculates scaling and thickness automatically.
:param img: image to render into
:param text: text to display
:param pos: (x, y) - upper left coordinates of render position
:param color: (b, g, r) - text color
:param normalised_scaling: additional normalised scaling. Default 1.0.
:return: updated image
"""
x, y = pos
thickness = int(
round(
(img.shape[0] * img.shape[1])
/ (_TEXT_THICKNESS_SCALING * _TEXT_THICKNESS_SCALING)
)
* normalised_scaling
)
thickness = max(2, thickness)
scaling = img.shape[0] / _TEXT_SCALING * normalised_scaling
size = self.get_text_size(img, text, normalised_scaling)
cv2.putText(
img,
text,
(int(x), int(y + size[1])),
cv2.FONT_HERSHEY_SIMPLEX,
scaling,
color,
thickness=thickness,
)
return img
def plot_one_box(self, x, img, color=None, label=None, line_thickness=None):
"""
description: Plots one bounding box on image img,
this function comes from YoLov5 project.
param:
x: a box likes [x1,y1,x2,y2]
img: a opencv image object
color: color to draw rectangle, such as (0,255,0)
label: str
line_thickness: int
return:
no return
"""
tl = (
line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
) # line/font thickness
if color == None:
color = [np.random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
cv2.putText(
img,
label,
(c1[0], c1[1] - 2),
0,
tl / 3,
[225, 255, 255],
thickness=tf,
lineType=cv2.LINE_AA,
)
return img
The model_0 directory holds the trained model. The model.plan file is the serialized TensorRT engine; see Notes on Deploying YOLOv5 with Triton (Part 3) for how to generate it, or produce it with your own TensorRT conversion workflow.
The plugins directory holds the .so file generated during the TensorRT conversion, libmyplugins.so; it is preloaded into the server via LD_PRELOAD in the docker run command below.
Under the images directory I created two sub-directories: input_img for input images and output_img for output images. Inside output_img, a folder named after the current date is created automatically, and the generated images are given random UUID file names.
Run the Docker container and start the server:
sudo docker run --gpus all -it --rm --name tensorrt-server --shm-size=1g -p8080:8000 -p8081:8001 -p8082:8002 -v $(pwd)/models:/models -v $(pwd)/plugins:/plugins -v $(pwd)/images:/images --env LD_PRELOAD=/plugins/libmyplugins.so nvcr.io/nvidia/tritonserver:21.09-py3
tritonserver --model-repository=/models --strict-model-config=false --log-verbose 1
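Optionally, before running the client you can check from the host that the server and both models are ready. A minimal sketch using the same tritonclient HTTP API as client.py (127.0.0.1:8080 simply mirrors the port mapping above; this check is not part of the original workflow):
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url='127.0.0.1:8080')
print(client.is_server_ready())               # True once Triton has finished loading
print(client.is_model_ready('custom_model'))  # Python backend model
print(client.is_model_ready('model_0'))       # TensorRT engine called via BLS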
Run the client.py file:
import numpy as np
import cv2
import tritonclient.http as httpclient
import time
if __name__ == '__main__':
triton_client = httpclient.InferenceServerClient(url='127.0.0.1:8080')
img_path = '/images/input_img/4.jpg'
# img_path = 'https://pic.rmb.bdstatic.com/bjh/down/e0b159fdbd9eba47bbfbdff212492bd4.jpeg'
input_data0 = np.array([img_path.encode("utf-8")],dtype=np.object_)
print(input_data0)
inputs = []
inputs.append(httpclient.InferInput('input0', [1], "BYTES"))
inputs[0].set_data_from_numpy(input_data0, binary_data=True)
outputs = []
outputs.append(httpclient.InferRequestedOutput('output0', binary_data=False))
t1 = time.time()
results = triton_client.infer('custom_model', inputs=inputs, outputs=outputs)
t2 = time.time()
print('inference time is: {}ms'.format(1000 * (t2 - t1)))
output_data0 = results.as_numpy('output0')
print(output_data0)
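The value in output0 is the output path relative to the output_img directory (e.g. /20220817/<uuid>.jpg, see model.py above). A small sketch of how the client might resolve it to a file on the host, assuming client.py is run from the tensorrt-server directory shown in the tree above:
returned = output_data0[0]
if isinstance(returned, bytes):  # BYTES output may arrive as bytes or str depending on binary_data
    returned = returned.decode('utf-8')
host_path = './images/output_img' + returned  # e.g. ./images/output_img/20220817/<uuid>.jpg
print('annotated image saved to:', host_path)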