python tools/pytorch2onnx.py configs/top_down/hrnet/coco/hrnet_w48_coco_256x192.py checkpoints/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth --output-file hrnet_w48_coco_256x192.onnx
python remove_initializer_from_input.py --input your_old_model.onnx --output your_new_model.onnx
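Before writing any inference code, the exported file can be sanity-checked with onnx's own checker (a minimal sketch; the file name simply matches the conversion command above):

import onnx

model = onnx.load('hrnet_w48_coco_256x192.onnx')
onnx.checker.check_model(model)                 # raises if the exported graph is malformed
print([inp.name for inp in model.graph.input])  # after remove_initializer_from_input.py, only the real input should remain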
import onnx
import onnxruntime
import cv2
import numpy as np
from mmpose.core.evaluation import *  # provides keypoints_from_heatmaps (defined in top_down_eval.py)


def inference_with_onnx(config):
    # Load the exported ONNX model and run a single forward pass on one image.
    onnx_model = config['modelPaths']
    session = onnxruntime.InferenceSession(onnx_model)
    output_tensor = [node.name for node in session.get_outputs()]
    input_tensor = session.get_inputs()

    image_path = config['filenames']
    image = cv2.imread(image_path)
    # Build a (1, 3, 256, 192) NCHW blob: resize to 192x256, scale pixels to [0, 1], BGR -> RGB.
    data = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(192, 256),
                                 mean=[0.485, 0.456, 0.406], swapRB=True, crop=False)

    output_result = session.run(output_tensor, input_feed={input_tensor[0].name: data})
    return output_result


def vis_pose(img, points):
    # Draw each keypoint as a filled red circle labelled with its index.
    for i, point in enumerate(points):
        x, y = point
        x = int(x)
        y = int(y)
        cv2.circle(img, (x, y), 4, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img, '{}'.format(i), (x, y), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.5, color=(255, 255, 255), thickness=1, lineType=cv2.LINE_AA)
    return img


def vis_box(img, box):
    # Draw an (x, y, w, h) bounding box.
    x, y, w, h = list(map(int, box))
    img = cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 1)
    return img


if __name__ == '__main__':
    config = {
        'modelPaths': '/home/guest/lsc/project/mmLab/mmpose/hrnet_w48_coco_256x192.onnx',
        'filenames': '/home/guest/lsc/project/mmLab/mmpose/tests/data/coco/000000000785.jpg'
    }
    img = cv2.imread(config['filenames'])

    # Treat the whole image as the person detection box.
    x, y, w, h = 0, 0, 640, 425

    heatmap = inference_with_onnx(config)[0]
    center = np.array([[x + w * 0.5, y + h * 0.5]], dtype=np.float32)
    scale = np.array([[h, w]], dtype=np.float32)
    # Decode the predicted heatmaps back to coordinates in the original image.
    res = keypoints_from_heatmaps(heatmap, center, scale)[0]
    res = res.tolist()
    print(res)

    img = vis_pose(img, res[0])
    cv2.imwrite('/home/guest/lsc/project/mmLab/mmpose/vis_results/onnx/test.jpg', img)
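As a quick check that the blob built above matches what the exported network expects, the session itself can report the tensor shapes. The names and the 1 x 17 x 64 x 48 heatmap shape in the comments are what I would expect for this 256x192 top-down model, so verify them against your own export:

import onnxruntime

sess = onnxruntime.InferenceSession('hrnet_w48_coco_256x192.onnx')
print(sess.get_inputs()[0].name, sess.get_inputs()[0].shape)    # expect something like [1, 3, 256, 192]
print(sess.get_outputs()[0].name, sess.get_outputs()[0].shape)  # expect 17 heatmaps, e.g. [1, 17, 64, 48]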
This code uses the ONNX model converted from hrnet_w48_coco_256x192.pth; unless stated otherwise, this is the model used from here on.
The test image is mmpose/tests/data/coco/000000000785.jpg; the original image is shown below:
x, y, w, h = 0, 0, 640, 425
With these values the whole image is treated as the person detection box, and the 17 body keypoints are then recovered by the keypoints_from_heatmaps function in top_down_eval.py. Its docstring says:
center (np.ndarray[N, 2]): Center of the bounding box (x, y).
scale (np.ndarray[N, 2]): Scale of the bounding box wrt height/width.
My understanding was that center is the center of the person box and scale is the box's height and width, which is why the code above uses center = np.array([[x + w * 0.5, y + h * 0.5]], dtype=np.float32) and scale = np.array([[h, w]], dtype=np.float32).
But the 17 keypoints obtained this way are:
[[[5189.79296875, -38287.5], [6960.625, -40287.5], [5189.79296875, -39287.5], [7846.04296875, -39287.5], [5189.79296875, -39287.5], [10502.29296875, -31287.5], [6960.625, -28287.5], [14929.375, -17287.5], [4304.375, -16287.5], [15814.79296875, -17287.5], [-1893.5390625, -10287.5], [14043.9609375, -1287.5], [11387.7109375, -1287.5], [14929.375, 22712.5], [7846.04296875, 18712.5], [20241.875, 45712.5], [11387.7109375, 39712.5]]]
These coordinates are obviously wrong.
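In hindsight, the magnitude of these numbers already points at the cause. As far as I can tell from top_down_eval.py, the decoded heatmap coordinates are mapped back to the image linearly, using scale * 200 pixels as the box size, so passing the raw pixel height/width as scale inflates everything by a factor of about 200. A rough check (the mapping here is my reading of the code, not a call into mmpose):

import numpy as np

heatmap_w, heatmap_h = 48, 64                      # heatmap size for a 256x192 input
scale = np.array([425.0, 640.0])                   # the raw [h, w] I passed in
step = scale * 200 / np.array([heatmap_w, heatmap_h])
print(step)                                        # ~[1770.8, 2000.], pixels per heatmap cell
# One heatmap cell should only cover ~13 pixels of the original image (640 / 48),
# so the decoded coordinates come out roughly 200x too large.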
3. Because of the problem in step 2, I suspected my center and scale were set incorrectly, so I kept reading the code until I reached line 272 of inference.py, center, scale = _box2cs(cfg, bbox). Line 109 inside _box2cs(cfg, bbox) has scale = np.array([w / 200.0, h / 200.0], dtype=np.float32), so I changed the scale line in my code to scale = np.array([[h/200, w/200]], dtype=np.float32) and got this visualization:
Well... the error is still far too large.
4. Looking more closely, I noticed that in line 109 of _box2cs(cfg, bbox) the scale puts the width first and the height second, so I changed my code to scale = np.array([[w/200, h/200]], dtype=np.float32) and got this visualization:
This time it looks right!
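For later use I wrapped this conversion into a small helper. This is a simplified sketch of my own (box_to_center_scale is a hypothetical name, not mmpose code); as far as I can tell, the real _box2cs in inference.py additionally adjusts the box to the model input's aspect ratio and pads the scale, which is omitted here:

import numpy as np

def box_to_center_scale(box):
    # Hypothetical helper: convert an (x, y, w, h) box into the center/scale arrays
    # expected by keypoints_from_heatmaps -- width first, divided by 200.
    x, y, w, h = box
    center = np.array([[x + w * 0.5, y + h * 0.5]], dtype=np.float32)
    scale = np.array([[w / 200.0, h / 200.0]], dtype=np.float32)
    return center, scale

center, scale = box_to_center_scale((0, 0, 640, 425))
print(center)  # [[320.  212.5]]
print(scale)   # [[3.2   2.125]]
# These are the arguments passed to keypoints_from_heatmaps(heatmap, center, scale).

This also explains the step-3 result: with height and width swapped, x coordinates are scaled by 425/640 of the correct factor and y coordinates by 640/425, hence the large error there.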