TVM introduction: https://zhuanlan.zhihu.com/p/88369758
OS: Ubuntu 18.04.1, x86_64 GNU/Linux
CPU: Intel(R) Core(TM) i7-6700 @ 3.40GHz (8 logical CPUs)
tvm-0.7.dev1
torch-1.4.0
torchvision-0.5.0
Install TVM following the TVM installation docs; note that the official tutorial recommends torch >= 1.3.0.
The experiment uses the resnet18 model that ships with torchvision:
import torch
import torchvision
model_name = 'resnet18'
model = getattr(torchvision.models, model_name)(pretrained=True)
# ~/.cache/torch/checkpoints/resnet18-5c106cde.pth
model = model.eval()
# We grab the TorchScripted model via tracing
input_shape = [1, 3, 224, 224]
input_data = torch.randn(input_shape)
# the traced scripted_model no longer depends on the Python model definition
scripted_model = torch.jit.trace(model, input_data).eval()
The last line prints a warning; according to forum posts this is expected:
WARNING:root:Untyped Tensor found, assume it is float
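Since the traced model is self-contained, it can be saved to disk and reloaded without the original Python class. A minimal sketch (the file name resnet18_traced.pt is arbitrary):
# save the TorchScript module and load it back (file name is arbitrary)
scripted_model.save('resnet18_traced.pt')
reloaded_model = torch.jit.load('resnet18_traced.pt').eval()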
Converting the torch model to a TVM (Relay) model
#Convert PyTorch graph to Relay graph.
import tvm, time
from tvm import relay
input_name = 'input0' # only one input, set it to this name
shape_list = [(input_name, input_shape)]
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
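To sanity-check the conversion, the resulting Relay IRModule and its parameters can be inspected; a minimal sketch:
# print the Relay IR of the converted main function
print(mod['main'])
# params maps parameter names to tvm.nd.NDArray weight tensors
print(len(params), 'parameter tensors')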
Build (compile) the TVM model from the converted Relay module
#Compile the graph to llvm target with given input specification
target = 'llvm'
target_host = 'llvm'
ctx = tvm.cpu(0)
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target=target, target_host=target_host, params=params)
#deploying the compiled model on target.
from tvm.contrib import graph_runtime
tvm_model = graph_runtime.create(graph, lib, ctx)
tvm_model.set_input(**params)
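The compiled artifacts can also be saved for later deployment without rebuilding; a minimal sketch, assuming the output file names below:
# export the compiled library, graph JSON and parameters (file names are arbitrary)
lib.export_library('resnet18_llvm.so')
with open('resnet18_graph.json', 'w') as f:
    f.write(graph)
with open('resnet18_params.bin', 'wb') as f:
    f.write(relay.save_param_dict(params))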
Randomly generate 1000 images of size 224x224 for testing
import numpy as np
# randomly generated images would all get the same prediction, so scale by (i % 9 + 1) to make the predictions differ
list_img = [np.random.rand(1, 3, 224, 224).astype(np.float32) * (i % 9 + 1) for i in range(1000)]
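For real images you would normally resize and normalize with the standard ImageNet statistics instead of feeding raw random tensors; a minimal sketch using torchvision.transforms (the file name cat.png is only an example):
from PIL import Image
from torchvision import transforms

# standard ImageNet preprocessing for resnet18
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# img = preprocess(Image.open('cat.png')).unsqueeze(0).numpy()  # shape (1, 3, 224, 224)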
Predict the 1000 images one by one; the test function returns the predicted classes and the cumulative prediction time after each image
def test_tvm(model, list_img):
    since = time.time()
    # cumulative elapsed time recorded after each image
    list_tpoint = []
    # top-1 predicted class for each image
    list_top = []
    for img_x in list_img:
        model.set_input(input_name, tvm.nd.array(img_x))
        # Execute
        model.run()
        # Get outputs
        output = model.get_output(0)
        top1 = np.argmax(output.asnumpy()[0])
        list_top.append(top1)
        list_tpoint.append(time.time() - since)
    return list_top, list_tpoint
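Timing with time.time() in a Python loop includes data-copy and Python overhead; TVM also offers a time_evaluator on the underlying module for more controlled timing. A minimal sketch (number/repeat values are arbitrary):
# benchmark just the run() call with TVM's built-in timer
ftimer = tvm_model.module.time_evaluator('run', ctx, number=1, repeat=10)
prof_res = np.array(ftimer().results) * 1000  # convert to milliseconds
print('Mean inference time: %.2f ms (std %.2f ms)' % (prof_res.mean(), prof_res.std()))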
The torch version returns the same kind of outputs:
@torch.no_grad()
def test_torch(model, list_img):
    since = time.time()
    list_top = []
    list_tpoint = []
    for img_x in list_img:
        img_x = torch.from_numpy(img_x)
        output = model(img_x)
        top1 = np.argmax(output.numpy()[0])
        list_top.append(top1)
        list_tpoint.append(time.time() - since)
    return list_top, list_tpoint
tvm_top, tvm_point = test_tvm(tvm_model, list_img)
torch_top, torch_points = test_torch(model, list_img)
# plot the cumulative time cost of the two models
import matplotlib.pyplot as plt
plt.figure()
plt.plot(tvm_point,label='tvm')
plt.plot(torch_points,label='torch')
plt.legend()
plt.savefig('time_cost.png')
plt.show()
The first few predictions of the two models:
>>> tvm_top[:10]
[111, 111, 318, 818, 818, 644, 644, 644, 644, 111]
>>> torch_top[:10]
[111, 111, 318, 818, 818, 644, 644, 644, 644, 111]
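Beyond eyeballing the first ten predictions, the agreement over all 1000 images can be checked directly; a minimal sketch:
# fraction of images where TVM and torch predict the same top-1 class
agreement = np.mean(np.array(tvm_top) == np.array(torch_top))
print('top-1 agreement: %.4f' % agreement)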
Total prediction time (seconds) of the two models:
>>> torch_points[-1]
53.492563009262085
>>> tvm_point[-1]
43.807936906814575
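From these totals the average per-image latency and relative speedup can be derived; a minimal sketch using the measured numbers above:
# average per-image latency and speedup from the measured totals
torch_ms = torch_points[-1] / len(list_img) * 1000   # ~53.5 ms per image
tvm_ms = tvm_point[-1] / len(list_img) * 1000        # ~43.8 ms per image
print('speedup: %.2fx' % (torch_points[-1] / tvm_point[-1]))  # ~1.22x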
Cumulative time-cost plot of the two models (time_cost.png): the x-axis is the number of images and the y-axis is the elapsed time in seconds.
Summary: compared with native torch, TVM still shows a clear advantage in inference speed here, finishing the 1000 predictions in about 43.8 s versus 53.5 s (roughly 1.2x faster).
References:
https://zhuanlan.zhihu.com/p/88369758
https://docs.tvm.ai/tutorials/frontend/from_pytorch.html