TVM is a deep learning compiler. It compiles neural network models into dynamic libraries that can then be deployed on a wide range of hardware, including various edge devices.
1. Installing TVM
This walkthrough installs TVM on Linux and uses TVM's Python API to compile deep learning models into dynamic libraries. Reference: the TVM installation tutorial; a minimal source-build sketch follows.
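As a rough sketch of a source build (paths and options here are assumptions; the official tutorial is authoritative):
git clone --recursive https://github.com/apache/tvm tvm
cd tvm && mkdir build && cp cmake/config.cmake build
# edit build/config.cmake and set USE_LLVM ON; the "llvm" targets used below require it
cd build && cmake .. && make -j4
# make the Python package importable
export TVM_HOME=/path/to/tvm
export PYTHONPATH=$TVM_HOME/python:$PYTHONPATH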
2. Cross-compiling libtvm_runtime.so
Download the Android NDK and extract it to /opt, then run the following shell commands from the root of the TVM source tree. This produces libtvm_runtime.so in the buildruntime directory (see the community's cross-compilation guide); a quick verification sketch follows the script.
set -e
export ANDROID_NDK=/opt/android-ndk-r20b
BUILD_DIR=buildruntime
mkdir -p $BUILD_DIR
cd $BUILD_DIR
cmake ../ \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_ABI="arm64-v8a" \
-DANDROID_STL=c++_static \
-DANDROID_NATIVE_API_LEVEL=android-21 \
-DANDROID_TOOLCHAIN=clang
make runtime -j4
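A quick sanity check that the library really targets 64-bit ARM rather than the host (assuming the file utility is available):
file buildruntime/libtvm_runtime.so
# the output should mention "ELF 64-bit ... ARM aarch64" rather than x86-64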
3. Compiling an ONNX model
Use TVM's Python API to load an ONNX model and compile it into a dynamic library for Android. See the references on deploying a model to Android and deploying a model to Raspberry Pi.
import onnx
import numpy as np
import tvm
import tvm.relay as relay
import os
from tvm.contrib import utils, ndk
onnx_model = onnx.load('mobilenet.onnx')
x = np.ones([1, 3, 224, 224])  # shape of the input tensor
target = "llvm -mtriple=arm64-linux-android"  # target architecture to compile for
input_name = 'input'  # name of the network's input node
shape_dict = {input_name: x.shape}
sym, params = relay.frontend.from_onnx(onnx_model, shape_dict)
with tvm.transform.PassContext(opt_level=3):
    graph, lib, params = relay.build(sym, target=target, params=params)
lib.export_library("deploy.so", cc="/opt/android-ndk-r20b/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang++")
# lib.export_library("deploy.so", ndk.create_shared)  # requires the TVM_NDK_CC environment variable to be set
graph_json_path = "deploy.json"
with open(graph_json_path, 'w') as fo:
    fo.write(graph)
param_path = "deploy.params"
with open(param_path, 'wb') as fo:
    fo.write(relay.save_param_dict(params))
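If the commented-out ndk.create_shared variant is preferred over passing cc= explicitly, tvm.contrib.ndk reads the cross compiler from the TVM_NDK_CC environment variable. A sketch, with compile_onnx.py as a hypothetical name for the script above:
export TVM_NDK_CC=/opt/android-ndk-r20b/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang++
python3 compile_onnx.py  # hypothetical file name for the script above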
4. C++ inference code (main program)
#include <tvm/runtime/module.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/packed_func.h>
#include <dlpack/dlpack.h>
#include <fstream>
#include <iostream>
#include <algorithm>
int main()
{
  // tvm module for compiled functions
  tvm::runtime::Module mod_syslib = tvm::runtime::Module::LoadFromFile("deploy.so");
  // json graph
  std::ifstream json_in("deploy.json", std::ios::in);
  std::string json_data((std::istreambuf_iterator<char>(json_in)), std::istreambuf_iterator<char>());
  json_in.close();
  // parameters in binary
  std::ifstream params_in("deploy.params", std::ios::binary);
  std::string params_data((std::istreambuf_iterator<char>(params_in)), std::istreambuf_iterator<char>());
  params_in.close();
  // parameters need to be TVMByteArray type to indicate the binary data
  TVMByteArray params_arr;
  params_arr.data = params_data.c_str();
  params_arr.size = params_data.length();
  int dtype_code = kDLFloat;
  int dtype_bits = 32;
  int dtype_lanes = 1;
  int device_type = kDLCPU;
  int device_id = 0;
  // create the graph executor module via the global registry function
  tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_executor.create"))(json_data, mod_syslib, device_type, device_id);
  DLTensor* x;
  int in_ndim = 4;
  int64_t in_shape[4] = {1, 3, 224, 224};
  TVMArrayAlloc(in_shape, in_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);
  // load image data saved in binary
  std::ifstream data_fin("cat.bin", std::ios::binary);
  data_fin.read(static_cast<char*>(x->data), 3 * 224 * 224 * 4);
  // get the function from the module (set input data)
  tvm::runtime::PackedFunc set_input = mod.GetFunction("set_input");
  set_input("input", x);
  // get the function from the module (load parameters)
  tvm::runtime::PackedFunc load_params = mod.GetFunction("load_params");
  load_params(params_arr);
  // get the function from the module (run inference)
  tvm::runtime::PackedFunc run = mod.GetFunction("run");
  run();
  DLTensor* y;
  int out_ndim = 2;
  int64_t out_shape[2] = {1, 1000};
  TVMArrayAlloc(out_shape, out_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);
  // get the function from the module (get output data)
  tvm::runtime::PackedFunc get_output = mod.GetFunction("get_output");
  get_output(0, y);
  // find the maximum position in the output vector
  auto y_iter = static_cast<float*>(y->data);
  auto max_iter = std::max_element(y_iter, y_iter + 1000);
  auto max_index = std::distance(y_iter, max_iter);
  std::cout << "The maximum position in output vector is: " << max_index << std::endl;
  TVMArrayFree(x);
  TVMArrayFree(y);
  return 0;
}
The CMakeLists for building the executable:
cmake_minimum_required(VERSION 3.0)
project(testdemo)
set(CMAKE_CXX_STANDARD 14)  # TVM runtime headers require at least C++14
include_directories(
  include
  3rdparty/dmlc-core/include
  3rdparty/dlpack/include
  src/runtime
)
link_directories(
  buildruntime
)
add_executable(testdemo demotest.cpp)
target_link_libraries(testdemo tvm_runtime)
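Because testdemo has to run on the phone, it must be cross-compiled with the same NDK toolchain as the runtime. A minimal sketch, assuming this CMakeLists and demotest.cpp sit in the TVM source root next to buildruntime:
mkdir -p builddemo
cd builddemo
cmake ../ \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_ABI="arm64-v8a" \
-DANDROID_NATIVE_API_LEVEL=android-21
make -j4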
Push the compiled executable testdemo, the cross-compiled libtvm_runtime.so, and the files produced by compiling the ONNX model (deploy.so, deploy.json, deploy.params) to the phone, and inference can then be run on the device.
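For reference, a typical adb sequence looks like this (the device directory is just an example):
adb push testdemo libtvm_runtime.so deploy.so deploy.json deploy.params cat.bin /data/local/tmp/
adb shell "cd /data/local/tmp && LD_LIBRARY_PATH=. ./testdemo"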
5. Generating an operator with TVM primitives and running inference on Android
Generate the operator's dynamic library (see the cross-compilation reference):
import tvm
import numpy as np
from tvm import te
from tvm import relay
import os
from tvm.contrib import ndk
def prepare_test_libs(base_path):
    n = te.var("n")
    A = te.placeholder((n,), name="A")
    B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B")
    s = te.create_schedule(B.op)
    # Compile the operator as a dynamic library for Android
    target = "llvm -mtriple=arm64-linux-android"
    fadd_dylib = tvm.build(s, [A, B], target=target, name="addone")
    os.makedirs(base_path, exist_ok=True)  # ensure the output directory exists
    dylib_path = os.path.join(base_path, "test_addone_dll.so")
    fadd_dylib.export_library(dylib_path, ndk.create_shared)

if __name__ == "__main__":
    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    prepare_test_libs(os.path.join(curr_path, "lib"))
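Since this script exports through ndk.create_shared, TVM_NDK_CC must again point at the NDK compiler. Running it (prepare_addone.py is a hypothetical name) leaves the operator library under lib/:
export TVM_NDK_CC=/opt/android-ndk-r20b/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang++
python3 prepare_addone.py  # hypothetical file name for the script above
ls lib/test_addone_dll.so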
C++ device-side integration:
#include <dlpack/dlpack.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
void Verify(tvm::runtime::Module mod, std::string fname) {
  // Get the function from the module.
  tvm::runtime::PackedFunc f = mod.GetFunction(fname);
  ICHECK(f != nullptr);
  DLTensor* x;
  DLTensor* y;
  int ndim = 1;
  int dtype_code = kDLFloat;
  int dtype_bits = 32;
  int dtype_lanes = 1;
  int device_type = kDLCPU;
  int device_id = 0;
  int64_t shape[1] = {10};
  TVMArrayAlloc(shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);
  TVMArrayAlloc(shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);
  for (int i = 0; i < shape[0]; ++i) {
    static_cast<float*>(x->data)[i] = i;
  }
  // Invoke the function.
  // PackedFunc is a function that can be invoked via positional arguments.
  // The signature of the function is specified in tvm.build.
  f(x, y);
  // Check the output: each element should be the input plus one.
  for (int i = 0; i < shape[0]; ++i) {
    ICHECK_EQ(static_cast<float*>(y->data)[i], i + 1.0f);
  }
  LOG(INFO) << "Finish verification...";
  TVMArrayFree(x);
  TVMArrayFree(y);
}

void DeploySingleOp() {
  // Load the operator library directly from the shared object.
  tvm::runtime::Module mod_dylib = tvm::runtime::Module::LoadFromFile("lib/test_addone_dll.so");
  LOG(INFO) << "Verify dynamic loading from test_addone_dll.so";
  Verify(mod_dylib, "addone");
}

int main(void) {
  DeploySingleOp();
  return 0;
}
The example source lives in the TVM tree at tvm/apps/howto_deploy.
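To run this check on the phone, the code above (saved, say, as cpp_deploy.cc) can be cross-compiled against the TVM headers and the runtime from section 2, then pushed with adb. A rough sketch from the TVM source root; the file name and flags are assumptions and may need adjusting for your NDK:
$ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang++ \
-std=c++14 -O2 cpp_deploy.cc -o cpp_deploy \
-Iinclude -I3rdparty/dmlc-core/include -I3rdparty/dlpack/include \
-Lbuildruntime -ltvm_runtime -static-libstdc++
adb shell mkdir -p /data/local/tmp/lib
adb push cpp_deploy libtvm_runtime.so /data/local/tmp/
adb push lib/test_addone_dll.so /data/local/tmp/lib/
adb shell "cd /data/local/tmp && LD_LIBRARY_PATH=. ./cpp_deploy"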