TVM部署神经网络模型到android端

tvm是深度学习编译器,它可以把神经网络模型编译成动态库,然后部署到多种硬件上,包括各种端侧设备。
一、安装tvm
此处是在linux环境下安装tvm,通过tvm的python接口将深度学习模型编译成动态链接库,参考链接:tvm安装教程
二、交叉编译libtvm_runtime.so
下载NDK工具,解压到/opt目录下,然后在tvm的代码的根目录执行以下shell命令,在buildruntime目录下得到libtvm_runtime.so,参考社区交叉编译指导

set -e

# Shell assignments must NOT have spaces around '=' — the original
# `export ANDROID_NDK = /opt/...` would be parsed as a command invocation.
export ANDROID_NDK=/opt/android-ndk-r20b
BUILD_DIR=buildruntime

# -p: do not fail if the build directory already exists (script is re-runnable)
mkdir -p "$BUILD_DIR"
cd "$BUILD_DIR"

# Configure with the NDK toolchain file for a 64-bit Android (API 21) build.
# (The duplicated -DCMAKE_BUILD_TYPE=Release from the original was removed.)
cmake ../ \
      -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
      -DCMAKE_BUILD_TYPE=Release \
      -DANDROID_ABI="arm64-v8a" \
      -DANDROID_STL=c++_static \
      -DANDROID_NATIVE_API_LEVEL=android-21 \
      -DANDROID_TOOLCHAIN=clang++

# Build only the TVM runtime target (produces libtvm_runtime.so)
make runtime -j4

三、编译onnx模型
利用tvm提供的python接口,读入onnx深度学习模型,将其编译成android端的动态库,参考部署模型到android和部署模型到树莓派

import onnx
import numpy as np
import tvm
import tvm.relay as relay
import os
from tvm.contrib import utils, ndk

# Load the ONNX model and describe its input.
# NOTE: Python comments use '#' — the original '//' comments were a syntax error.
onnx_model = onnx.load('mobilenet.onnx')
x = np.ones([1, 3, 224, 224])                 # input tensor shape (only the shape is used below)
target = "llvm -mtriple=arm64-linux-android"  # codegen target: 64-bit Android
input_name = 'input'                          # name of the network's input node
shape_dict = {input_name: x.shape}
sym, params = relay.frontend.from_onnx(onnx_model, shape_dict)

# Compile the Relay module at the highest optimization level.
with tvm.transform.PassContext(opt_level=3):
    graph, lib, params = relay.build(sym, target=target, params=params)

# Export the compiled library, cross-compiling with the NDK clang++.
# The compiler path must be a single unbroken string literal (the original
# split it across two lines, which is invalid Python).
lib.export_library(
    "deploy.so",
    cc="/opt/android-ndk-r20b/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang++")
# Alternative: lib.export_library("deploy.so", ndk.create_shared)
# — requires the TVM_NDK_CC environment variable to point at the NDK compiler.

# Persist the graph JSON and the parameter blob next to the shared library.
graph_json_path = "deploy.json"
with open(graph_json_path, 'w') as fo:
    fo.write(graph)

param_path = "deploy.params"
with open(param_path, 'wb') as fo:
    fo.write(relay.save_param_dict(params))

四、c++推理代码,主程序

#include 
#include 
#include 
#include 

#include 
#include 
#include 

int main()
{
    // tvm module for compiled functions
    tvm::runtime::Module mod_syslib = tvm::runtime::Module::LoadFromFile("deploy.so");

    // json graph
    std::ifstream json_in("deploy.json", std::ios::in);
    std::string json_data((std::istreambuf_iterator<char>(json_in)), std::istreambuf_iterator<char>());
    json_in.close();

    // parameters in binary
    std::ifstream params_in("deploy.params", std::ios::binary);
    std::string params_data((std::istreambuf_iterator<char>(params_in)), std::istreambuf_iterator<char>());
    params_in.close();

    // parameters need to be TVMByteArray type to indicate the binary data
    TVMByteArray params_arr;
    params_arr.data = params_data.c_str();
    params_arr.size = params_data.length();

    int dtype_code = kDLFloat;
    int dtype_bits = 32;
    int dtype_lanes = 1;
    int device_type = kDLCPU;
    int device_id = 0;

    // get global function module for graph runtime
    tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_executor.create"))(json_data, mod_syslib, device_type, device_id);

    DLTensor* x;
    int in_ndim = 4;
    int64_t in_shape[4] = {1, 3, 224, 224};
    TVMArrayAlloc(in_shape, in_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);
    // load image data saved in binary
    std::ifstream data_fin("cat.bin", std::ios::binary);
    data_fin.read(static_cast<char*>(x->data), 3 * 224 * 224 * 4);

    // get the function from the module(set input data)
    tvm::runtime::PackedFunc set_input = mod.GetFunction("set_input");
    set_input("input", x);

    // get the function from the module(load patameters)
    tvm::runtime::PackedFunc load_params = mod.GetFunction("load_params");
    load_params(params_arr);

    // get the function from the module(run it)
    tvm::runtime::PackedFunc run = mod.GetFunction("run");
    run();

    DLTensor* y;
    int out_ndim = 2;
    int64_t out_shape[2] = {1, 1000};
    TVMArrayAlloc(out_shape, out_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);

    // get the function from the module(get output data)
    tvm::runtime::PackedFunc get_output = mod.GetFunction("get_output");
    get_output(0, y);

    // get the maximum position in output vector
    auto y_iter = static_cast<float*>(y->data);
    auto max_iter = std::max_element(y_iter, y_iter + 1000);
    auto max_index = std::distance(y_iter, max_iter);
    std::cout << "The maximum position in output vector is: " << max_index << std::endl;

    TVMArrayFree(x);
    TVMArrayFree(y);

    return 0;
}

构建可执行程序的CMakeLists

cmake_minimum_required(VERSION 3.0)
project(testdemo)

# TVM runtime headers (paths are relative to the TVM source tree root)
include_directories(
include
3rdparty/dmlc-core/include
3rdparty/dlpack/include
src/runtime
)

# Where the cross-compiled libtvm_runtime.so was produced
link_directories(
buildruntime
)

# CMake argument lists are whitespace-separated — the original commas and the
# doubled parenthesis in target_link_libraries were syntax errors.
add_executable(testdemo demotest.cpp)
# Plain library name: resolved to libtvm_runtime.so via link_directories.
target_link_libraries(testdemo tvm_runtime)

将编译得到的可执行程序testdemo、交叉编译的libtvm_runtime.so、以及编译onnx模型得到动态库等文件一起push到手机即可进行推理。

五、tvm原语生成算子并在android端推理
生成算子动态库,参考交叉编译

import tvm
import numpy as np
from tvm import te
from tvm import relay
import os
from tvm.contrib import ndk


def prepare_test_libs(base_path):
    """Build an element-wise 'add one' operator and export it as an
    Android shared library under ``base_path``."""
    # Symbolic length so the compiled kernel accepts any 1-D input size.
    dim = te.var("n")
    inp = te.placeholder((dim,), name="A")
    out = te.compute(inp.shape, lambda *idx: inp(*idx) + 1.0, name="B")
    sched = te.create_schedule(out.op)
    # Cross-compile for 64-bit Android; link with the NDK toolchain.
    target = "llvm -mtriple=arm64-linux-android"
    compiled = tvm.build(sched, [inp, out], target=target, name="addone")
    out_path = os.path.join(base_path, "test_addone_dll.so")
    compiled.export_library(out_path, ndk.create_shared)


if __name__ == "__main__":
    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    prepare_test_libs(os.path.join(curr_path, "lib"))

c++端侧集成

#include 
#include 
#include 
#include 

#include 

void Verify(tvm::runtime::Module mod, std::string fname) {
  // Get the function from the module.
  tvm::runtime::PackedFunc f = mod.GetFunction(fname);
  ICHECK(f != nullptr);
  DLTensor* x;
  DLTensor* y;
  int ndim = 1;
  int dtype_code = kDLFloat;
  int dtype_bits = 32;
  int dtype_lanes = 1;
  int device_type = kDLCPU;
  int device_id = 0;
  int64_t shape[1] = {10};
  TVMArrayAlloc(shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);
  TVMArrayAlloc(shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);
  for (int i = 0; i < shape[0]; ++i) {
    static_cast<float*>(x->data)[i] = i;
  }
  // Invoke the function
  // PackedFunc is a function that can be invoked via positional argument.
  // The signature of the function is specified in tvm.build
  f(x, y);
  // Print out the output
  for (int i = 0; i < shape[0]; ++i) {
    ICHECK_EQ(static_cast<float*>(y->data)[i], i + 1.0f);
  }
  LOG(INFO) << "Finish verification...";
  TVMArrayFree(x);
  TVMArrayFree(y);
}

void DeploySingleOp() {
  // Normally we can directly
  tvm::runtime::Module mod_dylib = tvm::runtime::Module::LoadFromFile("lib/test_addone_dll.so");
  LOG(INFO) << "Verify dynamic loading from test_addone_dll.so";
  Verify(mod_dylib, "addone");
}
int main(void) {
  DeploySingleOp();
  return 0;
}

源码的示例路径:tvm\apps\howto_deploy

你可能感兴趣的:(深度学习框架)