Previous article: A hands-on guide to porting NVDLA + Tengine onto an FPGA and running any neural network (3)
The previous article got a demo routine running on the ARM + FPGA platform, which confirms that the hardware and software environment is set up correctly. So how do we convert a network we designed ourselves and run it through the Tengine framework on the NVDLA backend? The examples in the Tengine community give the best answer: first export the PyTorch model to ONNX, then use Tengine's model convert tool to turn it into a tmfile, quantize it with Tengine's own quantization tool, and finally call the Tengine API to run inference.
The Tengine conversion toolchain cannot convert a PyTorch model to tmfile directly; it has to go through the ONNX intermediate format first.
Note: be sure to call model.eval() before exporting, and set the ONNX opset version to 10.
import torch.onnx

# Function to Convert to ONNX
def Convert_ONNX():
    # set the model to inference mode
    model.eval()

    # Let's create a dummy input tensor
    dummy_input = torch.randn(1, input_size, requires_grad=True)

    # Export the model
    torch.onnx.export(model,                      # model being run
                      dummy_input,                # model input (or a tuple for multiple inputs)
                      "totmfile.onnx",            # where to save the model
                      export_params=True,         # store the trained parameter weights inside the model file
                      opset_version=10,           # the ONNX version to export the model to
                      do_constant_folding=True,   # whether to execute constant folding for optimization
                      input_names=['modelInput'],     # the model's input names
                      output_names=['modelOutput'],   # the model's output names
                      dynamic_axes={'modelInput': {0: 'batch_size'},    # variable length axes
                                    'modelOutput': {0: 'batch_size'}})
    print(" ")
    print('Model has been converted to ONNX')
Load the trained weight file and convert the model:
if __name__ == "__main__":
    # Let's build our model
    #train(5)
    #print('Finished Training')

    # Test which classes performed well
    #testAccuracy()

    # Let's load the model we just created and test the accuracy per label
    model = Network()
    path = "$pthname$.pth"
    model.load_state_dict(torch.load(path))

    # Test with batch of images
    #testBatch()

    # Test how the classes performed
    #testClassess()

    # Conversion to ONNX
    Convert_ONNX()
3. Converting ONNX to tmfile
In the Tengine project downloaded earlier, create a new build directory and enable the model convert tool option:
mkdir build && cd build
cmake -DTENGINE_BUILD_CONVERT_TOOL=ON ..
make -j`nproc`
Then go to the directory containing the compiled executable, build/tools/convert_tool, and check its usage:
xuan@ubuntu:~/Tengine/build2/tools/convert_tool$ ./convert_tool -h
[Convert Tools Info]: optional arguments:
-h help show this help message and exit
-f input type path to input float32 tmfile
-p input structure path to the network structure of input model(*.param, *.prototxt, *.symbol, *.cfg, *.pdmodel)
-m input params path to the network params of input model(*.bin, *.caffemodel, *.params, *.weight, *.pb, *.onnx, *.tflite, *.pdiparams)
-o output model path to output fp32 tmfile
[Convert Tools Info]: example arguments:
./convert_tool -f onnx -m ./mobilenet.onnx -o ./mobilenet.tmfile
./convert_tool -f caffe -p ./mobilenet.prototxt -m ./mobilenet.caffemodel -o ./mobilenet.tmfile
./convert_tool -f mxnet -p ./mobilenet.params -m ./mobilenet.json -o ./mobilenet.tmfile
./convert_tool -f darknet -p ./yolov3.weights -m ./yolov3.cfg -o yolov3.tmfile
xuan@ubuntu:~/Tengine/build2/tools/convert_tool$ ./convert_tool -f onnx -m WDCNN2d_01_23.onnx -o WCDNN_01_23.tmfile
---- Tengine Convert Tool ----
Version : v1.0, 04:59:54 Apr 26 2022
Status : float32
----------onnx2tengine begin----------
Model op set is: 9
Internal optimize in onnx serializer done.
----------onnx2tengine done.----------
graph opt begin
graph opt done.
Convert model success. WDCNN2d_01_23.onnx -----> WCDNN_01_23.tmfile
Next, build Tengine's quantization tool. First install the dependency library:
sudo apt install libopencv-dev
Create a new build directory and enable the quantization tool option:
mkdir build && cd build
cmake -DTENGINE_BUILD_QUANT_TOOL=ON ..
make -j`nproc`
Go to the build/tools/quantize folder:
$ ./quant_tool_int8 -h
[Quant Tools Info]: optional arguments:
-h help show this help message and exit
-m input model path to input float32 tmfile
-i image dir path to calibration images folder
-o output model path to output int8 tmfile
-a algorithm the type of quant algorithm(0:min-max, 1:kl, default is 1)
-g size the size of input image(using the resize the original image,default is 3,224,224
-w mean value of mean (mean value, default is 104.0,117.0,123.0
-s scale value of normalize (scale value, default is 1.0,1.0,1.0)
-b swapRB flag which indicates that swap first and last channels in 3-channel image is necessary(0:OFF, 1:ON, default is 1)
-c center crop flag which indicates that center crop process image is necessary(0:OFF, 1:ON, default is 0)
-y letter box flag which indicates that letter box process image is necessary(maybe using for YOLO, 0:OFF, 1:ON, default is 0)
-t num thread count of processing threads(default is 4)
Before using the quantization tool, you need the float32 tmfile and a calibration dataset.
matio library: https://github.com/tbeu/matio
The calibration data for this project is stored in MATLAB .mat files, so libmatio is used to read it on the input side. In the CMakeLists.txt under Tengine/tools/quantize, add the library to the link line; the modified line is as follows.
TARGET_LINK_LIBRARIES (${name} PRIVATE ${CMAKE_PROJECT_NAME}-static ${OpenCV_LIBS} ${TENGINE_TOOL_LINK_LIBRARIES} libmatio.a -lz)
Modify quant_tool_int8.cpp as follows:
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, OPEN AI LAB
* Author: hhchen@openailab.com
*/
#include <algorithm>
#include <cfloat>
#include "quant_tool.hpp"
#include "quant_save_graph.hpp"
#include "matio.h"
#ifdef _MSC_VER
#include "msc_getopt.h"
#undef max
#undef min
#endif
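/* Read one calibration/test sample from a MATLAB .mat file: the first variable in
 * data_file is expected to be a 2-D double matrix; sample `num` (one row of that
 * matrix) is converted to float and copied into `array`. */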
int get_bear_data(const char *data_file, float *array, int num) {
mat_t *matfp = nullptr;
matvar_t *matvar = nullptr;
matfp = Mat_Open(data_file, MAT_ACC_RDONLY);
if (NULL == matfp) {
fprintf(stderr, "Error opening MAT file \"%s\"!\n", data_file);
return EXIT_FAILURE;
}
matvar = Mat_VarReadNext(matfp);
if (NULL == matvar) {
fprintf(stderr, "Variable not found, or error "
"reading MAT file\n");
return EXIT_FAILURE;
}
//Mat_VarPrint(matvar, 1);
if (matvar->rank != 2) {
fprintf(stderr, "Wrong rank! rank = %d\n", matvar->rank);
return EXIT_FAILURE;
}
int data_len = matvar->dims[1];
const double *point = reinterpret_cast<double *> (matvar->data);
for (uint32_t i = 0; i < data_len; ++i) {
array[i] = static_cast<float_t>(*(point + (matvar->dims[0] * i + num)));
}
Mat_VarFree(matvar);
matvar = nullptr;
Mat_Close(matfp);
return EXIT_SUCCESS;
}
QuantTool::QuantTool()
{
// initial tengine
if (init_tengine() != 0)
{
fprintf(stderr, "Initial tengine failed.\n");
}
// system variable
this->opt.num_thread = 4;
this->opt.cluster = TENGINE_CLUSTER_ALL;
this->opt.precision = TENGINE_MODE_FP32;
this->opt.affinity = 0;
this->num_thread = 4;
// input variable
this->sw_RGB = 1;
this->img_c = 3;
this->img_h = 224;
this->img_w = 224;
this->mean[0] = 104.f;
this->mean[1] = 117.f;
this->mean[2] = 123.f;
this->scale[0] = 1.f;
this->scale[1] = 1.f;
this->scale[2] = 1.f;
this->center_crop = 0;
this->letterbox_rows = 0;
this->letterbox_cols = 0;
this->focus = 0;
this->inplace = true;
this->algorithm_type = ALGORITHM_MIN_MAX;
this->evaluate = false;
}
QuantTool::~QuantTool()
{
/* release tengine */
release_tengine();
}
static float compute_aciq_gaussian_clip(float absmax, int N, int num_bits)
{
const float alpha_gaussian[8] = {0, 1.71063519, 2.15159277, 2.55913646, 2.93620062, 3.28691474, 3.6151146, 3.92403714};
const double gaussian_const = (0.5 * 0.35) * (1 + sqrt(3.14159265358979323846 * log(4)));
double std = (absmax * 2 * gaussian_const) / sqrt(2 * log(N));
return (float)(alpha_gaussian[num_bits - 1] * std);
}
int QuantTool::activation_quant_tool()
{
fprintf(stderr, "[Quant Tools Info]: Step 0, load FP32 tmfile.\n");
/* create graph, load tengine model xxx.tmfile */
struct graph* ir_graph = (struct graph*)create_graph(nullptr, "tengine", model_file.c_str());
if (nullptr == ir_graph)
{
fprintf(stderr, "Create graph failed.\n");
return -1;
}
fprintf(stderr, "[Quant Tools Info]: Step 0, load FP32 tmfile done.\n");
/* set the shape, data buffer of input_tensor of the graph */
int dims[] = {1, 1, 1, 2048}; // nchw
float *input_data = (float *)malloc(2048 * sizeof(float));
tensor_t input_tensor = get_graph_input_tensor(ir_graph, 0, 0);
if (input_tensor == nullptr)
{
fprintf(stderr, "Get input tensor failed\n");
return -1;
}
if (set_tensor_shape(input_tensor, dims, 4) < 0)
{
fprintf(stderr, "Set input tensor shape failed\n");
return -1;
}
if (set_tensor_buffer(input_tensor, input_data, 2048 * sizeof(float)) < 0)
{
fprintf(stderr, "Set input tensor buffer failed\n");
return -1;
}
/* initial malloc the output tensors date buffer of nodes in the graph, to disable the mem pool, before prerun */
for (int i = 0; i < ir_graph->tensor_num; i++)
{
struct tensor* var_tensor = ir_graph->tensor_list[i];
if (var_tensor->tensor_type == TENSOR_TYPE_VAR)
{
var_tensor->data = (float*)malloc(sizeof(float));
}
}
/* prerun graph, set work options(num_thread, cluster, precision) */
if (prerun_graph_multithread(ir_graph, this->opt) < 0)
{
fprintf(stderr, "Prerun multithread graph failed.\n");
return -1;
}
set_log_level(LOG_INFO);
dump_graph(ir_graph);
fprintf(stderr, "[Quant Tools Info]: Step 0, load calibration image files.\n");
/* really malloc the output tesnors date buffer of nodes in the graph */
for (int i = 0; i < ir_graph->tensor_num; i++)
{
struct tensor* var_tensor = ir_graph->tensor_list[i];
if (var_tensor->tensor_type == TENSOR_TYPE_VAR)
{
var_tensor->data = realloc(var_tensor->data, sizeof(float) * var_tensor->elem_num);
memset(var_tensor->data, 0, sizeof(float) * var_tensor->elem_num);
}
}
/* read image list */
uint32_t img_num = 3000;
fprintf(stderr, "[Quant Tools Info]: Step 0, load calibration image files done, image num is %d.\n", img_num);
/* init minmax */
std::unordered_map<int, float> max_activation;
std::unordered_map<int, float> min_activation;
std::unordered_map<int, int> act_map;
uint32_t act_tensor_num = 0;
for (int i = 0; i < ir_graph->tensor_num; i++)
{
struct tensor* act_tensor = ir_graph->tensor_list[i];
if (act_tensor->tensor_type == TENSOR_TYPE_VAR || act_tensor->tensor_type == TENSOR_TYPE_INPUT)
{
act_tensor_num++;
max_activation[i] = -FLT_MAX;
min_activation[i] = FLT_MAX;
act_map[act_tensor_num - 1] = i;
}
}
fprintf(stderr, "[Quant Tools Info]: Step 1, find original calibration table.\n");
/* first loop, find the min/max value of every activation tensor of the graph */
double min_time = DBL_MAX;
double max_time = DBL_MIN;
double total_time = 0.;
for (int nums = 0; nums < img_num; nums++)
{
fprintf(stderr, "\r[Quant Tools Info]: Step 1, images %.5d / %.5d", nums + 1, img_num);
get_bear_data("test_X.mat", input_data,nums);
/* run graph */
double start = get_current_time();
if (run_graph(ir_graph, 1) < 0)
{
fprintf(stderr, "Run graph failed\n");
return -1;
}
double end = get_current_time();
double cur = end - start;
total_time += cur;
min_time = std::min(min_time, cur);
max_time = std::max(max_time, cur);
/* get the min/max value of activation tensor */
for (int i = 0; i < ir_graph->tensor_num; i++)
{
struct tensor* act_tensor = ir_graph->tensor_list[i];
if (act_tensor->tensor_type == TENSOR_TYPE_VAR || act_tensor->tensor_type == TENSOR_TYPE_INPUT)
{
float* start_addr = (float*)act_tensor->data;
float* end_addr = (float*)act_tensor->data + act_tensor->elem_num;
max_activation[i] = std::max(max_activation[i], *std::max_element(start_addr, end_addr));
min_activation[i] = std::min(min_activation[i], *std::min_element(start_addr, end_addr));
}
}
}
/* save the calibration file with min-max algorithm */
FILE* fp_minmax = fopen("table_minmax.scale", "wb");
for (int i = 0; i < ir_graph->tensor_num; i++)
{
struct tensor* t = ir_graph->tensor_list[i];
if (t->tensor_type == TENSOR_TYPE_VAR || t->tensor_type == TENSOR_TYPE_INPUT)
{
float act_scale = 1.f;
int act_zero_point = 0;
act_scale = std::max(std::abs(max_activation[i]), std::abs(min_activation[i])) / 127.f;
/* the scale of softmax is always scale = 1 / 127.f */
for (int j = 0; j < ir_graph->node_num; j++)
{
struct node* noden = ir_graph->node_list[j];
struct tensor* tensor_tmp = get_ir_graph_tensor(ir_graph, noden->output_tensors[0]);
if (!(tensor_tmp->tensor_type == TENSOR_TYPE_INPUT || tensor_tmp->tensor_type == TENSOR_TYPE_VAR))
continue;
std::string tmp_op_name = get_op_name_from_type(noden->op.type);
std::string cur_name = t->name;
std::string tmp_name = tensor_tmp->name;
if ((cur_name == tmp_name) && tmp_op_name == "Softmax")
{
act_scale = 1 / 127.f;
break;
}
}
fprintf(fp_minmax, "%s %f %d\n", ir_graph->tensor_list[i]->name, act_scale, act_zero_point);
}
}
fclose(fp_minmax);
fprintf(stderr, "\r\n[Quant Tools Info]: Step 2, find original calibration minmax threshold table done, output ./table_minmax.scale\n");
// fprintf(stderr, "[Quant Tools Info]: Thread %d, image nums %d, total time %.2f ms, avg time %.2f ms\n", num_thread, img_num, total_time, total_time / img_num);
/* release tengine */
postrun_graph(ir_graph);
destroy_graph(ir_graph);
return 0;
}
const char* help_params = "[Quant Tools Info]: optional arguments:\n"
"\t-h help show this help message and exit\n"
"\t-m input model path to input float32 tmfile\n"
"\t-i image dir path to calibration images folder\n"
"\t-f scale file path to calibration scale file\n"
"\t-o output model path to output int8 tmfile\n"
"\t-a algorithm the type of quant algorithm(0:min-max, 1:kl, 2:aciq, default is 0)\n"
"\t-g size the size of input image(using the resize the original image,default is 3,224,224)\n"
"\t-w mean value of mean (mean value, default is 104.0,117.0,123.0)\n"
"\t-s scale value of normalize (scale value, default is 1.0,1.0,1.0)\n"
"\t-b swapRB flag which indicates that swap first and last channels in 3-channel image is necessary(0:OFF, 1:ON, default is 1)\n"
"\t-c center crop flag which indicates that center crop process image is necessary(0:OFF, 1:ON, default is 0)\n"
"\t-y letter box the size of letter box process image is necessary([rows, cols], default is [0, 0])\n"
"\t-k focus flag which indicates that focus process image is necessary(maybe using for YOLOv5, 0:OFF, 1:ON, default is 0)\n"
"\t-t num thread count of processing threads(default is 1)\n";
const char* example_params = "[Quant Tools Info]: example arguments:\n"
"\t./quant_tool_int8 -m ./mobilenet_fp32.tmfile -i ./dataset -o ./mobilenet_int8.tmfile -g 3,224,224 -w 104.007,116.669,122.679 -s 0.017,0.017,0.017\n";
void show_usage()
{
fprintf(stderr, "%s\n", help_params);
fprintf(stderr, "%s\n", example_params);
}
int main(int argc, char* argv[])
{
QuantTool quant_tool;
int res;
while ((res = getopt(argc, argv, "m:a:f:o:i:g:s:w:b:c:y:k:z:t:h")) != -1)
{
switch (res)
{
case 'm':
quant_tool.model_file = optarg;
break;
case 'a':
quant_tool.algorithm_type = atoi(optarg);
break;
case 'f':
quant_tool.scale_file = optarg;
break;
case 'o':
quant_tool.output_file = optarg;
break;
case 'i':
quant_tool.image_dir = optarg;
break;
case 'g':
float img_chw[3];
split(img_chw, optarg, ",");
quant_tool.img_c = (int)img_chw[0];
quant_tool.img_h = (int)img_chw[1];
quant_tool.img_w = (int)img_chw[2];
break;
case 'w':
split(quant_tool.mean, optarg, ",");
break;
case 's':
split(quant_tool.scale, optarg, ",");
break;
case 'b':
quant_tool.sw_RGB = atoi(optarg);
break;
case 'c':
quant_tool.center_crop = atoi(optarg);
break;
case 'y':
float letterboxs[2];
split(letterboxs, optarg, ",");
quant_tool.letterbox_rows = (int)letterboxs[0];
quant_tool.letterbox_cols = (int)letterboxs[1];
break;
case 'k':
quant_tool.focus = atoi(optarg);
break;
case 'z':
quant_tool.evaluate = atoi(optarg);
break;
case 't':
quant_tool.num_thread = atoi(optarg);
quant_tool.opt.num_thread = atoi(optarg);
break;
case 'h':
show_usage();
return 0;
default:
break;
}
}
/* version */
fprintf(stderr, "\n---- Tengine Post Training Quantization Tool ---- \n");
fprintf(stderr, "\nVersion : v1.2, %s %s\n", __TIME__, __DATE__);
fprintf(stderr, "Status : int8, per-channel, symmetric\n");
/* check input params */
if (quant_tool.model_file.empty())
{
fprintf(stderr, "[Quant Tools Info]: The input file of Float32 tmfile file not specified!\n");
show_usage();
return -1;
}
if (quant_tool.output_file.empty())
{
fprintf(stderr, "[Quant Tools Info]: The output file of Int8 tmfile not specified!\n");
show_usage();
return -1;
}
/* debug info : input params */
fprintf(stderr, "Input model : %s\n", quant_tool.model_file.c_str());
fprintf(stderr, "Output model: %s\n", quant_tool.output_file.c_str());
fprintf(stderr, "Calib images: %s\n", quant_tool.image_dir.c_str());
fprintf(stderr, "Scale file : %s\n", quant_tool.scale_file.empty() ? "NULL" : quant_tool.scale_file.c_str());
fprintf(stderr, "Algorithm : %d\n", quant_tool.algorithm_type);
fprintf(stderr, "Dims : %d %d %d\n", quant_tool.img_c, quant_tool.img_h, quant_tool.img_w);
fprintf(stderr, "Mean : %.3f %.3f %.3f\n", quant_tool.mean[0], quant_tool.mean[1], quant_tool.mean[2]);
fprintf(stderr, "Scale : %.3f %.3f %.3f\n", quant_tool.scale[0], quant_tool.scale[1], quant_tool.scale[2]);
fprintf(stderr, "BGR2RGB : %s\n", quant_tool.sw_RGB ? "ON" : "OFF");
fprintf(stderr, "Center crop : %s\n", quant_tool.center_crop ? "ON" : "OFF");
fprintf(stderr, "Letter box : %d %d\n", quant_tool.letterbox_rows, quant_tool.letterbox_cols);
fprintf(stderr, "YOLOv5 focus: %s\n", quant_tool.focus ? "ON" : "OFF");
fprintf(stderr, "Thread num : %d\n\n", quant_tool.num_thread);
switch (quant_tool.algorithm_type)
{
case ALGORITHM_MIN_MAX:
{
if (quant_tool.scale_file.empty())
{
quant_tool.scale_file = "table_minmax.scale";
quant_tool.activation_quant_tool();
}
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_KL:
{
if (quant_tool.scale_file.empty())
{
quant_tool.scale_file = "table_kl.scale";
quant_tool.activation_quant_tool();
}
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_ACIQ:
{
if (quant_tool.scale_file.empty())
{
quant_tool.scale_file = "table_aciq.scale";
quant_tool.activation_quant_tool();
}
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_DFQ:
{
quant_tool.data_free_quant();
quant_tool.model_file = "test_dfq_fp32.tmfile";
if (quant_tool.scale_file.empty())
{
quant_tool.scale_file = "table_minmax.scale";
quant_tool.activation_quant_tool();
}
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_MM_EQ:
{
if (quant_tool.scale_file.empty())
{
quant_tool.scale_file = "table_minmax.scale";
quant_tool.activation_quant_tool();
}
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
/* Enable EQ search */
fprintf(stderr, "[Quant Tools Info]: Step Search, enable EQ search\n");
quant_tool.quant_search();
quant_tool.model_file = "save_i8_eq.tmfile";
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, true);
break;
}
default:
{
fprintf(stderr, "Unsupported quantization type ... \n");
break;
}
}
fprintf(stderr, "\n---- Tengine Int8 tmfile create success, best wish for your INT8 inference has a low accuracy loss...\\(^0^)/ ----\n");
return 0;
}
Then run the quantization, choosing the min-max algorithm (-a 0). The quantization pass works just like a normal inference pass: the calibration samples are run through the model, and the optimal quantization scales are derived from the activation ranges observed during that inference.
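For reference, the scale written to table_minmax.scale is ordinary symmetric min-max int8 quantization, the same formula used for act_scale in activation_quant_tool above. A minimal stand-alone sketch (not part of the tool; the function names are only illustrative):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Symmetric per-tensor int8 quantization as used by the min-max calibration:
//   scale = max(|min|, |max|) / 127,  q = clamp(round(x / scale), -127, 127)
inline float minmax_scale(float min_val, float max_val)
{
    return std::max(std::fabs(min_val), std::fabs(max_val)) / 127.f;
}

inline int8_t quantize_int8(float x, float scale)
{
    int q = (int)std::lround(x / scale);
    return (int8_t)std::min(std::max(q, -127), 127);
}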
The inference program mainly calls the Tengine API. Write Tenginetest.cpp; the core inference routine is shown below (it reuses the get_bear_data function above to read the test samples):
// inference using the Tengine framework
int tengine_classify(const char *model_file, int loop_count, int num_thread, int affinity) {
struct options opt;
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_INT8;
opt.affinity = affinity;
if (init_tengine() != 0) {
fprintf(stderr, "Initial tengine failed.\n");
return -1;
}
fprintf(stderr, "tengine-lite library version: %s\n", get_tengine_version());
/* create graph, load tengine model xxx.tmfile */
graph_t graph = create_graph(NULL, "tengine", model_file);
if (NULL == graph)
{
fprintf(stderr, "Create graph failed.\n");
return -1;
}
/* set the shape, data buffer of input_tensor of the graph */
int dims[] = { 1, 1, 1, 2048 }; // nchw
float *input_data = (float *)malloc(2048 * sizeof(float));
if (!input_data) {
fprintf(stderr, "Input_data malloc failed\n");
return -1;
}
tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
if (input_tensor == NULL)
{
fprintf(stderr, "Get input tensor failed\n");
return -1;
}
if (set_tensor_shape(input_tensor, dims, 4) < 0)
{
fprintf(stderr, "Set input tensor shape failed\n");
return -1;
}
if (set_tensor_buffer(input_tensor, input_data, 2048 * sizeof(float)) < 0)
{
fprintf(stderr, "Set input tensor buffer failed\n");
return -1;
}
/* prerun graph, set work options(num_thread, cluster, precision) */
if (prerun_graph_multithread(graph, opt) < 0)
{
fprintf(stderr, "Prerun multithread graph failed.\n");
return -1;
}
//test code
/* prepare process input data, set the data mem to input tensor */
//std::default_random_engine e;
//std::uniform_real_distribution u(0, 1);
//for (uint16_t i = 0; i < 2048; ++i) {
// input_data[i] = u(e);
//}
//get_bear_data("\\mytinydnn\\test_X.mat", input_data);
//read 5 bearing-data samples for testing
int n = 5;
while (n > 0) {
/* run graph */
double min_time = DBL_MAX;
double max_time = DBL_MIN;
double total_time = 0.;
get_bear_data("\\mytinydnn\\test_X.mat", input_data, n);
for (int i = 0; i < loop_count; i++)
{
double start = get_current_time();
if (run_graph(graph, 1) < 0)
{
fprintf(stderr, "Run graph failed\n");
return -1;
}
double end = get_current_time();
double cur = end - start;
total_time += cur;
if (min_time > cur)
min_time = cur;
if (max_time < cur)
max_time = cur;
}
fprintf(stderr, "\nmodel file : %s\n", model_file);
fprintf(stderr, "Repeat %d times, thread %d, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", loop_count,
num_thread, total_time / loop_count, max_time, min_time);
fprintf(stderr, "--------------------------------------\n");
/* get the result of classification */
tensor_t output_tensor = get_graph_output_tensor(graph, 0, 0);
float *output_data = (float *)get_tensor_buffer(output_tensor);
int output_size = get_tensor_buffer_size(output_tensor) / sizeof(float);
print_topk(output_data, output_size, 5);
fprintf(stderr, "--------------------------------------\n");
n--;
}
/* release tengine */
free(input_data);
postrun_graph(graph);
destroy_graph(graph);
release_tengine();
return 0;
}
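Note that create_graph(NULL, "tengine", model_file) in the code above creates the graph on the default CPU device. To actually offload the network to NVDLA, the graph has to be created on a context to which the OpenDLA device has been added, following Tengine's opendla examples. A minimal sketch, assuming the OpenDLA backend was compiled into libtengine-lite and registers under the device name "OPENDLA":

#include <cstdio>
#include "tengine/c_api.h"

/* Sketch: create the graph on a context with the NVDLA (OpenDLA) device attached.
 * The device name "OPENDLA" is an assumption taken from Tengine's opendla examples. */
graph_t create_odla_graph(const char* model_file)
{
    context_t odla_context = create_context("odla", 1);
    if (add_context_device(odla_context, "OPENDLA") < 0)
    {
        fprintf(stderr, "Add OPENDLA device to context failed.\n");
        return nullptr;
    }
    /* pass the context instead of NULL so Tengine schedules supported ops onto NVDLA */
    return create_graph(odla_context, "tengine", model_file);
}

The rest of tengine_classify stays the same; only the create_graph call differs.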
Finally, set LD_LIBRARY_PATH to the directory containing libtengine-lite.so and compile the test program:
export LD_LIBRARY_PATH=~/Tengine/build/source
g++ -std=gnu++11 ./Tenginetest.cpp -o TengineTest libtengine-lite.so /usr/local/lib/libmatio.a -lz