生成 onnx 模型:
python tools/export_onnx.py --config configs/bisenetv2_city.py --weight-path ./checkpoints/model_final_v2_city.pth --outpath ./checkpoints/model_final_v2_city.onnx --no-onnxsim
mean 和 std 参考自己数据集统计得到 (见数据集目录下的 datasets.txt)。
datasets.txt 用于量化校准, datasets_ans.txt 用于精度分析 (量化类型为 asymmetric_affine-u8)。
from rknn.api import RKNN

# Input ONNX model and the quantized RKNN model to produce.
ONNX_MODEL = './model/model_final_v2_city.onnx'
RKNN_MODEL = './model/model_final_v2_city_u8.rknn'
QUANTIZE_ON = True
_force_builtin_perm = False
# Directory where accuracy_analysis writes its per-layer reports.
_acc_analysis_output_dir = './output_dir'
# Image list (one path per line) used for accuracy analysis.
_acc_analysis_dataset = './images/city/datasets_ans.txt'
# Image list used for quantization calibration.
_qua_dataset = './images/city/datasets.txt'

if __name__ == '__main__':
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # Pre-process config.
    # Supported quantized dtypes:
    # asymmetric_affine-u8, dynamic_fixed_point-i8, dynamic_fixed_point-i16
    print('--> config model')
    rknn.config(
        reorder_channel='0 1 2',
        mean_values=[[83.0535, 94.095, 82.1865]],
        std_values=[[53.856, 54.774, 75.786]],
        optimization_level=3,
        target_platform='rv1126',
        quantize_input_node=QUANTIZE_ON,
        quantized_dtype='asymmetric_affine-u8',
        batch_size=32,
        output_optimize=1,
        force_builtin_perm=_force_builtin_perm)
    print('done')

    print('--> Loading model')
    # ret = rknn.load_onnx(model=ONNX_MODEL, outputs=['output0', 'output1'])
    ret = rknn.load_onnx(model=ONNX_MODEL, outputs=['preds'])
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build (and quantize) the model.
    print('--> Building model')
    ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=_qua_dataset, pre_compile=True)
    if ret != 0:
        # fixed: message previously referred to an unrelated pp_liteseg model
        print('Build model failed!')
        exit(ret)
    print('done')

    # Export rknn model
    print('--> Export RKNN model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export failed!')
        exit(ret)
    print('done')

    # Per-layer accuracy analysis of the quantized model.
    print('--> Accuracy analysis')
    ret = rknn.accuracy_analysis(inputs=_acc_analysis_dataset,
                                 output_dir=_acc_analysis_output_dir)
    if ret != 0:
        print('accuracy_analysis failed!')
        exit(ret)
    print('done')

    rknn.release()
ncnn 和 tensorrt 的 demo 实现 (以下为 C++ 示例代码):
// NOTE(review): the original header names were lost during extraction
// (bare "#include" lines); reconstructed below from what the code uses.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <algorithm>
#include <chrono>
#include <random>
#include <string>
#include <vector>

#include "rknn_api.h"

#include "opencv2/opencv.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
using namespace std;
using namespace cv;
// Dump one RKNN tensor attribute for debugging.
// NOTE: dims are printed high-to-low (dims[3]..dims[0]) to read as NCHW.
void printRKNNTensor(rknn_tensor_attr *attr)
{
    printf("index=%d name=%s n_dims=%d dims=[%d %d %d %d] n_elems=%d size=%d "
           "fmt=%d type=%d qnt_type=%d fl=%d zp=%d scale=%f\n",
           attr->index, attr->name, attr->n_dims, attr->dims[3], attr->dims[2],
           attr->dims[1], attr->dims[0], attr->n_elems, attr->size,
           // fixed: the "fmt" slot previously printed a literal 0
           attr->fmt, attr->type,
           attr->qnt_type, attr->fl, attr->zp, attr->scale);
}
// Build a fixed pseudo-random palette: 256 colors, 3 channels each.
// Seeded deterministically so the same class id always maps to the
// same color across runs.
std::vector<std::vector<uint8_t>> get_color_map()
{
    std::vector<std::vector<uint8_t>> color_map(256, std::vector<uint8_t>(3));
    std::minstd_rand rand_eng(123);
    // fixed: std::uniform_int_distribution<uint8_t> is undefined behavior
    // (char-sized types are not valid IntTypes) -- draw ints and narrow.
    std::uniform_int_distribution<int> u(0, 255);
    for (auto &color : color_map)
    {
        for (auto &channel : color)
        {
            channel = static_cast<uint8_t>(u(rand_eng));
        }
    }
    return color_map;
}
// Letterbox-resize `img` into an INPUT_W x INPUT_H canvas: scale to fit
// while preserving aspect ratio, then pad the remainder with gray (114).
cv::Mat static_resize(cv::Mat &img, int INPUT_W, int INPUT_H)
{
    double fit_w = INPUT_W / (img.cols * 1.0);
    double fit_h = INPUT_H / (img.rows * 1.0);
    float scale = std::min(fit_w, fit_h);
    // r = std::min(r, 1.0f);
    int scaled_w = scale * img.cols;
    int scaled_h = scale * img.rows;
    cv::Mat scaled(scaled_h, scaled_w, CV_8UC3);
    cv::resize(img, scaled, scaled.size());
    cv::Mat canvas(INPUT_H, INPUT_W, CV_8UC3, cv::Scalar(114, 114, 114));
    scaled.copyTo(canvas(cv::Rect(0, 0, scaled.cols, scaled.rows)));
    return canvas;
}
int main(int argc, char *argv[])
{
std::string model_path = std::string(argv[1]);
// std::string imagepath = std::string(argv[2]);
std::string folder_path = std::string(argv[2]);
int input_width_ = std::atoi(argv[3]);
int input_height_ = std::atoi(argv[4]);
std::vector<cv::String> file_names;
cv::glob(folder_path, file_names);
int oH{input_height_}, oW{input_width_}, n_classes{2};
// Load model
FILE *fp = fopen(model_path.c_str(), "rb");
if (fp == NULL)
{
printf("fopen %s fail!\n", model_path);
return -1;
}
fseek(fp, 0, SEEK_END);
int model_len = ftell(fp);
void *model = malloc(model_len);
fseek(fp, 0, SEEK_SET);
if (model_len != fread(model, 1, model_len, fp))
{
printf("fread %s fail!\n", model_path);
free(model);
return -1;
}
rknn_context ctx = 0;
int ret = rknn_init(&ctx, model, model_len, 0);
if (ret < 0)
{
printf("rknn_init fail! ret=%d\n", ret);
return -1;
}
/* Query sdk version */
rknn_sdk_version version;
ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &version,
sizeof(rknn_sdk_version));
if (ret < 0)
{
printf("rknn_init error ret=%d\n", ret);
return -1;
}
printf("sdk version: %s driver version: %s\n", version.api_version,
version.drv_version);
/* Get input,output attr */
rknn_input_output_num io_num;
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret < 0)
{
printf("rknn_init error ret=%d\n", ret);
return -1;
}
printf("model input num: %d, output num: %d\n", io_num.n_input,
io_num.n_output);
rknn_tensor_attr input_attrs[io_num.n_input];
memset(input_attrs, 0, sizeof(input_attrs));
for (int i = 0; i < io_num.n_input; i++)
{
input_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]),
sizeof(rknn_tensor_attr));
if (ret < 0)
{
printf("rknn_init error ret=%d\n", ret);
return -1;
}
printRKNNTensor(&(input_attrs[i]));
}
rknn_tensor_attr output_attrs[io_num.n_output];
memset(output_attrs, 0, sizeof(output_attrs));
for (int i = 0; i < io_num.n_output; i++)
{
output_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]),
sizeof(rknn_tensor_attr));
printRKNNTensor(&(output_attrs[i]));
}
int input_channel = 3;
int input_width = 0;
int input_height = 0;
if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
{
printf("model is NCHW input fmt\n");
input_width = input_attrs[0].dims[0];
input_height = input_attrs[0].dims[1];
printf("input_width=%d input_height=%d\n", input_width, input_height);
}
else
{
printf("model is NHWC input fmt\n");
input_width = input_attrs[0].dims[1];
input_height = input_attrs[0].dims[2];
printf("input_width=%d input_height=%d\n", input_width, input_height);
}
printf("model input height=%d, width=%d, channel=%d\n", input_height, input_width,
input_channel);
for (size_t i = 0; i < file_names.size(); i++)
{
cv::Mat im = cv::imread(file_names[i]);
auto t1 = std::chrono::steady_clock::now();
Mat pr_img;
cv::resize(im, pr_img, cv::Size(oW, oH));
cv::cvtColor(pr_img, pr_img, cv::COLOR_BGR2RGB);
/* Init input tensor */
rknn_input inputs[1];
memset(inputs, 0, sizeof(inputs));
inputs[0].index = 0;
inputs[0].buf = pr_img.data;
inputs[0].type = RKNN_TENSOR_UINT8;
inputs[0].size = input_width * input_height * input_channel;
inputs[0].fmt = RKNN_TENSOR_NHWC;
inputs[0].pass_through = 0;
/* Init output tensor */
rknn_output outputs[io_num.n_output];
memset(outputs, 0, sizeof(outputs));
for (int i = 0; i < io_num.n_output; i++)
{
outputs[i].want_float = 1;
}
rknn_inputs_set(ctx, io_num.n_input, inputs);
ret = rknn_run(ctx, NULL);
if (ret < 0)
{
printf("ctx error ret=%d\n", ret);
return -1;
}
ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);
if (ret < 0)
{
printf("outputs error ret=%d\n", ret);
return -1;
}
vector<vector<uint8_t>> color_map = get_color_map();
cv::Mat pred(cv::Size(oW, oH), CV_8UC3);
int o_size = input_width * input_height * 4;
float *prob = new float[o_size];
memcpy(prob, (float *)outputs[0].buf, o_size);
int idx{0};
for (int i{0}; i < oH; ++i)
{
uint8_t *ptr = pred.ptr<uint8_t>(i);
for (int j{0}; j < oW; ++j)
{
ptr[0] = color_map[prob[idx]][0];
ptr[1] = color_map[prob[idx]][1];
ptr[2] = color_map[prob[idx]][2];
ptr += 3;
++idx;
}
}
// resize back and save
cv::resize(pred, pred, im.size(), cv::INTER_CUBIC);
cv::imwrite(cv::format("./out/%d.jpg", i), pred);
ret = rknn_outputs_release(ctx, io_num.n_output, outputs);
if (ret < 0)
{
printf("rknn_query fail! ret=%d\n", ret);
goto Error;
}
}
Error:
if (ctx > 0)
rknn_destroy(ctx);
if (model)
free(model);
if (fp)
fclose(fp);
return 0;
}
用 adb 将程序拷贝到板子上, 然后调用:
./bisenet_seg_npu_sample ./model/model_final_v2_city_u8.rknn ./images 1024 512
如果感觉模型识别效果不是那么好, 可以尝试使用混合量化, 找到一个速度与精度兼顾的平衡点。
hybrid_quantization_step1.py
from rknn.api import RKNN

ONNX_MODEL = './model/model_final_v2_city.onnx'
RKNN_MODEL = './model/model_final_v2_city_u8.rknn'
QUANTIZE_ON = True
# Calibration image list (one path per line).
_qua_dataset = './images/city/datasets.txt'
_force_builtin_perm = False

if __name__ == '__main__':
    # Create RKNN object
    rknn = RKNN()

    # Model config -- keep identical to the plain quantization script so
    # the hybrid-quantization results are comparable.
    print('--> Config model')
    rknn.config(reorder_channel='0 1 2',
                mean_values=[[83.0535, 94.095, 82.1865]],
                std_values=[[53.856, 54.774, 75.786]],
                optimization_level=3,
                # fixed: 'rk1126' is not a valid platform name;
                # the target board is 'rv1126'
                target_platform='rv1126',
                output_optimize=1,
                quantized_dtype='asymmetric_affine-u8',
                quantize_input_node=QUANTIZE_ON,
                batch_size=32,
                force_builtin_perm=_force_builtin_perm)
    print('done')

    # Load onnx model
    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Step 1 emits the .json/.data/.quantization.cfg files that step 2
    # consumes after manual editing.
    print('--> hybrid_quantization_step1')
    ret = rknn.hybrid_quantization_step1(dataset=_qua_dataset)
    if ret != 0:
        print('hybrid_quantization_step1 failed!')
        exit(ret)
    print('done')
    print('==================================================================================================')

    rknn.release()
hybrid_quantization_step2.py:
根据精度分析结果, 在 torchjitexport.quantization.cfg 中将误差较大的层换成 float 或 dynamic_fixed_point-i16 等精度更高的量化类型。
from rknn.api import RKNN

ONNX_MODEL = './model/model_final_v2_city.onnx'
RKNN_MODEL = './model/model_final_v2_city_u8_hyqua.rknn'
QUANTIZE_ON = True
_force_builtin_perm = False
# Calibration image list (one path per line).
_qua_dataset = './images/city/datasets.txt'

if __name__ == '__main__':
    # Create RKNN object
    rknn = RKNN()

    # Set model config -- must match the config used in step 1.
    print('--> config model')
    rknn.config(reorder_channel='0 1 2',
                mean_values=[[83.0535, 94.095, 82.1865]],
                std_values=[[53.856, 54.774, 75.786]],
                optimization_level=3,
                # fixed: 'rk1126' is not a valid platform name;
                # the target board is 'rv1126'
                target_platform='rv1126',
                output_optimize=1,
                quantized_dtype='asymmetric_affine-u8',
                quantize_input_node=QUANTIZE_ON,
                batch_size=32,
                force_builtin_perm=_force_builtin_perm)
    print('done')

    # Step 2 consumes the files produced by step 1 (after the
    # .quantization.cfg has been hand-edited per the accuracy analysis).
    print('--> hybrid_quantization_step2')
    ret = rknn.hybrid_quantization_step2(model_input='./torchjitexport.json',
                                         data_input='./torchjitexport.data',
                                         model_quantization_cfg='./torchjitexport.quantization.cfg',
                                         dataset=_qua_dataset, pre_compile=True)
    if ret != 0:
        print('hybrid_quantization_step2 failed!')
        exit(ret)
    print('done')

    # Export RKNN model
    print('--> Export RKNN model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export RKNN model failed!')
        exit(ret)
    print('done')

    rknn.release()