大致流程---------------------------------------------------------------------------------------------------------------------
模型转换----------------------------------------------------------------------------------------------------
1:准备MNIST的onnx模型---> https://kdocs.cn/l/ctULnY8mxXuE
github地址--> GitHub - warren-wzw/MNIST-pytorch
搭建好ATC的环境--> https://kdocs.cn/l/cjeQxGjfojsX
首先设置环境变量
source /home/warren/Ascend/ascend-toolkit/latest/x86_64-linux/bin/setenv.bash
转化模型
atc --model=/home/warren/Ascend/yolov5/model/yolov5s.onnx \
--soc_version=OPTG --framework=5 \
--output=/home/warren/Ascend/yolov5/model/yolov5s \
--input_shape="input0:1,2,64,64"
atc --model=/home/warren/ss928/NNN_PC/amct/amct_onnx/sample/MNIST/outputs/calibration/MNIST_deploy_model.onnx \
--soc_version=OPTG --framework=5 \
--output=/home/warren/ss928/NNN_PC/amct/amct_onnx/sample/MNIST/outputs/calibration/MNIST \
--input_shape="input0:1,3,640,640"
模型转化成功后得到以下文件
模型量化--------------------------------------------------------------------------------------------------------------
目录结构
import os
import argparse
import cv2
import numpy as np
import onnxruntime as ort
import time
import torch
import amct_onnx as amct
# Absolute path of the directory the script is launched from; the paths below hang off it.
PATH = os.path.realpath('./')
# Dataset directory under PATH.
# NOTE(review): not referenced by the visible code, which opens "./data/..." directly.
DATA_DIR = os.path.join(PATH, 'data')
# No options are registered on the parser; parse_args() still runs at import time,
# so unknown command-line arguments make the script exit with a usage error.
PARSER = argparse.ArgumentParser(description='amct_onnx MNIST quantization sample.')
ARGS = PARSER.parse_args()
# OUTPUTS receives the saved quantized models; TMP holds the intermediate
# config.json, record.txt and modified_model.onnx written by main().
OUTPUTS = os.path.join(PATH, 'outputs/calibration')
TMP = os.path.join(OUTPUTS, 'tmp')
def onnx_forward(onnx_model, batch_size=1, iterations=100):
    """Run an ONNX model on the first 100 MNIST training images and print each result.

    Images are read from ``./data/train-images-idx3-ubyte`` and fed one at a
    time as a float32 array of shape (1, 1, 28, 28) holding the raw byte
    values (0-255).

    Args:
        onnx_model: Path of the ONNX model file to load.
        batch_size: Accepted for call compatibility; not used, one image is
            fed per run.
        iterations: Accepted for call compatibility; not used, the loop always
            covers 100 images.
    """
    header_bytes = 16        # pixel data starts at byte 16 of the image file
    image_bytes = 28 * 28    # one image is 784 consecutive bytes
    num_images = 100

    ort_session = ort.InferenceSession(onnx_model, amct.AMCT_SO)
    with open("./data/train-images-idx3-ubyte", "rb") as f:
        raw = f.read()

    # The input name does not change between runs; look it up once.
    input_name = ort_session.get_inputs()[0].name

    for j in range(num_images):
        offset = header_bytes + image_bytes * j
        # Use the bytes as they are. The previous code converted each byte to
        # its decimal text and parsed that text as hexadecimal (255 -> 0x255),
        # which distorted every pixel above 9.
        pixels = np.frombuffer(raw, dtype=np.uint8, count=image_bytes, offset=offset)
        input_data = pixels.astype(np.float32).reshape(1, 1, 28, 28)

        # inference
        start_time = time.time()
        output = ort_session.run(None, {input_name: input_data})
        end_time = time.time()
        # Report the time of this run; the previous code indexed a list that
        # started with a dummy 0 and therefore printed the preceding run's time.
        elapsed = end_time - start_time

        # The predicted digit is the index of the largest score.
        pred = np.argmax(output[0])
        print("------------------------The num of this pic is ",pred,"use time ",elapsed*1000,"ms",j)
def main():
    """Quantize ./model/model.onnx with AMCT and test it before and after.

    Steps: test the original model, create the quantization config, build the
    calibration model, run it over the sample images, save the fake-quant and
    deploy models, then test the fake-quant model.
    """
    model_file = './model/model.onnx'
    config_json_file = os.path.join(TMP, 'config.json')
    scale_offset_record_file = os.path.join(TMP, 'record.txt')
    modified_model = os.path.join(TMP, 'modified_model.onnx')
    result_path = os.path.join(OUTPUTS, 'MNIST')

    print('[INFO] Do original model test:')
    onnx_forward(model_file, 1, 1)

    # Quantize every layer: nothing is skipped.
    amct.create_quant_config(
        config_file=config_json_file,
        model_file=model_file,
        skip_layers=[],
        batch_num=1,
        activation_offset=True,
        config_defination=None,
    )

    # Phase1: do conv+bn fusion, weights calibration and generate
    # calibration model
    amct.quantize_model(
        config_file=config_json_file,
        model_file=model_file,
        modified_onnx_file=modified_model,
        record_file=scale_offset_record_file,
    )

    # Run the calibration model over the sample images.
    onnx_forward(modified_model, 32, 1)

    # Phase3: save final model, one for onnx do fake quant test, one
    # deploy model for ATC
    amct.save_model(modified_model, scale_offset_record_file, result_path)

    # Phase4: run fake_quant model test
    print('[INFO] Do quantized model test:')
    fake_quant_model = result_path + '_' + 'fake_quant_model.onnx'
    onnx_forward(fake_quant_model, 1, 1)


if __name__ == '__main__':
    main()
推理代码编写---------------------------------------------------------------------------------------------------
将官方的sample复制一份改为MNIST,目录结构如下图所示
更改cmake文件
1:添加环境变量:
export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest
export NPU_HOST_LIB=$DDK_PATH/runtime/lib64/stub
2:创建build目录
mkdir -p build/intermediates/host
3:cmake ../../../src -DCMAKE_CXX_COMPILER=aarch64-mix210-linux-g++ -DCMAKE_SKIP_RPATH=TRUE
将整个MNIST文件夹拷贝至板端,添加库文件路径的环境变量
export ASCEND_GLOBAL_EVENT_ENABLE=0
export ASCEND_AACPU_KERNEL_PATH=/opt/sd/lib
export ASCEND_AICPU_KERNEL_PATH=/opt/sd/lib
export LD_LIBRARY_PATH=/opt/sd/lib
执行可执行文件main
执行成功。
代码讲解-------------------------------------------------------------------------
大致逻辑
main.cpp
#include "main.h"
#include "acl/acl.h"
// printf-style logging helpers: each prepends a severity tag and appends a newline.
// INFO and WARN write to stdout, ERROR writes to stderr.
// `##__VA_ARGS__` is a GNU extension that drops the preceding comma when no
// variadic arguments are passed.
#define INFO_LOG(fmt, ...) fprintf(stdout, "[INFO] " fmt "\n", ##__VA_ARGS__)
#define WARN_LOG(fmt, ...) fprintf(stdout, "[WARN] " fmt "\n", ##__VA_ARGS__)
#define ERROR_LOG(fmt, ...) fprintf(stderr, "[ERROR] " fmt "\n", ##__VA_ARGS__)
// Model input geometry (1 x 28 x 28).
// NOTE(review): not referenced in the visible code, which hard-codes 784 instead.
const int MODEL_CHANNEL = 1;
const int MODEL_IN_WIDTH = 28;
const int MODEL_IN_HEIGHT = 28;
// Number of timed inference runs performed by main().
const int loop_count = 1000;
// Status codes; main() returns FAILED when a setup step fails.
typedef enum Result {
SUCCESS = 0,
FAILED = 1
} Result;
// Return the current time reported by gettimeofday() as a single count of
// microseconds. Callers subtract two readings to time an interval.
static inline int64_t getCurrentTimeUs()
{
    struct timeval tv;
    gettimeofday(&tv, nullptr);
    // Widen before multiplying: the product is otherwise computed in the width
    // of time_t and overflows on targets where time_t is 32 bits.
    return static_cast<int64_t>(tv.tv_sec) * 1000000 + static_cast<int64_t>(tv.tv_usec);
}
// Read image number `num` (0-based) from ../data/train-images-idx3-ubyte into
// input_image, which must have room for 784 bytes. The image starts at byte
// 16 + 784 * num of the file.
// On any failure a message is printed and input_image may be left unchanged
// or only partly filled.
void Load_data(int num,unsigned char * input_image)
{
    const long offset = 16 + 784L * num;
    FILE *file = fopen("../data/train-images-idx3-ubyte", "rb");
    if (file == NULL) {
        printf("can't open the file!\n");
        return;  // previously fell through and used the NULL handle
    }
    if (fseek(file, offset, SEEK_SET) != 0 ||
        fread(input_image, sizeof(char), 784, file) != 784) {
        printf("can't read image %d from the file!\n", num);
    }
    // Called once per inference, so the handle must be closed every time.
    fclose(file);
}

// Sort buffer[0 .. num-1] in place in ascending order.
// NOTE(review): the header and loop heads of this function were lost from the
// listing; they are reconstructed from the surviving swap body and from the
// call Bubble_sort(outData, len_1/sizeof(*outData)). Confirm against the
// original source.
void Bubble_sort(float *buffer,int num)
{
    float temp;
    for (int i = 0; i < num - 1; i++) {
        for (int j = 0; j < num - 1 - i; j++) {
            if (buffer[j] > buffer[j+1]) {
                temp = buffer[j];
                buffer[j] = buffer[j+1];
                buffer[j+1] = temp;
            }
        }
    }
}
// Entry point of the MNIST inference sample.
// Visible flow: initialise ACL with ../src/acl.json, set device 0, create a
// context and a stream, load ../model/MNIST.om, build the output and input
// datasets, run loop_count timed aclmdlExecute calls on successive MNIST
// images, then destroy the context, reset the device and finalize ACL.
// Returns FAILED when a setup step fails.
//
// NOTE(review): this listing is damaged and does not compile as shown.
//  - Every template argument list was lost, e.g. `static_cast(ret)` and
//    `reinterpret_cast(data)`.
//  - The line starting `for(int i_2 =0;i_2(ret));` has lost all code between
//    the post-processing loop and the tail of the stream-destruction step.
// Restore both from the original source before building.
// NOTE(review): none of the early `return FAILED;` paths release what was
// acquired before them (device, context, stream, model, buffers).
int main()
{
/***************************************************/
/*****************define var************************/
/***************************************************/
int num=0;
aclError ret=1;
const char *aclConfigPath = "../src/acl.json";
int32_t deviceId_=0;
aclrtContext context_=nullptr;
aclrtStream stream_=nullptr;
aclrtRunMode runMode;
uint32_t modelId_=0;
const char* modelPath = "../model/MNIST.om";
aclmdlDesc *modelDesc_;
aclmdlDataset *output_;
aclmdlDataset *input_;
// NOTE(review): inputDataBuffer is never used in the visible code.
void * inputDataBuffer = nullptr;
// Byte count of one 28x28 image; only used in error messages below.
size_t size = 784;
void* input_image_original;
void* time_ori;
int64_t sum=0;
int64_t start_time=0;
int64_t end_time=0;
/***************************************************/
/*****************Init ACL**************************/
/***************************************************/
ret = aclInit(aclConfigPath);
if (ret != ACL_SUCCESS) {
ERROR_LOG("acl init failed, errorCode = %d", static_cast(ret));
return FAILED;
}
INFO_LOG("--------------------acl init success");
/***************************************************/
/*****************apply resource********************/
/***************************************************/
// set device only one device
ret = aclrtSetDevice(deviceId_);
if (ret != ACL_SUCCESS) {
ERROR_LOG("acl set device %d failed, errorCode = %d", deviceId_, static_cast(ret));
return FAILED;
}
INFO_LOG("set device %d success", deviceId_);
// create context (set current)
ret = aclrtCreateContext(&context_, deviceId_);
if (ret != ACL_SUCCESS) {
ERROR_LOG("acl create context failed, deviceId = %d, errorCode = %d",
deviceId_, static_cast(ret));
return FAILED;
}
INFO_LOG("create context success");
// create stream
ret = aclrtCreateStream(&stream_);
if (ret != ACL_SUCCESS) {
ERROR_LOG("acl create stream failed, deviceId = %d, errorCode = %d",
deviceId_, static_cast(ret));
return FAILED;
}
INFO_LOG("create stream success");
// get run mode
// NOTE(review): runMode is queried but not read again in the visible code.
ret = aclrtGetRunMode(&runMode);
if (ret != ACL_SUCCESS) {
ERROR_LOG("acl get run mode failed, errorCode = %d", static_cast(ret));
return FAILED;
}
/***************************************************/
/********load model and get infos of model**********/
/***************************************************/
ret = aclmdlLoadFromFile(modelPath,&modelId_);
if (ret != ACL_SUCCESS) {
ERROR_LOG("load model from file failed, model file is %s, errorCode is %d",
modelPath, static_cast(ret));
return FAILED;
}
INFO_LOG("load model %s success id is %d\n", modelPath,modelId_);
//get model describe
modelDesc_ = aclmdlCreateDesc();
if (modelDesc_ == nullptr) {
ERROR_LOG("create model description failed");
return FAILED;
}
ret = aclmdlGetDesc(modelDesc_, modelId_);
if (ret != ACL_SUCCESS) {
ERROR_LOG("get model description failed, modelId is %u, errorCode is %d",
modelId_, static_cast(ret));
return FAILED;
}
INFO_LOG("create model description success");
/***************************************************/
/******************print input tensor***************/
/***************************************************/
/* aclmdlIODims *dim;
ret=aclmdlGetInputDims(modelDesc_,0,dim);
printf("----------------in dims is %d \n",dim->dimCount);
printf("----------------in dims name is: %s dims: \n",dim->name);
for(int num=0;numdimCount;num++){
printf("%d ",num,dim->dims[num]);
}
ret = aclmdlGetOutputDims(modelDesc_,0,dim);
printf("----------------out dims is %d \n",dim->dimCount);
printf("----------------out dims name is: %s dims:\n",dim->name);
for(int num=0;numdimCount;num++){
printf("%d \n",num,dim->dims[num]);
}
deviceId_=0;*/
/***************************************************/
/******************prepare output data buffer***************/
/***************************************************/
// One buffer is allocated per model output, sized by the model description,
// wrapped in an aclDataBuffer and appended to the output dataset.
output_ = aclmdlCreateDataset();
if (output_ == nullptr) {
ERROR_LOG("can't create dataset, create output failed");
return FAILED;
}
size_t outputSize = aclmdlGetNumOutputs(modelDesc_);
for (size_t i = 0; i < outputSize; ++i) {
size_t modelOutputSize = aclmdlGetOutputSizeByIndex(modelDesc_, i);
void *outputBuffer = nullptr;
ret = aclrtMalloc(&outputBuffer, modelOutputSize, ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret != ACL_SUCCESS) {
ERROR_LOG("can't malloc buffer, size is %zu, create output failed, errorCode is %d",
modelOutputSize, static_cast(ret));
return FAILED;
}
//apply output buffer
aclDataBuffer *outputData = aclCreateDataBuffer(outputBuffer, modelOutputSize);
if (outputData == nullptr) {
ERROR_LOG("can't create data buffer, create output failed");
(void)aclrtFree(outputBuffer);
return FAILED;
}
ret = aclmdlAddDatasetBuffer(output_, outputData);
if (ret != ACL_SUCCESS) {
ERROR_LOG("can't add data buffer, create output failed, errorCode is %d",
static_cast(ret));
(void)aclrtFree(outputBuffer);
(void)aclDestroyDataBuffer(outputData);
return FAILED;
}
}
INFO_LOG("create model output success");
/***************************************************/
/******************prepare input data***************/
/***************************************************/
// NOTE(review): modelDesc_ was already checked for nullptr right after
// aclmdlCreateDesc(), so this branch cannot be taken here.
if (modelDesc_ == nullptr) {
ERROR_LOG("no model description, create input failed");
return FAILED;
}
input_ = aclmdlCreateDataset();
if (input_ == nullptr) {
ERROR_LOG("can't create dataset, create input failed");
return FAILED;
}
size_t modelInputSize = aclmdlGetInputSizeByIndex(modelDesc_, 0);
// Staging buffer for the 784 raw image bytes read by Load_data().
ret = aclrtMalloc(&input_image_original, 784, ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret != ACL_SUCCESS) {
ERROR_LOG("malloc device buffer failed. size is %zu, errorCode is %d",
size, static_cast(ret));
return FAILED;
}
unsigned char * input_image = static_cast(input_image_original);
// Float copy of the image; this is the buffer handed to the model.
void* input_image_float_ori;
ret = aclrtMalloc(&input_image_float_ori, 784*sizeof(float), ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret != ACL_SUCCESS) {
// NOTE(review): the message prints `size` (784), not the 784*sizeof(float)
// bytes that were actually requested.
ERROR_LOG("malloc device buffer failed. size is %zu, errorCode is %d",
size, static_cast(ret));
return FAILED;
}
float * input_image_float = static_cast(input_image_float_ori);;
// NOTE(review): image 0 is loaded and converted here, but the loop below
// increments num and reloads before its first aclmdlExecute, so image 0 is
// never used for inference.
Load_data(num,input_image);
for(int num=0;num<784;num++){
input_image_float[num]=(float)input_image[num];
}
/* aclrtFree(input_image);
input_image=nullptr; */
// NOTE(review): the data buffer is declared with modelInputSize bytes while the
// allocation above is 784*sizeof(float); the code assumes the two are equal.
aclDataBuffer *inputData = aclCreateDataBuffer(input_image_float, modelInputSize);
if (inputData == nullptr) {
ERROR_LOG("can't create data buffer, create input failed");
return FAILED;
}
ret = aclmdlAddDatasetBuffer(input_, inputData);
if (ret != ACL_SUCCESS) {
ERROR_LOG("add input dataset buffer failed, errorCode is %d", static_cast(ret));
(void)aclDestroyDataBuffer(inputData);
inputData = nullptr;
return FAILED;
}
INFO_LOG("create model input success");
// Array holding the per-run elapsed time in microseconds, one slot per loop run.
ret = aclrtMalloc(&time_ori, loop_count*sizeof(int64_t), ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret != ACL_SUCCESS) {
ERROR_LOG("malloc device buffer failed. size is %zu, errorCode is %d",
loop_count*sizeof(int64_t), static_cast(ret));
return FAILED;
}
int64_t * time = static_cast(time_ori);
for(int loop_time=0;loop_time < loop_count;loop_time++){
// Advance to the next image and refresh the float input buffer in place; the
// input dataset keeps pointing at the same memory.
num++;
Load_data(num,input_image);
for(int loop_num=0;loop_num<784;loop_num++){
input_image_float[loop_num]=(float)input_image[loop_num];
}
// NOTE(review): data, len and indata are not read again in the visible code.
void* data = aclGetDataBufferAddr(inputData);
uint32_t len = aclGetDataBufferSizeV2(inputData);
float *indata = NULL;
indata = reinterpret_cast(data);
/***************************************************/
/******************inference************************/
/***************************************************/
// Only the aclmdlExecute call is timed.
// NOTE(review): ret is not checked after aclmdlExecute in the visible code.
start_time = getCurrentTimeUs();
ret = aclmdlExecute(modelId_, input_, output_);
end_time = getCurrentTimeUs();
time[loop_time]=end_time-start_time;
sum=sum+time[loop_time];
printf("---Elapse Time = %.3f ms \n", (end_time-start_time) / 1000.f);
/***************************************************/
/******************post process*********************/
/***************************************************/
// get model output data
aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, 0);
void* data_1 = aclGetDataBufferAddr(dataBuffer);
uint32_t len_1 = aclGetDataBufferSizeV2(dataBuffer);
float *outData = NULL;
outData = reinterpret_cast(data_1);
// Keep an unsorted copy of the output before Bubble_sort() reorders outData.
// NOTE(review): len_1 appears to be a byte count already (it is divided by
// sizeof(*outData) below to get the element count), so len_1*sizeof(float)
// requests more memory than the copy needs.
// NOTE(review): this allocation is made on every iteration and no matching
// aclrtFree is visible here; it may be in the lost span.
void* buffer_copy_ori;
ret = aclrtMalloc(&buffer_copy_ori, len_1*sizeof(float), ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret != ACL_SUCCESS) {
// NOTE(review): %zu is used with len_1, which is a uint32_t.
ERROR_LOG("malloc device buffer failed. size is %zu, errorCode is %d",
len_1, static_cast(ret));
return FAILED;
}
float * buffer_copy = static_cast(buffer_copy_ori);
for(int i_1 = 0; i_1 < len_1/sizeof(*outData);i_1++){
buffer_copy[i_1]=outData[i_1];
}
Bubble_sort(outData,len_1/sizeof(*outData));
// NOTE(review): the next line is where text was lost. What follows it is the
// tail of the stream-destruction step.
for(int i_2 =0;i_2(ret));
}
stream_ = nullptr;
}
INFO_LOG("end to destroy stream");
if (context_ != nullptr) {
ret = aclrtDestroyContext(context_);
if (ret != ACL_SUCCESS) {
ERROR_LOG("destroy context failed, errorCode = %d", static_cast(ret));
}
context_ = nullptr;
}
INFO_LOG("end to destroy context");
ret = aclrtResetDevice(deviceId_);
if (ret != ACL_SUCCESS) {
ERROR_LOG("reset device %d failed, errorCode = %d", deviceId_, static_cast(ret));
}
INFO_LOG("end to reset device %d", deviceId_);
ret = aclFinalize();
if (ret != ACL_SUCCESS) {
ERROR_LOG("finalize acl failed, errorCode = %d", static_cast(ret));
}
INFO_LOG("end to finalize acl");
}
执行结果:
fp32
int8