隔了好久来填坑,这次写一下怎么在板卡中部署模型。
打个小广告:
海思hi3519av100开发板链接:
https://item.taobao.com/item.htm?spm=a230r.1.14.117.4afe75a61WreAX&id=586610485052&ns=1&abbucket=1#detail
除了SDK与底板图纸之外我们提供了EMMC文件配置和摄像头采集到RFCN深度神经网络的物体识别和HDMI显示的完整代码,帮助开发者快速部署模型。
开发板中新更新了yolov3的demo,那么从demo中把NNIE相关的截取出来和大家分享一下。
/******************************************************************************
* function : sample entry point — runs the NNIE YOLOv3 demo once and exits
*            (original header said "ive sample", a copy-paste leftover)
******************************************************************************/
/* On HuaweiLite (LiteOS) the entry point must be app_main; elsewhere plain main. */
#ifdef __HuaweiLite__
int app_main(int argc, char *argv[])
#else
int main(int argc, char *argv[])
#endif
{
int s32Ret = HI_SUCCESS;
/* Stash argv in the sample-wide global so other sample code can read it. */
s_ppChCmdArgv = argv;
/* Run the whole YOLOv3 demo: load model, fill input, forward, print results. */
SAMPLE_SVP_NNIE_Yolov3();
/* NOTE(review): SAMPLE_SVP_NNIE_Yolov3() returns void, so s32Ret is never
   updated and this always returns HI_SUCCESS even if the demo failed. */
return s32Ret;
}
主函数很简单,直接跑了demo,读取图片后进行物体识别。
具体函数位于 ../深度学习demo/yolov3/yolov3/sample 目录下的 sample_nnie.c 中。
/******************************************************************************
* function : show YOLOV3 sample(image 416x416 U8_C3)
*            Pipeline: sys init -> load .wk model -> init HW/SW params ->
*            fill input image -> NNIE forward -> software post-process ->
*            print detections -> deinit. On any failure, jumps to
*            YOLOV3_FAIL_0 for cleanup.
******************************************************************************/
void SAMPLE_SVP_NNIE_Yolov3(void)
{
/* Input image: raw BGR planar, already resized to the net input 416x416. */
HI_CHAR *pcSrcFile = "./data/nnie_image/rgb_planar/dog_bike_car_416x416.bgr";
/* Quantized NNIE model (.wk) produced by the RuyiStudio/nnie mapper. */
HI_CHAR *pcModelName = "./data/nnie_model/detection/inst_yolov3_cycle.wk";
HI_U32 u32PicNum = 1;
HI_FLOAT f32PrintResultThresh = 0.0f;
HI_S32 s32Ret = HI_SUCCESS;
SAMPLE_SVP_NNIE_CFG_S stNnieCfg = {0};
SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInputDataIdx = {0};
SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stProcSegIdx = {0};
/*Set configuration parameter*/
/* Only detections with confidence >= 0.15 are printed (see PrintResult). */
f32PrintResultThresh = 0.15f;
stNnieCfg.pszPic= pcSrcFile;
stNnieCfg.u32MaxInputNum = u32PicNum; //max input image num in each batch
stNnieCfg.u32MaxRoiNum = 0;
stNnieCfg.aenNnieCoreId[0] = SVP_NNIE_ID_0;//set NNIE core
/*Sys init*/
SAMPLE_COMM_SVP_CheckSysInit();
/*Yolov3 Load model*/
SAMPLE_SVP_TRACE_INFO("Yolov3 Load model!\n");
s32Ret = SAMPLE_COMM_SVP_NNIE_LoadModel(pcModelName,&s_stYolov3Model);
SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret,YOLOV3_FAIL_0,SAMPLE_SVP_ERR_LEVEL_ERROR,
"Error,SAMPLE_COMM_SVP_NNIE_LoadModel failed!\n");
/*Yolov3 parameter initialization*/
/*Yolov3 software parameters are set in SAMPLE_SVP_NNIE_Yolov3_SoftwareInit,
if user has changed net struct, please make sure the parameter settings in
SAMPLE_SVP_NNIE_Yolov3_SoftwareInit function are correct*/
SAMPLE_SVP_TRACE_INFO("Yolov3 parameter initialization!\n");
s_stYolov3NnieParam.pstModel = &s_stYolov3Model.stModel;
s32Ret = SAMPLE_SVP_NNIE_Yolov3_ParamInit(&stNnieCfg,&s_stYolov3NnieParam,&s_stYolov3SoftwareParam);
SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret,YOLOV3_FAIL_0,SAMPLE_SVP_ERR_LEVEL_ERROR,
"Error,SAMPLE_SVP_NNIE_Yolov3_ParamInit failed!\n");
/*Fill src data*/
SAMPLE_SVP_TRACE_INFO("Yolov3 start!\n");
/* YOLOv3 has a single input node on segment 0. */
stInputDataIdx.u32SegIdx = 0;
stInputDataIdx.u32NodeIdx = 0;
s32Ret = SAMPLE_SVP_NNIE_FillSrcData(&stNnieCfg,&s_stYolov3NnieParam,&stInputDataIdx);
SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret,YOLOV3_FAIL_0,SAMPLE_SVP_ERR_LEVEL_ERROR,
"Error,SAMPLE_SVP_NNIE_FillSrcData failed!\n");
/* Start the latency timer (ms). NOTE(review): tp/tp1/g_time_start/g_time_end
   are globals declared outside this excerpt — types not visible here. */
gettimeofday(&tp,NULL);
g_time_start = tp.tv_sec * 1000 + tp.tv_usec/1000;
/*NNIE process(process the 0-th segment)*/
stProcSegIdx.u32SegIdx = 0;
s32Ret = SAMPLE_SVP_NNIE_Forward(&s_stYolov3NnieParam,&stInputDataIdx,&stProcSegIdx,HI_TRUE);
SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret,YOLOV3_FAIL_0,SAMPLE_SVP_ERR_LEVEL_ERROR,
"Error,SAMPLE_SVP_NNIE_Forward failed!\n");
/*Software process*/
/*if user has changed net struct, please make sure SAMPLE_SVP_NNIE_Yolov3_GetResult
function input datas are correct*/
s32Ret = SAMPLE_SVP_NNIE_Yolov3_GetResult(&s_stYolov3NnieParam,&s_stYolov3SoftwareParam);
SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret,YOLOV3_FAIL_0,SAMPLE_SVP_ERR_LEVEL_ERROR,
"Error,SAMPLE_SVP_NNIE_Yolov3_GetResult failed!\n");
/*print result, this sample has 81 classes:
class 0:background class 1:person class 2:bicycle class 3:car class 4:motorbike class 5:aeroplane
class 6:bus class 7:train class 8:truck class 9:boat class 10:traffic light
class 11:fire hydrant class 12:stop sign class 13:parking meter class 14:bench class 15:bird
class 16:cat class 17:dog class 18:horse class 19:sheep class 20:cow
class 21:elephant class 22:bear class 23:zebra class 24:giraffe class 25:backpack
class 26:umbrella class 27:handbag class 28:tie class 29:suitcase class 30:frisbee
class 31:skis class 32:snowboard class 33:sports ball class 34:kite class 35:baseball bat
class 36:baseball glove class 37:skateboard class 38:surfboard class 39:tennis racket class 40:bottle
class 41:wine glass class 42:cup class 43:fork class 44:knife class 45:spoon
class 46:bowl class 47:banana class 48:apple class 49:sandwich class 50:orange
class 51:broccoli class 52:carrot class 53:hot dog class 54:pizza class 55:donut
class 56:cake class 57:chair class 58:sofa class 59:pottedplant class 60:bed
class 61:diningtable class 62:toilet class 63:tvmonitor class 64:laptop class 65:mouse
class 66:remote class 67:keyboard class 68:cell phone class 69:microwave class 70:oven
class 71:toaster class 72:sink class 73:refrigerator class 74:book class 75:clock
class 76:vase class 77:scissors class 78:teddy bear class 79:hair drier class 80:toothbrush*/
SAMPLE_SVP_TRACE_INFO("Yolov3 result:\n");
(void)SAMPLE_SVP_NNIE_Detection_PrintResult(&s_stYolov3SoftwareParam.stDstScore,
&s_stYolov3SoftwareParam.stDstRoi, &s_stYolov3SoftwareParam.stClassRoiNum,f32PrintResultThresh);
/* Stop the latency timer and report elapsed time.
   NOTE(review): "%d" assumes g_time_end/g_time_start are int-width; if they
   are long/HI_U64 the format specifier mismatches — confirm the declaration. */
gettimeofday(&tp1,NULL);
g_time_end = tp1.tv_sec * 1000 + tp1.tv_usec/1000;
printf("yolov3 time : %d ms .\n", g_time_end-g_time_start);
/* Single cleanup path for all failures above and for normal completion. */
YOLOV3_FAIL_0:
SAMPLE_SVP_NNIE_Yolov3_Deinit(&s_stYolov3NnieParam,&s_stYolov3SoftwareParam,&s_stYolov3Model);
SAMPLE_COMM_SVP_CheckSysExit();
}
是采用完整版YOLOV3的416x416大小输入的模型。从中截取几个函数看一下。
首先是初始化函数:
/******************************************************************************
* function : Yolov3 init
*            Two-stage initialization: first the NNIE hardware parameters
*            (common to all samples), then the YOLOv3-specific software
*            post-processing parameters. If either stage fails, everything
*            initialized so far is torn down via Deinit before returning.
* param    : pstCfg           [in]  common NNIE sample configuration
*            pstNnieParam     [out] hardware (NNIE) parameters to fill
*            pstSoftWareParam [out] software post-process parameters to fill
* return   : HI_SUCCESS on success; HI_FAILURE (or the Deinit error code)
*            on failure.
******************************************************************************/
static HI_S32 SAMPLE_SVP_NNIE_Yolov3_ParamInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam)
{
HI_S32 s32Ret = HI_SUCCESS;
/*init hardware para*/
s32Ret = SAMPLE_COMM_SVP_NNIE_ParamInit(pstCfg,pstNnieParam);
SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret,INIT_FAIL_0,SAMPLE_SVP_ERR_LEVEL_ERROR,
"Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n",s32Ret);
/*init software para*/
s32Ret = SAMPLE_SVP_NNIE_Yolov3_SoftwareInit(pstCfg,pstNnieParam,
pstSoftWareParam);
SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret,INIT_FAIL_0,SAMPLE_SVP_ERR_LEVEL_ERROR,
"Error(%#x),SAMPLE_SVP_NNIE_Yolov3_SoftwareInit failed!\n",s32Ret);
return s32Ret;
/* Failure path: undo partial initialization (model handle stays owned by
   the caller, hence the NULL third argument). */
INIT_FAIL_0:
s32Ret = SAMPLE_SVP_NNIE_Yolov3_Deinit(pstNnieParam,pstSoftWareParam,NULL);
SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR,
"Error(%#x),SAMPLE_SVP_NNIE_Yolov3_Deinit failed!\n",s32Ret);
return HI_FAILURE;
}
YOLOV3的相关参数在 SAMPLE_SVP_NNIE_Yolov3_SoftwareInit 函数中进行设置:
/******************************************************************************
* function : Yolov3 software para init
*            Fills the software post-processing parameters (grid sizes,
*            anchor biases, thresholds) and allocates the assist buffers.
*            NOTE(review): the body is truncated in this excerpt ("...")
*            — the buffer allocation code is omitted by the article author.
******************************************************************************/
static HI_S32 SAMPLE_SVP_NNIE_Yolov3_SoftwareInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam)
{
HI_S32 s32Ret = HI_SUCCESS;
HI_U32 u32ClassNum = 0;
HI_U32 u32TotalSize = 0;
HI_U32 u32DstRoiSize = 0;
HI_U32 u32DstScoreSize = 0;
HI_U32 u32ClassRoiNumSize = 0;
HI_U32 u32TmpBufTotalSize = 0;
HI_U64 u64PhyAddr = 0;
HI_U8* pu8VirAddr = NULL;
/* Original image size is taken from the net's input blob (416x416 here). */
pstSoftWareParam->u32OriImHeight = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Height;
pstSoftWareParam->u32OriImWidth = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Width;
/* 3 anchor boxes per grid cell, 80 object classes (COCO). */
pstSoftWareParam->u32BboxNumEachGrid = 3;
pstSoftWareParam->u32ClassNum = 80;
/* Three detection scales: 13x13, 26x26, 52x52 grids for a 416x416 input. */
pstSoftWareParam->au32GridNumHeight[0] = 13;
pstSoftWareParam->au32GridNumHeight[1] = 26;
pstSoftWareParam->au32GridNumHeight[2] = 52;
pstSoftWareParam->au32GridNumWidth[0] = 13;
pstSoftWareParam->au32GridNumWidth[1] = 26;
pstSoftWareParam->au32GridNumWidth[2] = 52;
/* Thresholds are stored as fixed-point: float * SAMPLE_SVP_NNIE_QUANT_BASE. */
pstSoftWareParam->u32NmsThresh = (HI_U32)(0.15f*SAMPLE_SVP_NNIE_QUANT_BASE);
pstSoftWareParam->u32ConfThresh = (HI_U32)(0.25f*SAMPLE_SVP_NNIE_QUANT_BASE);
pstSoftWareParam->u32MaxRoiNum = 10;
/* Anchor biases (width,height pairs) per scale; [0] is the 13x13 (largest
   objects) scale, [2] the 52x52 (smallest objects) scale. */
pstSoftWareParam->af32Bias[0][0] = 116;
pstSoftWareParam->af32Bias[0][1] = 90;
pstSoftWareParam->af32Bias[0][2] = 156;
pstSoftWareParam->af32Bias[0][3] = 198;
pstSoftWareParam->af32Bias[0][4] = 373;
pstSoftWareParam->af32Bias[0][5] = 326;
pstSoftWareParam->af32Bias[1][0] = 30;
pstSoftWareParam->af32Bias[1][1] = 61;
pstSoftWareParam->af32Bias[1][2] = 62;
pstSoftWareParam->af32Bias[1][3] = 45;
pstSoftWareParam->af32Bias[1][4] = 59;
pstSoftWareParam->af32Bias[1][5] = 119;
pstSoftWareParam->af32Bias[2][0] = 10;
pstSoftWareParam->af32Bias[2][1] = 13;
pstSoftWareParam->af32Bias[2][2] = 16;
pstSoftWareParam->af32Bias[2][3] = 30;
pstSoftWareParam->af32Bias[2][4] = 33;
pstSoftWareParam->af32Bias[2][5] = 23;
/*Malloc assist buffer memory*/
/* +1 accounts for the implicit background class in the result blobs. */
u32ClassNum = pstSoftWareParam->u32ClassNum+1;
... /* remainder omitted in this excerpt */
...
return s32Ret;
}
这里面涉及到的数字参数都是在Ruyi中模型量化的时候对应的参数,如果默认用80类yolov3的话基本不用修改。
u32ClassNum代表识别的类别数,这里是默认80类。
主程序里面 f32PrintResultThresh 参数是识别为物体的阈值,这里配置的值是 0.15f,即置信度大于 15% 时认为是真实物体。
由于海思模型量化后概率值的量级分化很严重,所以阈值可以设置得比较低,这与原始模型的阈值设置有些区别。
/******************************************************************************
* function : print detection result
*            Walks the per-class result blobs produced by the YOLOv3 software
*            post-process and prints every box whose quantized score is at
*            least f32PrintResultThresh. Class 0 (background) is skipped.
*            Layout assumption (from the indexing below): scores and ROIs of
*            all classes are packed back to back; ps32ClassRoiNum[i] gives the
*            number of entries for class i, so the running sum of those counts
*            is the offset of class i's first entry.
* param    : pstDstScore           [in] quantized scores blob (HI_S32 values,
*                                       scale = SAMPLE_SVP_NNIE_QUANT_BASE)
*            pstDstRoi             [in] box coordinates blob, COORDI_NUM
*                                       HI_S32 values per box (xmin,ymin,xmax,ymax)
*            pstClassRoiNum        [in] per-class ROI counts; width = class count
*            f32PrintResultThresh  [in] minimum score (0..1) to print a box
* return   : HI_SUCCESS always.
******************************************************************************/
static HI_S32 SAMPLE_SVP_NNIE_Detection_PrintResult(SVP_BLOB_S *pstDstScore,
    SVP_BLOB_S *pstDstRoi, SVP_BLOB_S *pstClassRoiNum, HI_FLOAT f32PrintResultThresh)
{
    HI_U32 i = 0, j = 0;
    HI_U32 u32RoiNumBias = 0;   /* running offset of class i's first entry */
    HI_U32 u32ScoreBias = 0;
    HI_U32 u32BboxBias = 0;
    HI_FLOAT f32Score = 0.0f;
    HI_S32* ps32Score = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstDstScore->u64VirAddr);
    HI_S32* ps32Roi = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstDstRoi->u64VirAddr);
    HI_S32* ps32ClassRoiNum = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,pstClassRoiNum->u64VirAddr);
    HI_U32 u32ClassNum = pstClassRoiNum->unShape.stWhc.u32Width;
    HI_S32 s32XMin = 0,s32YMin= 0,s32XMax = 0,s32YMax = 0;

    /* Skip class 0 (background): advance past its entries. */
    u32RoiNumBias += ps32ClassRoiNum[0];
    for (i = 1; i < u32ClassNum; i++)
    {
        u32ScoreBias = u32RoiNumBias;
        u32BboxBias = u32RoiNumBias * SAMPLE_SVP_NNIE_COORDI_NUM;
        /* If the class's best score passes the threshold, print its header.
           BUGFIX: check ps32ClassRoiNum[i] != 0 FIRST — the original read
           ps32Score[u32ScoreBias] before that check, which for an empty class
           can index one past the last valid score entry. */
        if (ps32ClassRoiNum[i] != 0 &&
            (HI_FLOAT)ps32Score[u32ScoreBias] / SAMPLE_SVP_NNIE_QUANT_BASE >= f32PrintResultThresh)
        {
            /* %u: i is HI_U32 (original used %d). */
            SAMPLE_SVP_TRACE_INFO("==== The %uth class box info====\n", i);
        }
        for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++)
        {
            f32Score = (HI_FLOAT)ps32Score[u32ScoreBias + j] / SAMPLE_SVP_NNIE_QUANT_BASE;
            if (f32Score < f32PrintResultThresh)
            {
                /* NOTE(review): early break assumes scores within a class are
                   sorted in descending order — confirm against the NMS stage. */
                break;
            }
            s32XMin = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM];
            s32YMin = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM + 1];
            s32XMax = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM + 2];
            s32YMax = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM + 3];
            SAMPLE_SVP_TRACE_INFO("%d %d %d %d %f\n", s32XMin, s32YMin, s32XMax, s32YMax, f32Score);
        }
        u32RoiNumBias += ps32ClassRoiNum[i];
    }
    return HI_SUCCESS;
}
上面部分是打印识别到物体类别信息。
最后打印从图片输入到识别输出的时间延迟。
g_time_end = tp1.tv_sec * 1000 + tp1.tv_usec/1000;
printf("yolov3 time : %d ms .\n", g_time_end-g_time_start);
目前运行未做修改的完整版 yolov3,每张图片的处理时间在 70ms 左右是比较正常的。
开发板资料中 ..深度学习demo\yolov3\data\nnie_model\detection 中提供了已经量化后的 YOLOV3模型,
是从Darknet YOLOV3模型上转成caffe下的yolov3模型的。
..深度学习demo\yolov3\data\nnie_image\rgb_planar 中提供了可供测试的 dog_bike_car.jpg和转换后的.bgr图片。