/* Path of the input image file used by this sample. */
HI_CHAR *pcSrcFile = "./data/nnie_image/rgb_planar/dog_bike_car_416x416.bgr";
/* Path of the NNIE model (.wk) file to load. */
HI_CHAR *pcModelName = "./data/nnie_model/detection/inst_yolov3_cycle.wk";
/* Number of input pictures. */
HI_U32 u32PicNum = 1;
/* Threshold used when printing results; starts at 0. */
HI_FLOAT f32PrintResultThresh = 0.0f;
/* Status of the most recent call. */
HI_S32 s32Ret = HI_SUCCESS;
/* NNIE configuration, zero-initialised. */
SAMPLE_SVP_NNIE_CFG_S stNnieCfg = {0};
/* Segment/node index of the input data, zero-initialised. */
SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInputDataIdx = {0};
/* Segment/node index of the segment to process, zero-initialised. */
SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stProcSegIdx = {0};
结构体SAMPLE_SVP_NNIE_CFG_S:
/* NNIE configuration filled in by the sample before parameter initialisation. */
typedef struct hiSAMPLE_SVP_NNIE_CFG_S
{
HI_CHAR *pszPic; // path of the input picture
HI_U32 u32MaxInputNum; // max number of input pictures in each batch
HI_U32 u32MaxRoiNum; // max number of ROIs
HI_U64 au64StepVirAddr[SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM * SVP_NNIE_MAX_NET_SEG_NUM]; //virtual addr of LSTM's or RNN's step buffer
SVP_NNIE_ID_E aenNnieCoreId[SVP_NNIE_MAX_NET_SEG_NUM]; // NNIE core selected for each segment
} SAMPLE_SVP_NNIE_CFG_S;
主要涉及NNIE的设置
枚举SVP_NNIE_ID_E:
/* Identifiers of the NNIE cores. */
typedef enum hiSVP_NNIE_ID_E {
SVP_NNIE_ID_0 = 0x0,
SVP_NNIE_ID_1 = 0x1,
SVP_NNIE_ID_BUTT /* one past the last valid id — presumably an end marker; confirm against the SDK */
} SVP_NNIE_ID_E;
主要定义了NNIE的核的枚举
结构体SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S:
/* Addresses one node of one segment by index. */
typedef struct hiSAMPLE_SVP_NNIE_DATA_INDEX_S
{
HI_U32 u32SegIdx; /* segment index */
HI_U32 u32NodeIdx; /* node index within that segment */
} SAMPLE_SVP_NNIE_DATA_INDEX_S;
/* Alias of the same index pair, used for input data. */
typedef SAMPLE_SVP_NNIE_DATA_INDEX_S SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S;
SegIdx是指段索引(Yolo不需要分段,因此只有一段,索引为0)
NodeIdx是指节点索引(不是节点数)
结构体SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S:
/* Alias of the segment/node index pair, used for the segment to process. */
typedef SAMPLE_SVP_NNIE_DATA_INDEX_S SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S;
同上
/*Set configuration parameter*/
f32PrintResultThresh = 0.8f; /* replaces the initial 0.0f */
stNnieCfg.pszPic= pcSrcFile; /* input picture path */
stNnieCfg.u32MaxInputNum = u32PicNum; //max input image num in each batch
stNnieCfg.u32MaxRoiNum = 0; /* no ROI is configured */
stNnieCfg.aenNnieCoreId[0] = SVP_NNIE_ID_0;//set NNIE core
主要设置了SAMPLE_SVP_NNIE_CFG_S结构体的相关内容
/* Initialise the MPP system if that has not been done yet. */
SAMPLE_COMM_SVP_CheckSysInit();
主要进行初始化
/*
 * Run SAMPLE_COMM_SVP_SysInit() the first time this function is called.
 * s_bSampleSvpInit records whether the initialisation has already been done.
 * When the initialisation fails an error is traced and the process exits
 * with status -1.
 */
HI_VOID SAMPLE_COMM_SVP_CheckSysInit(HI_VOID)
{
    if (HI_FALSE != s_bSampleSvpInit)
    {
        /* already initialised by an earlier call */
        SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_DEBUG, "Svp mpi init ok!\n");
        return;
    }

    if (0 != SAMPLE_COMM_SVP_SysInit())
    {
        SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_ERROR, "Svp mpi init failed!\n");
        exit(-1);
    }

    s_bSampleSvpInit = HI_TRUE;
    SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_DEBUG, "Svp mpi init ok!\n");
}
/*
 * Exit and re-initialise the MPP video buffer pool and the MPP system.
 *
 * The pool configuration is zeroed, u32MaxPoolCnt is set to 2 and common
 * pool 1 gets one block of 768 * 576 * 2 bytes.
 *
 * Returns HI_SUCCESS, or the error code of the first MPI call that failed.
 */
static HI_S32 SAMPLE_COMM_SVP_SysInit(HI_VOID)
{
    VB_CONFIG_S stVbCfg;
    HI_S32 s32Ret = HI_FAILURE;

    /* Leave any previous session first; the return values are ignored. */
    HI_MPI_SYS_Exit();
    HI_MPI_VB_Exit();

    /* Start from an all-zero configuration. */
    memset(&stVbCfg, 0, sizeof(stVbCfg));
    stVbCfg.u32MaxPoolCnt = 2;
    stVbCfg.astCommPool[1].u32BlkCnt = 1;
    stVbCfg.astCommPool[1].u64BlkSize = 768 * 576 * 2;

    /* Set the attributes of the MPP video buffer pool. */
    s32Ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&stVbCfg);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x):HI_MPI_VB_SetConf failed!\n", s32Ret);

    /* Initialise the MPP video buffer pool. */
    s32Ret = HI_MPI_VB_Init();
    SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x):HI_MPI_VB_Init failed!\n", s32Ret);

    /* Initialise the MPP system. */
    s32Ret = HI_MPI_SYS_Init();
    SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x):HI_MPI_SYS_Init failed!\n", s32Ret);

    return s32Ret;
}
结构体VB_CONFIG_S:
/* Video buffer pool configuration passed to HI_MPI_VB_SetConfig. */
typedef struct hiVB_CONFIG_S {
HI_U32 u32MaxPoolCnt; /* maximum pool count */
VB_POOL_CONFIG_S astCommPool[VB_MAX_COMM_POOLS]; /* per-pool settings of the common pools */
} VB_CONFIG_S;
结构体VB_POOL_CONFIG_S:
/* Settings of one video buffer pool. */
typedef struct hiVB_POOL_CONFIG_S {
HI_U64 u64BlkSize; /* size of one block */
HI_U32 u32BlkCnt; /* number of blocks */
VB_REMAP_MODE_E enRemapMode; /* remap mode */
HI_CHAR acMmzName[MAX_MMZ_NAME_LEN]; /* MMZ name */
} VB_POOL_CONFIG_S;
/* YOLOv3 model state: the file buffer plus the parsed model. */
static SAMPLE_SVP_NNIE_MODEL_S s_stYolov3Model = {0};
/* Read the .wk file into memory and parse it into s_stYolov3Model. */
s32Ret = SAMPLE_COMM_SVP_NNIE_LoadModel(pcModelName,&s_stYolov3Model);
向NNIE核载入模型
结构体SAMPLE_SVP_NNIE_MODEL_S:
/* A parsed NNIE model together with the memory holding the model file. */
typedef struct hiSAMPLE_SVP_NNIE_MODEL_S
{
SVP_NNIE_MODEL_S stModel; /* parsed model */
SVP_MEM_INFO_S stModelBuf; //store Model file
} SAMPLE_SVP_NNIE_MODEL_S;
结构体SVP_NNIE_MODEL_S
/* Network model description used by the NNIE. */
typedef struct hiSVP_NNIE_MODEL_S {
SVP_NNIE_RUN_MODE_E enRunMode;// enum: run mode of the network model
HI_U32 u32TmpBufSize; /* temp buffer size */
HI_U32 u32NetSegNum; /* number of network segments */
SVP_NNIE_SEG_S astSeg[SVP_NNIE_MAX_NET_SEG_NUM]; /* per-segment information */
SVP_NNIE_ROIPOOL_INFO_S astRoiInfo[SVP_NNIE_MAX_ROI_LAYER_NUM]; /* ROIPooling info */
SVP_MEM_INFO_S stBase; /* base memory information */
} SVP_NNIE_MODEL_S;
主要存了模型用于NNIE核的一些属性
结构体SVP_MEM_INFO_S
/* Mem information: physical address, virtual address and size of one memory block */
typedef struct hiSVP_MEM_INFO_S {
HI_U64 u64PhyAddr; /* RW;The physical address of the memory */
HI_U64 u64VirAddr; /* RW;The virtual address of the memory */
HI_U32 u32Size; /* RW;The size of memory */
} SVP_MEM_INFO_S;
主要存了内存分配的信息
/*
 * Read an NNIE model (.wk) file into MMZ memory and parse it.
 *
 * pszModelFile  path of the model file; must not be NULL
 * pstNnieModel  receives the file buffer (stModelBuf) and the parsed model
 *               (stModel); must not be NULL
 *
 * Returns HI_SUCCESS on success, HI_INVALID_VALUE when an argument is NULL or
 * the file cannot be opened, and HI_FAILURE for every other error. On the
 * FAIL_1 path the model buffer is handed to SAMPLE_SVP_MMZ_FREE and
 * stModelBuf.u32Size is reset to 0.
 */
HI_S32 SAMPLE_COMM_SVP_NNIE_LoadModel(HI_CHAR *pszModelFile,
    SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel)
{
    HI_S32 s32Ret = HI_INVALID_VALUE;
    HI_U64 u64PhyAddr = 0;
    HI_U8 *pu8VirAddr = NULL;
    HI_SL slFileSize = 0;
    FILE *fp = NULL;

    /* Reject NULL arguments before they reach fopen() or are dereferenced. */
    SAMPLE_SVP_CHECK_EXPR_RET((NULL == pszModelFile || NULL == pstNnieModel), s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,pszModelFile and pstNnieModel can't be NULL!\n");

    /*Get model file size*/
    fp = fopen(pszModelFile, "rb");
    SAMPLE_SVP_CHECK_EXPR_RET(NULL == fp, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, open model file failed!\n");
    s32Ret = fseek(fp, 0L, SEEK_END);
    SAMPLE_SVP_CHECK_EXPR_GOTO(-1 == s32Ret, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, fseek failed!\n");
    slFileSize = ftell(fp);
    SAMPLE_SVP_CHECK_EXPR_GOTO(slFileSize <= 0, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, ftell failed!\n");
    s32Ret = fseek(fp, 0L, SEEK_SET);
    SAMPLE_SVP_CHECK_EXPR_GOTO(-1 == s32Ret, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, fseek failed!\n");

    /*malloc model file mem*/
    /* The allocation yields both a virtual and a physical address. */
    s32Ret = SAMPLE_COMM_SVP_MallocMem("SAMPLE_NNIE_MODEL", NULL, (HI_U64 *)&u64PhyAddr, (void **)&pu8VirAddr, slFileSize);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),Malloc memory failed!\n", s32Ret);
    pstNnieModel->stModelBuf.u32Size = (HI_U32)slFileSize;
    pstNnieModel->stModelBuf.u64PhyAddr = u64PhyAddr;
    pstNnieModel->stModelBuf.u64VirAddr = (HI_U64)pu8VirAddr;

    /* Read the whole file as a single item, so success means fread() returned 1. */
    s32Ret = (HI_S32)fread(pu8VirAddr, slFileSize, 1, fp);
    SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret, FAIL_1, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,read model file failed!\n");

    /*load model*/
    /* Parse the network model out of the buffer that now holds the .wk file:
     * stModelBuf is the input, stModel is the output. */
    s32Ret = HI_MPI_SVP_NNIE_LoadModel(&pstNnieModel->stModelBuf, &pstNnieModel->stModel);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, FAIL_1, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_LoadModel failed!\n");

    fclose(fp);
    return HI_SUCCESS;

FAIL_1:
    SAMPLE_SVP_MMZ_FREE(pstNnieModel->stModelBuf.u64PhyAddr, pstNnieModel->stModelBuf.u64VirAddr);
    pstNnieModel->stModelBuf.u32Size = 0;
FAIL_0:
    if (NULL != fp)
    {
        fclose(fp);
    }
    return HI_FAILURE;
}
/*
 * Allocate a block of MMZ memory; thin wrapper around HI_MPI_SYS_MmzAlloc.
 * (MMZ is the memory HiSilicon uses for media data, see
 * https://www.cnblogs.com/wlzy/p/9733110.html)
 *
 * pszMmb       name string of the Mmb (input)
 * pszZone      name string of the MMZ zone (input)
 * pu64PhyAddr  receives the physical address (output)
 * ppvVirAddr   receives the virtual address (output)
 * u32Size      size of the memory block (input)
 *
 * Returns whatever HI_MPI_SYS_MmzAlloc returns.
 */
HI_S32 SAMPLE_COMM_SVP_MallocMem(HI_CHAR *pszMmb, HI_CHAR *pszZone, HI_U64 *pu64PhyAddr, HI_VOID **ppvVirAddr, HI_U32 u32Size)
{
    return HI_MPI_SYS_MmzAlloc(pu64PhyAddr, ppvVirAddr, pszMmb, pszZone, u32Size);
}
/* NNIE runtime parameters of the YOLOv3 sample. */
static SAMPLE_SVP_NNIE_PARAM_S s_stYolov3NnieParam = {0};
/* The parameters must point at the parsed model before initialisation. */
s_stYolov3NnieParam.pstModel = &s_stYolov3Model.stModel;
/* Initialise hardware and software parameters (s_stYolov3SoftwareParam is declared elsewhere). */
s32Ret = SAMPLE_SVP_NNIE_Yolov3_ParamInit(&stNnieCfg,&s_stYolov3NnieParam,&s_stYolov3SoftwareParam);
该部分主要进行模型的设置
结构体SAMPLE_SVP_NNIE_PARAM_S:
/* Runtime parameters of one NNIE network: model, buffers, blobs and forward controls. */
typedef struct hiSAMPLE_SVP_NNIE_PARAM_S
{
SVP_NNIE_MODEL_S *pstModel; /* parsed model these parameters belong to */
HI_U32 u32TmpBufSize; /* temp buffer size */
HI_U32 au32TaskBufSize[SVP_NNIE_MAX_NET_SEG_NUM]; /* task buffer size of each segment */
SVP_MEM_INFO_S stTaskBuf; /* task buffer shared by all segments */
SVP_MEM_INFO_S stTmpBuf; /* temp buffer */
SVP_MEM_INFO_S stStepBuf; //store Lstm step info
SAMPLE_SVP_NNIE_SEG_DATA_S astSegData[SVP_NNIE_MAX_NET_SEG_NUM]; //each seg's input and output blob
SVP_NNIE_FORWARD_CTRL_S astForwardCtrl[SVP_NNIE_MAX_NET_SEG_NUM]; /* forward control of each segment */
SVP_NNIE_FORWARD_WITHBBOX_CTRL_S astForwardWithBboxCtrl[SVP_NNIE_MAX_NET_SEG_NUM]; /* forward-with-bbox control of each segment */
} SAMPLE_SVP_NNIE_PARAM_S;
结构体SAMPLE_SVP_NNIE_SEG_DATA_S:
/*each seg input and output memory*/
typedef struct hiSAMPLE_SVP_NNIE_SEG_DATA_S
{
SVP_SRC_BLOB_S astSrc[SVP_NNIE_MAX_INPUT_NUM]; /* input blobs of the segment */
SVP_DST_BLOB_S astDst[SVP_NNIE_MAX_OUTPUT_NUM]; /* output blobs of the segment */
} SAMPLE_SVP_NNIE_SEG_DATA_S;
结构体SVP_BLOB_S:
/* Source and destination blobs share the SVP_BLOB_S layout. */
typedef SVP_BLOB_S SVP_SRC_BLOB_S;
typedef SVP_BLOB_S SVP_DST_BLOB_S;
/****************************** Blob struct ******************************
In Caffe, the blob contain shape info as the following order:
Image\FeatureMap: N C H W
FC(normal vector): N C
RNN\LSTM(Recurrent) vector: T N D
The relationship of the following blob struct with Caffe blob is as follows:
Image\FeatureMap: Num Chn Height Width
FC(VEC_S32): Num Width
RNN\LSTM(SEQ_S32) vector: Step Num Dim
The stride, which measuring unit is byte, is always aligned by the width or
dim direction.
**************************************************************************/
typedef struct hiSVP_BLOB_S {
SVP_BLOB_TYPE_E enType; /* Blob type */
HI_U32 u32Stride; /* Stride, a line bytes num */
HI_U64 u64VirAddr; /* virtual addr */
HI_U64 u64PhyAddr; /* physical addr */
HI_U32 u32Num; /* N: frame num or sequence num, correspond to caffe blob's n */
union {
struct {
HI_U32 u32Width; /* W: frame width, correspond to caffe blob's w */
HI_U32 u32Height; /* H: frame height, correspond to caffe blob's h */
HI_U32 u32Chn; /* C: frame channel, correspond to caffe blob's c */
} stWhc;
struct {
HI_U32 u32Dim; /* D: vector dimension */
HI_U64 u64VirAddrStep; /* T: virtual address of time steps array in each sequence */
} stSeq;
} unShape;
} SVP_BLOB_S;
/*
 * Initialise the YOLOv3 sample parameters in two stages: the NNIE hardware
 * parameters first, then the YOLOv3 software parameters.
 *
 * When either stage fails, SAMPLE_SVP_NNIE_Yolov3_Deinit is called; the
 * function then returns the Deinit error code if Deinit failed too, and
 * HI_FAILURE otherwise. On success the status of the last stage is returned.
 */
static HI_S32 SAMPLE_SVP_NNIE_Yolov3_ParamInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    HI_S32 s32Ret = HI_SUCCESS;

    /* stage 1: hardware parameters */
    s32Ret = SAMPLE_COMM_SVP_NNIE_ParamInit(pstCfg, pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Ret != HI_SUCCESS, INIT_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n", s32Ret);

    /* stage 2: software parameters */
    s32Ret = SAMPLE_SVP_NNIE_Yolov3_SoftwareInit(pstCfg, pstNnieParam, pstSoftWareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Ret != HI_SUCCESS, INIT_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_SVP_NNIE_Yolov3_SoftwareInit failed!\n", s32Ret);

    return s32Ret;

INIT_FAIL_0:
    /* undo whatever was set up before the failure */
    s32Ret = SAMPLE_SVP_NNIE_Yolov3_Deinit(pstNnieParam, pstSoftWareParam, NULL);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_SVP_NNIE_Yolov3_Deinit failed!\n", s32Ret);
    return HI_FAILURE;
}
函数SAMPLE_COMM_SVP_NNIE_ParamInit
/*
 * Validate the arguments and initialise the NNIE parameters.
 *
 * Returns HI_ERR_SVP_NNIE_ILLEGAL_PARAM when pstNnieCfg, pstNnieParam or
 * pstNnieParam->pstModel is NULL. When the initialisation itself fails,
 * SAMPLE_COMM_SVP_NNIE_ParamDeinit is called and the function returns the
 * Deinit error code if Deinit failed too, HI_FAILURE otherwise.
 */
HI_S32 SAMPLE_COMM_SVP_NNIE_ParamInit(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
{
    HI_S32 s32Ret = HI_SUCCESS;

    /* argument checks */
    SAMPLE_SVP_CHECK_EXPR_RET(!(pstNnieCfg != NULL && pstNnieParam != NULL), HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,pstNnieCfg and pstNnieParam can't be NULL!\n");
    SAMPLE_SVP_CHECK_EXPR_RET((pstNnieParam->pstModel == NULL), HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,pstNnieParam->pstModel can't be NULL!\n");

    /* actual parameter initialisation */
    s32Ret = SAMPLE_SVP_NNIE_ParamInit(pstNnieCfg, pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Ret != HI_SUCCESS, FAIL, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, SAMPLE_SVP_NNIE_ParamInit failed!\n");

    return s32Ret;

FAIL:
    s32Ret = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n");
    return HI_FAILURE;
}
/*****************************************************************************
* Prototype : SAMPLE_SVP_NNIE_ParamInit
* Description : Fill info of NNIE Forward parameters.
*               Fills the forward info, queries the buffer sizes, allocates one
*               cached MMZ block of the total size and carves it up in this
*               order: task buffers of all segments, the tmp buffer, the src
*               blobs of segment 0, then the dst blobs of every segment.
* Input : SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg NNIE configure parameter
*         SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam NNIE parameters
* Output : pstNnieParam buffers, forward ctrl addresses and blob addresses
* Return Value : HI_S32,HI_SUCCESS:Success,Other:failure
* Spec : A segment with zero src or dst blobs is skipped; nothing is read
*        from the blob size table for it.
* History:
*
* 1. Date : 2017-03-14
* Modification : Create
*
*****************************************************************************/
static HI_S32 SAMPLE_SVP_NNIE_ParamInit(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
{
    HI_U32 i = 0, j = 0;
    HI_U32 u32TotalSize = 0;
    HI_U32 u32TotalTaskBufSize = 0;
    HI_U32 u32TmpBufSize = 0;
    HI_S32 s32Ret = HI_SUCCESS;
    HI_U32 u32Offset = 0;
    HI_U64 u64PhyAddr = 0;
    HI_U8 *pu8VirAddr = NULL;
    SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[SVP_NNIE_MAX_NET_SEG_NUM] = {0};

    /*fill forward info*/
    s32Ret = SAMPLE_SVP_NNIE_FillForwardInfo(pstNnieCfg, pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_FillForwardCtrl failed!\n");

    /*Get taskInfo and Blob mem size*/
    /* u32TotalSize must be 0 here: the callee accumulates into it. */
    s32Ret = SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize(pstNnieCfg, pstNnieParam, &u32TotalTaskBufSize,
        &u32TmpBufSize, astBlobSize, &u32TotalSize);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize failed!\n");

    /*Malloc mem*/
    s32Ret = SAMPLE_COMM_SVP_MallocCached("SAMPLE_NNIE_TASK", NULL, (HI_U64 *)&u64PhyAddr, (void **)&pu8VirAddr, u32TotalSize);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,Malloc memory failed!\n");

    /* Zero the new block and flush the cache for it. */
    memset(pu8VirAddr, 0, u32TotalSize);
    SAMPLE_COMM_SVP_FlushCache(u64PhyAddr, (void *)pu8VirAddr, u32TotalSize);

    /*fill taskinfo mem addr*/
    pstNnieParam->stTaskBuf.u32Size = u32TotalTaskBufSize;
    pstNnieParam->stTaskBuf.u64PhyAddr = u64PhyAddr;
    pstNnieParam->stTaskBuf.u64VirAddr = (HI_U64)pu8VirAddr;

    /*fill Tmp mem addr*/
    pstNnieParam->stTmpBuf.u32Size = u32TmpBufSize;
    pstNnieParam->stTmpBuf.u64PhyAddr = u64PhyAddr + u32TotalTaskBufSize;
    pstNnieParam->stTmpBuf.u64VirAddr = (HI_U64)pu8VirAddr + u32TotalTaskBufSize;

    /*fill forward ctrl addr*/
    for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    {
        if (SVP_NNIE_NET_TYPE_ROI == pstNnieParam->pstModel->astSeg[i].enNetType)
        {
            pstNnieParam->astForwardWithBboxCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
            pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
            pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
            pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
        }
        else if (SVP_NNIE_NET_TYPE_CNN == pstNnieParam->pstModel->astSeg[i].enNetType ||
                 SVP_NNIE_NET_TYPE_RECURRENT == pstNnieParam->pstModel->astSeg[i].enNetType)
        {
            pstNnieParam->astForwardCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
            pstNnieParam->astForwardCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
            pstNnieParam->astForwardCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
            pstNnieParam->astForwardCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
        }
        /* every segment's task buffer follows the previous one */
        u32Offset += pstNnieParam->au32TaskBufSize[i];
    }

    /*fill each blob's mem addr*/
    u64PhyAddr = u64PhyAddr + u32TotalTaskBufSize + u32TmpBufSize;
    pu8VirAddr = pu8VirAddr + u32TotalTaskBufSize + u32TmpBufSize;
    for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    {
        /*first seg has src blobs, other seg's src blobs from the output blobs of
        those segs before it or from software output results*/
        if (0 == i)
        {
            for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16SrcNum; j++)
            {
                pstNnieParam->astSegData[i].astSrc[j].u64PhyAddr = u64PhyAddr;
                pstNnieParam->astSegData[i].astSrc[j].u64VirAddr = (HI_U64)pu8VirAddr;
                /* Step past this blob inside the loop, so a zero blob count
                   never indexes the size table with j - 1. */
                u64PhyAddr += astBlobSize[i].au32SrcSize[j];
                pu8VirAddr += astBlobSize[i].au32SrcSize[j];
            }
        }

        /*fill the mem addrs of each seg's output blobs*/
        for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16DstNum; j++)
        {
            pstNnieParam->astSegData[i].astDst[j].u64PhyAddr = u64PhyAddr;
            pstNnieParam->astSegData[i].astDst[j].u64VirAddr = (HI_U64)pu8VirAddr;
            u64PhyAddr += astBlobSize[i].au32DstSize[j];
            pu8VirAddr += astBlobSize[i].au32DstSize[j];
        }
    }
    return s32Ret;
}
函数SAMPLE_SVP_NNIE_FillForwardInfo
/*
 * Fill, for every segment of pstNnieParam->pstModel, the forward control
 * entry and the type/shape/num fields of the src and dst blobs.
 *
 * pstNnieCfg    supplies the core id per segment, the max input/ROI numbers
 *               and the step buffer addresses
 * pstNnieParam  model to read from and control/blob arrays to fill
 *
 * Always returns HI_SUCCESS.
 */
static HI_S32 SAMPLE_SVP_NNIE_FillForwardInfo(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
{
    HI_U32 u32Seg = 0, u32Node = 0;
    HI_U32 u32TskOffset = 0;
    HI_U32 u32DstBlobNum = 0;

    for (u32Seg = 0; u32Seg < pstNnieParam->pstModel->u32NetSegNum; u32Seg++)
    {
        /* The net type of the segment decides which control array is filled. */
        if (SVP_NNIE_NET_TYPE_ROI == pstNnieParam->pstModel->astSeg[u32Seg].enNetType)
        {
            pstNnieParam->astForwardWithBboxCtrl[u32Seg].enNnieId = pstNnieCfg->aenNnieCoreId[u32Seg];
            pstNnieParam->astForwardWithBboxCtrl[u32Seg].u32SrcNum =
                pstNnieParam->pstModel->astSeg[u32Seg].u16SrcNum;
            pstNnieParam->astForwardWithBboxCtrl[u32Seg].u32DstNum =
                pstNnieParam->pstModel->astSeg[u32Seg].u16DstNum;
            pstNnieParam->astForwardWithBboxCtrl[u32Seg].u32ProposalNum = 1;
            pstNnieParam->astForwardWithBboxCtrl[u32Seg].u32NetSegId = u32Seg;
            pstNnieParam->astForwardWithBboxCtrl[u32Seg].stTmpBuf = pstNnieParam->stTmpBuf;
            pstNnieParam->astForwardWithBboxCtrl[u32Seg].stTskBuf.u64PhyAddr =
                pstNnieParam->stTaskBuf.u64PhyAddr + u32TskOffset;
            pstNnieParam->astForwardWithBboxCtrl[u32Seg].stTskBuf.u64VirAddr =
                pstNnieParam->stTaskBuf.u64VirAddr + u32TskOffset;
            pstNnieParam->astForwardWithBboxCtrl[u32Seg].stTskBuf.u32Size =
                pstNnieParam->au32TaskBufSize[u32Seg];
        }
        else if (SVP_NNIE_NET_TYPE_CNN == pstNnieParam->pstModel->astSeg[u32Seg].enNetType ||
                 SVP_NNIE_NET_TYPE_RECURRENT == pstNnieParam->pstModel->astSeg[u32Seg].enNetType)
        {
            pstNnieParam->astForwardCtrl[u32Seg].enNnieId = pstNnieCfg->aenNnieCoreId[u32Seg];
            pstNnieParam->astForwardCtrl[u32Seg].u32SrcNum =
                pstNnieParam->pstModel->astSeg[u32Seg].u16SrcNum;
            pstNnieParam->astForwardCtrl[u32Seg].u32DstNum =
                pstNnieParam->pstModel->astSeg[u32Seg].u16DstNum;
            pstNnieParam->astForwardCtrl[u32Seg].u32NetSegId = u32Seg;
            pstNnieParam->astForwardCtrl[u32Seg].stTmpBuf = pstNnieParam->stTmpBuf;
            pstNnieParam->astForwardCtrl[u32Seg].stTskBuf.u64PhyAddr =
                pstNnieParam->stTaskBuf.u64PhyAddr + u32TskOffset;
            pstNnieParam->astForwardCtrl[u32Seg].stTskBuf.u64VirAddr =
                pstNnieParam->stTaskBuf.u64VirAddr + u32TskOffset;
            pstNnieParam->astForwardCtrl[u32Seg].stTskBuf.u32Size =
                pstNnieParam->au32TaskBufSize[u32Seg];
        }
        u32TskOffset += pstNnieParam->au32TaskBufSize[u32Seg];

        /* Src blobs: type and num are common, the shape depends on the type. */
        for (u32Node = 0; u32Node < pstNnieParam->pstModel->astSeg[u32Seg].u16SrcNum; u32Node++)
        {
            pstNnieParam->astSegData[u32Seg].astSrc[u32Node].enType =
                pstNnieParam->pstModel->astSeg[u32Seg].astSrcNode[u32Node].enType;
            pstNnieParam->astSegData[u32Seg].astSrc[u32Node].u32Num = pstNnieCfg->u32MaxInputNum;

            if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->pstModel->astSeg[u32Seg].astSrcNode[u32Node].enType)
            {
                /* Recurrent blob: dimension plus the segment's first step address. */
                pstNnieParam->astSegData[u32Seg].astSrc[u32Node].unShape.stSeq.u32Dim =
                    pstNnieParam->pstModel->astSeg[u32Seg].astSrcNode[u32Node].unShape.u32Dim;
                pstNnieParam->astSegData[u32Seg].astSrc[u32Node].unShape.stSeq.u64VirAddrStep =
                    pstNnieCfg->au64StepVirAddr[u32Seg * SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM];
            }
            else
            {
                pstNnieParam->astSegData[u32Seg].astSrc[u32Node].unShape.stWhc.u32Chn =
                    pstNnieParam->pstModel->astSeg[u32Seg].astSrcNode[u32Node].unShape.stWhc.u32Chn;
                pstNnieParam->astSegData[u32Seg].astSrc[u32Node].unShape.stWhc.u32Height =
                    pstNnieParam->pstModel->astSeg[u32Seg].astSrcNode[u32Node].unShape.stWhc.u32Height;
                pstNnieParam->astSegData[u32Seg].astSrc[u32Node].unShape.stWhc.u32Width =
                    pstNnieParam->pstModel->astSeg[u32Seg].astSrcNode[u32Node].unShape.stWhc.u32Width;
            }
        }

        /* Dst blobs: an ROI segment produces MaxRoiNum results per input. */
        u32DstBlobNum = pstNnieCfg->u32MaxInputNum;
        if (SVP_NNIE_NET_TYPE_ROI == pstNnieParam->pstModel->astSeg[u32Seg].enNetType)
        {
            u32DstBlobNum = pstNnieCfg->u32MaxRoiNum * pstNnieCfg->u32MaxInputNum;
        }

        for (u32Node = 0; u32Node < pstNnieParam->pstModel->astSeg[u32Seg].u16DstNum; u32Node++)
        {
            pstNnieParam->astSegData[u32Seg].astDst[u32Node].enType =
                pstNnieParam->pstModel->astSeg[u32Seg].astDstNode[u32Node].enType;
            pstNnieParam->astSegData[u32Seg].astDst[u32Node].u32Num = u32DstBlobNum;

            if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->pstModel->astSeg[u32Seg].astDstNode[u32Node].enType)
            {
                /* Recurrent blob: dimension plus the segment's second step address. */
                pstNnieParam->astSegData[u32Seg].astDst[u32Node].unShape.stSeq.u32Dim =
                    pstNnieParam->pstModel->astSeg[u32Seg].astDstNode[u32Node].unShape.u32Dim;
                pstNnieParam->astSegData[u32Seg].astDst[u32Node].unShape.stSeq.u64VirAddrStep =
                    pstNnieCfg->au64StepVirAddr[u32Seg * SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM + 1];
            }
            else
            {
                pstNnieParam->astSegData[u32Seg].astDst[u32Node].unShape.stWhc.u32Chn =
                    pstNnieParam->pstModel->astSeg[u32Seg].astDstNode[u32Node].unShape.stWhc.u32Chn;
                pstNnieParam->astSegData[u32Seg].astDst[u32Node].unShape.stWhc.u32Height =
                    pstNnieParam->pstModel->astSeg[u32Seg].astDstNode[u32Node].unShape.stWhc.u32Height;
                pstNnieParam->astSegData[u32Seg].astDst[u32Node].unShape.stWhc.u32Width =
                    pstNnieParam->pstModel->astSeg[u32Seg].astDstNode[u32Node].unShape.stWhc.u32Width;
            }
        }
    }

    return HI_SUCCESS;
}
根据 pstNnieParam->pstModel填充pstNnieParam->astSegData段的输入和输出blob,固定的逻辑
函数SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize
/*****************************************************************************
* Prototype : SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize
* Description : Get taskinfo and blob memory size
* Input : SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg NNIE configure parameter
*         SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam NNIE parameter
* Output : HI_U32 *pu32TotalTaskBufSize sum of all segments' task buf sizes
*          HI_U32 *pu32TmpBufSize Tmp buffer size taken from the model
*          SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[] each seg input and output blob mem size
*          HI_U32 *pu32TotalSize Total mem size; the sizes are ADDED to the
*          value already stored there, so the caller must initialise it
* Return Value : HI_S32,HI_SUCCESS:Success,Other:failure
* History:
*
* 1. Date : 2017-11-20
* Modification : Create
*
*****************************************************************************/
static HI_S32 SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, HI_U32 *pu32TotalTaskBufSize, HI_U32 *pu32TmpBufSize,
    SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[], HI_U32 *pu32TotalSize)
{
    HI_U32 u32Seg = 0, u32Seq = 0;
    HI_U32 u32StepSum = 0;
    HI_S32 s32Ret = HI_SUCCESS;

    /* Ask the NNIE for the task buffer size of every segment. */
    s32Ret = HI_MPI_SVP_NNIE_GetTskBufSize(pstNnieCfg->u32MaxInputNum, pstNnieCfg->u32MaxRoiNum,
        pstNnieParam->pstModel, pstNnieParam->au32TaskBufSize, pstNnieParam->pstModel->u32NetSegNum);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_GetTaskSize failed!\n");

    /* Total task buffer size is the sum over all segments. */
    *pu32TotalTaskBufSize = 0;
    for (u32Seg = 0; u32Seg < pstNnieParam->pstModel->u32NetSegNum; u32Seg++)
    {
        *pu32TotalTaskBufSize += pstNnieParam->au32TaskBufSize[u32Seg];
    }

    /* Tmp buffer size comes straight from the model. */
    *pu32TmpBufSize = pstNnieParam->pstModel->u32TmpBufSize;
    *pu32TotalSize += *pu32TotalTaskBufSize + *pu32TmpBufSize;

    /* Blob memory sizes. */
    for (u32Seg = 0; u32Seg < pstNnieParam->pstModel->u32NetSegNum; u32Seg++)
    {
        if (SVP_NNIE_NET_TYPE_RECURRENT == pstNnieParam->pstModel->astSeg[u32Seg].enNetType)
        {
            /* Add up the step counts of all sequences of the first src blob;
               the sum keeps growing across segments. */
            for (u32Seq = 0; u32Seq < pstNnieParam->astSegData[u32Seg].astSrc[0].u32Num; u32Seq++)
            {
                u32StepSum += ((HI_S32 *)pstNnieParam->astSegData[u32Seg].astSrc[0].unShape.stSeq.u64VirAddrStep)[u32Seq];
            }
        }

        /* Only the first segment owns src blob memory; later segments take
           their input from earlier outputs or from software results. */
        if (0 == u32Seg)
        {
            SAMPLE_SVP_NNIE_GetBlobMemSize(&(pstNnieParam->pstModel->astSeg[u32Seg].astSrcNode[0]),
                pstNnieParam->pstModel->astSeg[u32Seg].u16SrcNum, u32StepSum,
                &(pstNnieParam->astSegData[u32Seg].astSrc[0]),
                SAMPLE_SVP_NNIE_ALIGN_16, pu32TotalSize, &(astBlobSize[u32Seg].au32SrcSize[0]));
        }

        /* Every segment owns its dst blob memory. */
        SAMPLE_SVP_NNIE_GetBlobMemSize(&(pstNnieParam->pstModel->astSeg[u32Seg].astDstNode[0]),
            pstNnieParam->pstModel->astSeg[u32Seg].u16DstNum, u32StepSum,
            &(pstNnieParam->astSegData[u32Seg].astDst[0]),
            SAMPLE_SVP_NNIE_ALIGN_16, pu32TotalSize, &(astBlobSize[u32Seg].au32DstSize[0]));
    }

    return s32Ret;
}
/*****************************************************************************
* Prototype : SAMPLE_SVP_NNIE_GetBlobMemSize
* Description : Get blob mem size
* Input : SVP_NNIE_NODE_S astNnieNode[] NNIE Node
*         HI_U32 u32NodeNum Node num
*         HI_U32 u32TotalStep total step count, used for SEQ_S32 nodes
*         SVP_BLOB_S astBlob[] blob struct; u32Num is read, u32Stride is written
*         HI_U32 u32Align stride align type; SAMPLE_SVP_NNIE_ALIGN_16 selects
*         16-byte alignment, any other value 32-byte alignment
* Output : HI_U32 *pu32TotalSize Total size; every blob size is added to it
*          HI_U32 au32BlobSize[] blob size
* Return Value : VOID
* History:
*
* 1. Date : 2017-11-20
* Modification : Create
*
*****************************************************************************/
static void SAMPLE_SVP_NNIE_GetBlobMemSize(SVP_NNIE_NODE_S astNnieNode[], HI_U32 u32NodeNum,
    HI_U32 u32TotalStep, SVP_BLOB_S astBlob[], HI_U32 u32Align, HI_U32 *pu32TotalSize, HI_U32 au32BlobSize[])
{
    HI_U32 u32Idx = 0;
    HI_U32 u32ElemBytes = 0;
    HI_U32 u32RowBytes = 0;
    HI_U32 u32LineStride = 0;

    for (u32Idx = 0; u32Idx < u32NodeNum; u32Idx++)
    {
        /* The three S32 blob types use 4-byte elements, all others 1 byte. */
        switch (astNnieNode[u32Idx].enType)
        {
            case SVP_BLOB_TYPE_S32:
            case SVP_BLOB_TYPE_VEC_S32:
            case SVP_BLOB_TYPE_SEQ_S32:
                u32ElemBytes = sizeof(HI_U32);
                break;
            default:
                u32ElemBytes = sizeof(HI_U8);
                break;
        }

        /* Unaligned byte length of one line: dim for sequences, width otherwise. */
        if (SVP_BLOB_TYPE_SEQ_S32 == astNnieNode[u32Idx].enType)
        {
            u32RowBytes = astNnieNode[u32Idx].unShape.u32Dim * u32ElemBytes;
        }
        else
        {
            u32RowBytes = astNnieNode[u32Idx].unShape.stWhc.u32Width * u32ElemBytes;
        }

        if (SAMPLE_SVP_NNIE_ALIGN_16 == u32Align)
        {
            u32LineStride = SAMPLE_SVP_NNIE_ALIGN16(u32RowBytes);
        }
        else
        {
            u32LineStride = SAMPLE_SVP_NNIE_ALIGN32(u32RowBytes);
        }

        /* Sequence blobs hold one line per step, the others num * chn * height lines. */
        if (SVP_BLOB_TYPE_SEQ_S32 == astNnieNode[u32Idx].enType)
        {
            au32BlobSize[u32Idx] = u32TotalStep * u32LineStride;
        }
        else
        {
            au32BlobSize[u32Idx] = astBlob[u32Idx].u32Num * u32LineStride *
                astNnieNode[u32Idx].unShape.stWhc.u32Height *
                astNnieNode[u32Idx].unShape.stWhc.u32Chn;
        }

        *pu32TotalSize += au32BlobSize[u32Idx];
        astBlob[u32Idx].u32Stride = u32LineStride;
    }
}
结构体SVP_NNIE_NODE_S:
/* Description of one input or output node of a network segment. */
typedef struct hiSVP_NNIE_NODE_S {
SVP_BLOB_TYPE_E enType; /* blob type of the node */
union {
struct {
HI_U32 u32Width; /* width */
HI_U32 u32Height; /* height */
HI_U32 u32Chn; /* channel count */
} stWhc;
HI_U32 u32Dim; /* vector dimension; shares storage with stWhc */
} unShape;
HI_U32 u32NodeId; /* node id */
HI_CHAR szName[SVP_NNIE_NODE_NAME_LEN]; /* Report layer bottom name or data layer bottom name */
} SVP_NNIE_NODE_S;
函数SAMPLE_COMM_SVP_MallocCached
/*
 * Allocate cacheable MMZ memory in user space; thin wrapper around
 * HI_S32 HI_MPI_SYS_MmzAlloc_Cached(HI_U64* pu64PhyAddr, HI_VOID** ppVirAddr,
 *     const HI_CHAR* pstrMmb, const HI_CHAR* pstrZone, HI_U32 u32Len);
 *
 * pszMmb       name string of the Mmb (input)
 * pszZone      name string of the MMZ zone (input)
 * pu64PhyAddr  receives the physical address (output)
 * ppvVirAddr   receives the virtual address (output)
 * u32Size      size of the memory block (input)
 *
 * Difference from HI_MPI_SYS_MmzAlloc: memory from this call is cached, which
 * speeds up CPU access to frequently used memory (for example the heavy
 * read/write traffic around IVE operators). Hardware such as the IVE only
 * sees physical memory and not the cache contents, so memory shared between
 * the CPU and hardware must be synchronised with HI_MPI_SYS_MmzFlushCache.
 *
 * Returns whatever HI_MPI_SYS_MmzAlloc_Cached returns.
 */
HI_S32 SAMPLE_COMM_SVP_MallocCached(HI_CHAR *pszMmb, HI_CHAR *pszZone, HI_U64 *pu64PhyAddr, HI_VOID **ppvVirAddr, HI_U32 u32Size)
{
    return HI_MPI_SYS_MmzAlloc_Cached(pu64PhyAddr, ppvVirAddr, pszMmb, pszZone, u32Size);
}
函数SAMPLE_COMM_SVP_FlushCache
/*
 * Flush cached memory: write the cache contents back to memory and invalidate
 * them. Thin wrapper around
 * HI_S32 HI_MPI_SYS_MmzFlushCache(HI_U64 u64PhyAddr, HI_VOID* pVirAddr, HI_U32 u32Size);
 *
 * u64PhyAddr  start physical address of the data (input)
 * pvVirAddr   start virtual address of the data; must not be NULL (input)
 * u32Size     size of the data (input)
 *
 * Returns whatever HI_MPI_SYS_MmzFlushCache returns.
 */
HI_S32 SAMPLE_COMM_SVP_FlushCache(HI_U64 u64PhyAddr, HI_VOID *pvVirAddr, HI_U32 u32Size)
{
    return HI_MPI_SYS_MmzFlushCache(u64PhyAddr, pvVirAddr, u32Size);
}
函数SAMPLE_SVP_NNIE_Yolov3_SoftwareInit
/*
 * Fill the YOLOv3 software parameters with their fixed settings and allocate
 * one cached MMZ block that is laid out as:
 *   result tmp buffer | dst ROI blob | dst score blob | per-class ROI count blob
 *
 * pstCfg            not read by this function
 * pstNnieParam      supplies the input size (first src blob of segment 0) and
 *                   the model
 * pstSoftWareParam  structure to fill
 *
 * Returns HI_FAILURE when segment 0 does not report
 * SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM dst blobs, the allocation error code
 * when the allocation fails, and the (successful) allocation status otherwise.
 */
static HI_S32 SAMPLE_SVP_NNIE_Yolov3_SoftwareInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    /* grid size (height and width alike) of the three output scales */
    static const HI_U32 s_au32GridNum[3] = {13, 26, 52};
    /* bias values of the three output scales, six values each */
    static const HI_U32 s_au32Bias[3][6] = {
        {116, 90, 156, 198, 373, 326},
        {30, 61, 62, 45, 59, 119},
        {10, 13, 16, 30, 33, 23}
    };
    HI_S32 s32Ret = HI_SUCCESS;
    HI_U32 u32Scale = 0;
    HI_U32 u32Item = 0;
    HI_U32 u32ClassNum = 0;
    HI_U32 u32TotalSize = 0;
    HI_U32 u32DstRoiSize = 0;
    HI_U32 u32DstScoreSize = 0;
    HI_U32 u32ClassRoiNumSize = 0;
    HI_U32 u32TmpBufTotalSize = 0;
    HI_U64 u64PhyAddr = 0;
    HI_U8* pu8VirAddr = NULL;
    HI_U64 u64PhyCursor = 0;
    HI_U8* pu8VirCursor = NULL;

    /* fixed detector settings */
    pstSoftWareParam->u32OriImHeight = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Height;
    pstSoftWareParam->u32OriImWidth = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Width;
    pstSoftWareParam->u32BboxNumEachGrid = 3;
    pstSoftWareParam->u32ClassNum = 80;
    pstSoftWareParam->u32NmsThresh = (HI_U32)(0.3f*SAMPLE_SVP_NNIE_QUANT_BASE);
    pstSoftWareParam->u32ConfThresh = (HI_U32)(0.5f*SAMPLE_SVP_NNIE_QUANT_BASE);
    pstSoftWareParam->u32MaxRoiNum = 10;
    for (u32Scale = 0; u32Scale < 3; u32Scale++)
    {
        pstSoftWareParam->au32GridNumHeight[u32Scale] = s_au32GridNum[u32Scale];
        pstSoftWareParam->au32GridNumWidth[u32Scale] = s_au32GridNum[u32Scale];
        for (u32Item = 0; u32Item < 6; u32Item++)
        {
            pstSoftWareParam->af32Bias[u32Scale][u32Item] = s_au32Bias[u32Scale][u32Item];
        }
    }

    /*Malloc assist buffer memory*/
    /* one extra class slot on top of u32ClassNum */
    u32ClassNum = pstSoftWareParam->u32ClassNum+1;
    SAMPLE_SVP_CHECK_EXPR_RET(SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM != pstNnieParam->pstModel->astSeg[0].u16DstNum,
        HI_FAILURE,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,pstNnieParam->pstModel->astSeg[0].u16DstNum(%d) should be %d!\n",
        pstNnieParam->pstModel->astSeg[0].u16DstNum,SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM);

    u32TmpBufTotalSize = SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(pstNnieParam,pstSoftWareParam);
    u32DstRoiSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*pstSoftWareParam->u32MaxRoiNum*sizeof(HI_U32)*SAMPLE_SVP_NNIE_COORDI_NUM);
    u32DstScoreSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*pstSoftWareParam->u32MaxRoiNum*sizeof(HI_U32));
    u32ClassRoiNumSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*sizeof(HI_U32));
    u32TotalSize = u32TotalSize+u32DstRoiSize+u32DstScoreSize+u32ClassRoiNumSize+u32TmpBufTotalSize;

    s32Ret = SAMPLE_COMM_SVP_MallocCached("SAMPLE_YOLOV3_INIT",NULL,(HI_U64*)&u64PhyAddr,
        (void**)&pu8VirAddr,u32TotalSize);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,Malloc memory failed!\n");
    memset(pu8VirAddr,0, u32TotalSize);
    SAMPLE_COMM_SVP_FlushCache(u64PhyAddr,(void*)pu8VirAddr,u32TotalSize);

    /* The cursors walk through the block in layout order. */
    u64PhyCursor = u64PhyAddr;
    pu8VirCursor = pu8VirAddr;

    /*set each tmp buffer addr*/
    pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = u64PhyCursor;
    pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = (HI_U64)(pu8VirCursor);
    u64PhyCursor += u32TmpBufTotalSize;
    pu8VirCursor += u32TmpBufTotalSize;

    /*set result blob*/
    /* dst ROI: one row of classNum * maxRoiNum * coordinate-count S32 values */
    pstSoftWareParam->stDstRoi.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stDstRoi.u64PhyAddr = u64PhyCursor;
    pstSoftWareParam->stDstRoi.u64VirAddr = (HI_U64)(pu8VirCursor);
    pstSoftWareParam->stDstRoi.u32Stride = u32DstRoiSize;
    pstSoftWareParam->stDstRoi.u32Num = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Width = u32ClassNum*
        pstSoftWareParam->u32MaxRoiNum*SAMPLE_SVP_NNIE_COORDI_NUM;
    u64PhyCursor += u32DstRoiSize;
    pu8VirCursor += u32DstRoiSize;

    /* dst score: one row of classNum * maxRoiNum S32 values */
    pstSoftWareParam->stDstScore.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stDstScore.u64PhyAddr = u64PhyCursor;
    pstSoftWareParam->stDstScore.u64VirAddr = (HI_U64)(pu8VirCursor);
    pstSoftWareParam->stDstScore.u32Stride = u32DstScoreSize;
    pstSoftWareParam->stDstScore.u32Num = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Width = u32ClassNum*pstSoftWareParam->u32MaxRoiNum;
    u64PhyCursor += u32DstScoreSize;
    pu8VirCursor += u32DstScoreSize;

    /* per-class ROI count: one row of classNum S32 values */
    pstSoftWareParam->stClassRoiNum.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stClassRoiNum.u64PhyAddr = u64PhyCursor;
    pstSoftWareParam->stClassRoiNum.u64VirAddr = (HI_U64)(pu8VirCursor);
    pstSoftWareParam->stClassRoiNum.u32Stride = u32ClassRoiNumSize;
    pstSoftWareParam->stClassRoiNum.u32Num = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Width = u32ClassNum;

    return s32Ret;
}
主要涉及yolo3的一些个性化设置
结构体SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S
/*Yolov3 software parameter*/
typedef struct hiSAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S
{
HI_U32 u32OriImHeight; /* height of the network input */
HI_U32 u32OriImWidth; /* width of the network input */
HI_U32 u32BboxNumEachGrid; /* number of bounding boxes per grid cell */
HI_U32 u32ClassNum; /* number of classes */
HI_U32 au32GridNumHeight[3]; /* grid rows of each of the three output scales */
HI_U32 au32GridNumWidth[3]; /* grid columns of each of the three output scales */
HI_U32 u32NmsThresh; /* NMS threshold, scaled by SAMPLE_SVP_NNIE_QUANT_BASE */
HI_U32 u32ConfThresh; /* confidence threshold, scaled by SAMPLE_SVP_NNIE_QUANT_BASE */
HI_U32 u32MaxRoiNum; /* maximum number of ROIs */
HI_FLOAT af32Bias[3][6]; /* six bias values for each of the three output scales */
SVP_MEM_INFO_S stGetResultTmpBuf; /* tmp buffer used while getting the result */
SVP_DST_BLOB_S stClassRoiNum; /* number of ROIs per class */
SVP_DST_BLOB_S stDstRoi; /* resulting ROI coordinates */
SVP_DST_BLOB_S stDstScore; /* resulting scores */
} SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S;
/*Fill src data*/
/* The input goes into node 0 of segment 0. */
stInputDataIdx.u32SegIdx = 0;
stInputDataIdx.u32NodeIdx = 0;
s32Ret = SAMPLE_SVP_NNIE_FillSrcData(&stNnieCfg,&s_stYolov3NnieParam,&stInputDataIdx);
/*
 * Load raw input data from the file pstNnieCfg->pszPic into one source blob.
 *
 * pstNnieCfg      [IN]     configuration; only pszPic (input file path) is read here
 * pstNnieParam    [IN/OUT] NNIE parameter; astSegData[seg].astSrc[node] describes the
 *                          blob to fill (type, shape, stride, addresses)
 * pstInputDataIdx [IN]     segment index / node index selecting that blob
 *
 * The file is consumed row by row: every fread() takes one row of payload and the
 * write pointer then advances by the blob stride, so any padding at the end of a
 * row is left untouched. Once loaded, SAMPLE_COMM_SVP_FlushCache is called on the blob.
 *
 * Return: HI_SUCCESS on success; HI_INVALID_VALUE (through SAMPLE_SVP_CHECK_EXPR_RET)
 * when the path is NULL or the file cannot be opened; HI_FAILURE when a row cannot
 * be read completely.
 */
static HI_S32 SAMPLE_SVP_NNIE_FillSrcData(SAMPLE_SVP_NNIE_CFG_S* pstNnieCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S* pstInputDataIdx)
{
    FILE* fp = NULL;
    HI_U32 i =0, j = 0, n = 0;
    HI_U32 u32Height = 0, u32Width = 0, u32Chn = 0, u32Stride = 0, u32Dim = 0;
    HI_U32 u32VarSize = 0;
    HI_S32 s32Ret = HI_SUCCESS;
    HI_U8*pu8PicAddr = NULL;
    HI_U32*pu32StepAddr = NULL;
    HI_U32 u32SegIdx = pstInputDataIdx->u32SegIdx;
    HI_U32 u32NodeIdx = pstInputDataIdx->u32NodeIdx;
    HI_U32 u32TotalStepNum = 0;

    /*open file*/
    /* Without a path there is nothing to read; refusing here keeps a NULL
     * stream from ever reaching fread()/fclose() below. */
    SAMPLE_SVP_CHECK_EXPR_RET(NULL == pstNnieCfg->pszPic,HI_INVALID_VALUE,SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, input file path is NULL!\n");
    fp = fopen(pstNnieCfg->pszPic,"rb");
    SAMPLE_SVP_CHECK_EXPR_RET(NULL == fp,HI_INVALID_VALUE,SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, open file failed!\n");

    /*get data size*/
    /* Blob types from SVP_BLOB_TYPE_U8 up to SVP_BLOB_TYPE_YVU422SP hold one byte
     * per element; every other type is read as 4-byte elements. */
    if(SVP_BLOB_TYPE_U8 <= pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType &&
        SVP_BLOB_TYPE_YVU422SP >= pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType)
    {
        u32VarSize = sizeof(HI_U8);
    }
    else
    {
        u32VarSize = sizeof(HI_U32);
    }

    /*fill src data*/
    if(SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType)
    {
        /* Sequence blob: each of the u32Num sequences contributes as many rows
         * of u32Dim elements as its entry in the step array says. */
        u32Dim = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stSeq.u32Dim;
        u32Stride = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Stride;
        pu32StepAddr = (HI_U32*)(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stSeq.u64VirAddrStep);
        pu8PicAddr = (HI_U8*)(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr);
        for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++)
        {
            for(i = 0;i < *(pu32StepAddr+n); i++)
            {
                s32Ret = fread(pu8PicAddr,u32Dim*u32VarSize,1,fp);
                SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");
                pu8PicAddr += u32Stride;
            }
            u32TotalStepNum += *(pu32StepAddr+n);
        }
        SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64PhyAddr,
            (HI_VOID *) pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr,
            u32TotalStepNum*u32Stride);
    }
    else
    {
        u32Height = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stWhc.u32Height;
        u32Width = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stWhc.u32Width;
        u32Chn = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stWhc.u32Chn;
        u32Stride = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Stride;
        pu8PicAddr = (HI_U8*)(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr);
        if(SVP_BLOB_TYPE_YVU420SP== pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType)
        {
            /* YVU420SP: u32Chn*u32Height/2 rows per image. */
            for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++)
            {
                for(i = 0; i < u32Chn*u32Height/2; i++)
                {
                    s32Ret = fread(pu8PicAddr,u32Width*u32VarSize,1,fp);
                    SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");
                    pu8PicAddr += u32Stride;
                }
            }
        }
        else if(SVP_BLOB_TYPE_YVU422SP== pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType)
        {
            /* YVU422SP: u32Height*2 rows per image. */
            for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++)
            {
                for(i = 0; i < u32Height*2; i++)
                {
                    s32Ret = fread(pu8PicAddr,u32Width*u32VarSize,1,fp);
                    SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");
                    pu8PicAddr += u32Stride;
                }
            }
        }
        else
        {
            /* Planar data: u32Chn planes of u32Height rows per image. */
            for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++)
            {
                for(i = 0;i < u32Chn; i++)
                {
                    for(j = 0; j < u32Height; j++)
                    {
                        s32Ret = fread(pu8PicAddr,u32Width*u32VarSize,1,fp);
                        SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");
                        pu8PicAddr += u32Stride;
                    }
                }
            }
        }
        SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64PhyAddr,
            (HI_VOID *) pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr,
            pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num*u32Chn*u32Height*u32Stride);
    }

    fclose(fp);
    return HI_SUCCESS;
FAIL:
    fclose(fp);
    return HI_FAILURE;
}
主要完成了读取图片内容,根据pstInputDataIdx放入pstNnieParam里面。
/* Run segment 0 with bInstant set to HI_TRUE; the segment's input is the blob
 * selected by stInputDataIdx. */
stProcSegIdx.u32SegIdx = 0;
s32Ret = SAMPLE_SVP_NNIE_Forward(&s_stYolov3NnieParam,&stInputDataIdx,&stProcSegIdx,HI_TRUE);
tatic HI_S32 SAMPLE_SVP_NNIE_Forward(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S* pstInputDataIdx,
SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S* pstProcSegIdx,HI_BOOL bInstant)
{
HI_S32 s32Ret = HI_SUCCESS;
HI_U32 i = 0, j = 0;
HI_BOOL bFinish = HI_FALSE;
SVP_NNIE_HANDLE hSvpNnieHandle = 0;
HI_U32 u32TotalStepNum = 0;
SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u64PhyAddr,
(HI_VOID *) pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u64VirAddr,
pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u32Size);
/*set input blob according to node name*/
if(pstInputDataIdx->u32SegIdx != pstProcSegIdx->u32SegIdx)
{
for(i = 0; i < pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].u16SrcNum; i++)
{
for(j = 0; j < pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].u16DstNum; j++)
{
if(0 == strncmp(pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].astDstNode[j].szName,
pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].astSrcNode[i].szName,
SVP_NNIE_NODE_NAME_LEN))
{
pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc[i] =
pstNnieParam->astSegData[pstInputDataIdx->u32SegIdx].astDst[j];
break;
}
}
SAMPLE_SVP_CHECK_EXPR_RET((j == pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].u16DstNum),
HI_FAILURE,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,can't find %d-th seg's %d-th src blob!\n",
pstProcSegIdx->u32SegIdx,i);
}
}
/*NNIE_Forward*/
//多节点输入输出的CNN类型网络预测
//HI_S32 HI_MPI_SVP_NNIE_Forward(SVP_NNIE_HANDLE *phSvpNnieHandle, const SVP_SRC_BLOB_S astSrc[],const SVP_NNIE_MODEL_S *pstModel, const SVP_DST_BLOB_S astDst[],const SVP_NNIE_FORWARD_CTRL_S *pstForwardCtrl,HI_BOOL bInstant);
//phSvpNnieHandle handle指针。输出
//astSrc[] 多个节点输入,节点的顺序跟网络描述中的顺序要求一致,支持多帧同时输入。输入
//pstModel 网络模型结构体。输入
//astDst[] 网络段的多个节点输出,包含用户标记需要上报输出的中间层结果,以及网络段的最终结果。输出
//pstForwardCtrl 控制结构体。输入
//bInstant 及时返回结果标志。输入
s32Ret = HI_MPI_SVP_NNIE_Forward(&hSvpNnieHandle,
pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc,
pstNnieParam->pstModel, pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst,
&pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx], bInstant);
SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR,
"Error,HI_MPI_SVP_NNIE_Forward failed!\n");
if(bInstant)
{
/*Wait NNIE finish*/
//查询任务是否完成。
//HI_S32 HI_MPI_SVP_NNIE_Query(SVP_NNIE_ID_E enNnieId,SVP_NNIE_HANDLE svpNnieHandle,HI_BOOL *pbFinish,HI_BOOL bBlock);
//enNnieId 任务所运行的NNIE 核指示标志输入
//svpNnieHandle handle。输入
//pbFinish 是否完成标志。输出
//bBlock 是否阻塞查询。输入
while(HI_ERR_SVP_NNIE_QUERY_TIMEOUT == (s32Ret = HI_MPI_SVP_NNIE_Query(pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].enNnieId,
hSvpNnieHandle, &bFinish, HI_TRUE)))
{
usleep(100);
SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_INFO,
"HI_MPI_SVP_NNIE_Query Query timeout!\n");
}
}
bFinish = HI_FALSE;
for(i = 0; i < pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].u32DstNum; i++)
{
if(SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].enType)
{
for(j = 0; j < pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num; j++)
{
u32TotalStepNum += *((HI_U32*)(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stSeq.u64VirAddrStep)+j);
}
SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
(HI_VOID *) pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr,
u32TotalStepNum*pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
}
else
{
SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
(HI_VOID *) pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr,
pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num*
pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Chn*
pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Height*
pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
}
}
return s32Ret;
}
主要完成模型推理:把指定的段提交给NNIE执行(bInstant为真时循环查询直到任务结束),然后刷新该段各输出blob(astDst)的缓存,供后续CPU后处理读取推理结果。
/* CPU post-processing: turn the output blobs of segment 0 into per-class ROIs, scores and ROI counts. */
s32Ret = SAMPLE_SVP_NNIE_Yolov3_GetResult(&s_stYolov3NnieParam,&s_stYolov3SoftwareParam);
*****************************************************************************
* Prototype : SAMPLE_SVP_NNIE_Yolov3_GetResult
* Description : this function is used to Get Yolov3 result
* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to YOLOV3 NNIE parameter
* SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to YOLOV3 software parameter
*
*
*
*
* Output :
* Return Value : HI_SUCCESS: Success;Error codes: Failure.
* Spec :
* Calls :
* Called By :
* History:
*
* 1. Date : 2017-11-10
* Author :
* Modification : Create
*
*****************************************************************************/
HI_S32 SAMPLE_SVP_NNIE_Yolov3_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam,
SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam)
{
HI_U32 i = 0;
HI_S32 *aps32InputBlob[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};
HI_U32 au32Stride[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};
for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
{
aps32InputBlob[i] = (HI_S32*)pstNnieParam->astSegData[0].astDst[i].u64VirAddr;
au32Stride[i] = pstNnieParam->astSegData[0].astDst[i].u32Stride;
}
return SVP_NNIE_Yolov3_GetResult(aps32InputBlob,pstSoftwareParam->au32GridNumWidth,
pstSoftwareParam->au32GridNumHeight,au32Stride,pstSoftwareParam->u32BboxNumEachGrid,
pstSoftwareParam->u32ClassNum,pstSoftwareParam->u32OriImWidth,
pstSoftwareParam->u32OriImWidth,pstSoftwareParam->u32MaxRoiNum,pstSoftwareParam->u32NmsThresh,
pstSoftwareParam->u32ConfThresh,pstSoftwareParam->af32Bias,
(HI_S32*)pstSoftwareParam->stGetResultTmpBuf.u64VirAddr,
(HI_S32*)pstSoftwareParam->stDstScore.u64VirAddr,
(HI_S32*)pstSoftwareParam->stDstRoi.u64VirAddr,
(HI_S32*)pstSoftwareParam->stClassRoiNum.u64VirAddr);
}
主要完成aps32InputBlob和au32Stride的填充后调用SVP_NNIE_Yolov3_GetResult
/*****************************************************************************
* Prototype : SVP_NNIE_Yolov3_GetResult
* Description : Yolov3 GetResult function. For every reported output blob the
*               data is permuted from channel-major to box-major order and
*               converted to float, boxes and scores are decoded, and boxes
*               above the confidence threshold are collected. The collected
*               boxes are then sorted, passed through non-maximum suppression
*               and written out grouped by class.
* Input : HI_S32 **pps32InputData [IN] pointer to the input data
* HI_U32 au32GridNumWidth[] [IN] Grid num in width direction
* HI_U32 au32GridNumHeight[] [IN] Grid num in height direction
* HI_U32 au32Stride[] [IN] stride of input data
* HI_U32 u32EachGridBbox [IN] Bbox num of each gird
* HI_U32 u32ClassNum [IN] class num
* HI_U32 u32SrcWidth [IN] input image width
* HI_U32 u32SrcHeight [IN] input image height
* HI_U32 u32MaxRoiNum [IN] Max output roi num
* HI_U32 u32NmsThresh [IN] NMS thresh
* HI_U32 u32ConfThresh [IN] conf thresh
* HI_FLOAT af32Bias[][] [IN] bias
* HI_S32* ps32TmpBuf [IN] assist buffer
* HI_S32 *ps32DstScores [OUT] dst score
* HI_S32 *ps32DstRoi [OUT] dst roi
* HI_S32 *ps32ClassRoiNum [OUT] class roi num
*
* Output :
* Return Value : HI_SUCCESS (this function has no failure path).
* Spec :
* Calls :
* Called By :
* History:
*
* 1. Date : 2017-11-14
* Author :
* Modification : Create
*
*****************************************************************************/
static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_S32 **pps32InputData,HI_U32 au32GridNumWidth[],
    HI_U32 au32GridNumHeight[],HI_U32 au32Stride[],HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth,
    HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh,
    HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM],
    HI_S32* ps32TmpBuf,HI_S32 *ps32DstScore, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
{
    HI_S32 *ps32InputBlob = NULL;
    HI_FLOAT *pf32Permute = NULL;
    SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *pstBbox = NULL;
    HI_S32 *ps32AssistBuf = NULL;
    HI_U32 u32TotalBboxNum = 0;
    HI_U32 u32ChnOffset = 0;
    HI_U32 u32HeightOffset = 0;
    HI_U32 u32BboxNum = 0;
    HI_U32 u32GridXIdx;
    HI_U32 u32GridYIdx;
    HI_U32 u32Offset;
    HI_FLOAT f32StartX;
    HI_FLOAT f32StartY;
    HI_FLOAT f32Width;
    HI_FLOAT f32Height;
    HI_FLOAT f32ObjScore;
    HI_U32 u32MaxValueIndex = 0;
    HI_FLOAT f32MaxScore;
    HI_S32 s32ClassScore;
    HI_U32 u32ClassRoiNum;
    HI_U32 i = 0, j = 0, k = 0, c = 0, h = 0, w = 0;
    HI_U32 u32BlobSize = 0;
    HI_U32 u32MaxBlobSize = 0;

    /* Size in bytes of the largest blob; the permute area must hold any one of them. */
    for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
    {
        u32BlobSize = au32GridNumWidth[i]*au32GridNumHeight[i]*sizeof(HI_U32)*
            SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox;
        if(u32MaxBlobSize < u32BlobSize)
        {
            u32MaxBlobSize = u32BlobSize;
        }
    }

    /* Total number of candidate boxes over all blobs. */
    for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
    {
        u32TotalBboxNum += au32GridNumWidth[i]*au32GridNumHeight[i]*u32EachGridBbox;
    }

    //get each tmpbuf addr
    /* Scratch layout: [permute area: u32MaxBlobSize bytes][box list:
     * u32TotalBboxNum entries][assist buffer handed to the sort]. */
    pf32Permute = (HI_FLOAT*)ps32TmpBuf;
    pstBbox = (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S*)(pf32Permute+u32MaxBlobSize/sizeof(HI_S32));
    ps32AssistBuf = (HI_S32*)(pstBbox+u32TotalBboxNum);

    for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
    {
        //permute
        /* Input is indexed [channel][row][column] with a row pitch of
         * au32Stride[i] bytes; output is written [row][column][channel] and
         * divided by SAMPLE_SVP_NNIE_QUANT_BASE to obtain floats. */
        u32Offset = 0;
        ps32InputBlob = pps32InputData[i];
        u32ChnOffset = au32GridNumHeight[i]*au32Stride[i]/sizeof(HI_S32);
        u32HeightOffset = au32Stride[i]/sizeof(HI_S32);
        for (h = 0; h < au32GridNumHeight[i]; h++)
        {
            for (w = 0; w < au32GridNumWidth[i]; w++)
            {
                for (c = 0; c < SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox; c++)
                {
                    pf32Permute[u32Offset++] = (HI_FLOAT)(ps32InputBlob[c*u32ChnOffset+h*u32HeightOffset+w]) / SAMPLE_SVP_NNIE_QUANT_BASE;
                }
            }
        }

        //decode bbox and calculate score
        for(j = 0; j < au32GridNumWidth[i]*au32GridNumHeight[i]; j++)
        {
            u32GridXIdx = j % au32GridNumWidth[i];
            u32GridYIdx = j / au32GridNumWidth[i];
            for (k = 0; k < u32EachGridBbox; k++)
            {
                u32MaxValueIndex = 0;
                /* Start of box k of cell j; the values used below are
                 * [0] x, [1] y, [2] width, [3] height, [4] objectness,
                 * [5..] class scores. */
                u32Offset = (j * u32EachGridBbox + k) * SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM;
                //decode bbox
                /* Centre and size are normalized: centre by the grid size,
                 * size by the image size after scaling with the anchor. */
                f32StartX = ((HI_FLOAT)u32GridXIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 0])) / au32GridNumWidth[i];
                f32StartY = ((HI_FLOAT)u32GridYIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 1])) / au32GridNumHeight[i];
                f32Width = (HI_FLOAT)(exp(pf32Permute[u32Offset + 2]) * af32Bias[i][2*k]) / u32SrcWidth;
                f32Height = (HI_FLOAT)(exp(pf32Permute[u32Offset + 3]) * af32Bias[i][2*k + 1]) / u32SrcHeight;
                //calculate score
                /* SVP_NNIE_Sigmoid is given the objectness value plus the
                 * u32ClassNum class values (u32ClassNum+1 consecutive floats). */
                (void)SVP_NNIE_Sigmoid(&pf32Permute[u32Offset + 4], (u32ClassNum+1));
                f32ObjScore = pf32Permute[u32Offset + 4];
                f32MaxScore = SVP_NNIE_GetMaxVal(&pf32Permute[u32Offset + 5], u32ClassNum, &u32MaxValueIndex);
                s32ClassScore = (HI_S32)(f32MaxScore * f32ObjScore*SAMPLE_SVP_NNIE_QUANT_BASE);
                //filter low score roi
                /* Compare as signed values so the score is never converted to unsigned. */
                if (s32ClassScore > (HI_S32)u32ConfThresh)
                {
                    pstBbox[u32BboxNum].f32Xmin= (HI_FLOAT)(f32StartX - f32Width * 0.5f);
                    pstBbox[u32BboxNum].f32Ymin= (HI_FLOAT)(f32StartY - f32Height * 0.5f);
                    pstBbox[u32BboxNum].f32Xmax= (HI_FLOAT)(f32StartX + f32Width * 0.5f);
                    pstBbox[u32BboxNum].f32Ymax= (HI_FLOAT)(f32StartY + f32Height * 0.5f);
                    pstBbox[u32BboxNum].s32ClsScore = s32ClassScore;
                    pstBbox[u32BboxNum].u32Mask= 0;
                    /* Class indices are 1-based; index 0 is never assigned to a box. */
                    pstBbox[u32BboxNum].u32ClassIdx = (HI_S32)(u32MaxValueIndex+1);
                    u32BboxNum++;
                }
            }
        }
    }

    //quick sort
    /* With fewer than two boxes there is nothing to order, and with none
     * u32BboxNum - 1 would wrap around. */
    if (u32BboxNum > 1)
    {
        (void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32*)pstBbox, 0, u32BboxNum - 1,
            sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S)/sizeof(HI_U32),4,(SAMPLE_SVP_NNIE_STACK_S*)ps32AssistBuf);
    }

    //Yolov3 and Yolov2 have the same Nms operation
    (void)SVP_NNIE_Yolov2_NonMaxSuppression(pstBbox, u32BboxNum, u32NmsThresh, sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S)/sizeof(HI_U32));

    //Get result
    /* No box carries class index 0 and no ROI is emitted for it, so its count
     * is 0; SAMPLE_SVP_NNIE_Detection_PrintResult reads this entry as the
     * starting offset of class 1. */
    *(ps32ClassRoiNum+0) = 0;
    /* NOTE(review): u32ClassIdx can equal u32ClassNum, but this loop stops at
     * u32ClassNum - 1, so boxes of that last class are not reported. Whether
     * the bound should be inclusive depends on how the caller sizes the output
     * blobs - confirm before changing. */
    for (i = 1; i < u32ClassNum; i++)
    {
        u32ClassRoiNum = 0;
        for(j = 0; j < u32BboxNum; j++)
        {
            /* Emit boxes of class i whose mask is still 0, up to u32MaxRoiNum per class. */
            if ((0 == pstBbox[j].u32Mask) && (i == pstBbox[j].u32ClassIdx) && (u32ClassRoiNum < u32MaxRoiNum))
            {
                *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Xmin*u32SrcWidth), 0);
                *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Ymin*u32SrcHeight), 0);
                *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Xmax*u32SrcWidth), u32SrcWidth);
                *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Ymax*u32SrcHeight), u32SrcHeight);
                *(ps32DstScore++) = pstBbox[j].s32ClsScore;
                u32ClassRoiNum++;
            }
        }
        *(ps32ClassRoiNum+i) = u32ClassRoiNum;
    }
    return HI_SUCCESS;
}
结构体SAMPLE_SVP_NNIE_YOLOV3_BBOX_S:
/* YOLOv3 uses the YOLOv2 bounding-box record unchanged. */
typedef SAMPLE_SVP_NNIE_YOLOV2_BBOX_S SAMPLE_SVP_NNIE_YOLOV3_BBOX_S;
/* One candidate detection produced while decoding the network output.
 * In SVP_NNIE_Yolov3_GetResult the coordinates are normalized: they are
 * multiplied by the image width/height when the final ROI is written. */
typedef struct hiSAMPLE_SVP_NNIE_YOLOV2_BBOX
{
HI_FLOAT f32Xmin; /* left edge */
HI_FLOAT f32Xmax; /* right edge */
HI_FLOAT f32Ymin; /* top edge */
HI_FLOAT f32Ymax; /* bottom edge */
HI_S32 s32ClsScore; /* objectness * best class score, scaled by SAMPLE_SVP_NNIE_QUANT_BASE; word index 4, presumably the index the sort call passes - confirm */
HI_U32 u32ClassIdx; /* 1-based index of the best-scoring class */
HI_U32 u32Mask; /* 0 when the box is created; only boxes still at 0 are reported (presumably NMS marks suppressed boxes - confirm) */
}SAMPLE_SVP_NNIE_YOLOV2_BBOX_S;
/* Print the ROIs whose score reaches f32PrintResultThresh; the return value is deliberately discarded. */
(void)SAMPLE_SVP_NNIE_Detection_PrintResult(&s_stYolov3SoftwareParam.stDstScore,&s_stYolov3SoftwareParam.stDstRoi, &s_stYolov3SoftwareParam.stClassRoiNum,f32PrintResultThresh);
主要完成打印结果
/*
 * Print the detection results class by class.
 *
 * pstDstScore          [IN] blob holding one score per ROI, scaled by SAMPLE_SVP_NNIE_QUANT_BASE
 * pstDstRoi            [IN] blob holding SAMPLE_SVP_NNIE_COORDI_NUM coordinates per ROI
 * pstClassRoiNum       [IN] blob holding the ROI count of every class; its width is the
 *                           number of classes iterated here
 * f32PrintResultThresh [IN] minimum score (after dividing by SAMPLE_SVP_NNIE_QUANT_BASE)
 *                           for an ROI to be printed
 *
 * Scores and ROIs of all classes are stored back to back, so the running sum of
 * the per-class counts is the offset of the current class. Class 0 is never
 * printed, but its count is still part of that offset.
 *
 * Return: always HI_SUCCESS.
 */
static HI_S32 SAMPLE_SVP_NNIE_Detection_PrintResult(SVP_BLOB_S *pstDstScore,
    SVP_BLOB_S *pstDstRoi, SVP_BLOB_S *pstClassRoiNum, HI_FLOAT f32PrintResultThresh)
{
    HI_U32 i = 0, j = 0;
    HI_U32 u32RoiNumBias = 0;
    HI_U32 u32ScoreBias = 0;
    HI_U32 u32BboxBias = 0;
    HI_FLOAT f32Score = 0.0f;
    HI_S32* ps32Score = (HI_S32*)pstDstScore->u64VirAddr;
    HI_S32* ps32Roi = (HI_S32*)pstDstRoi->u64VirAddr;
    HI_S32* ps32ClassRoiNum = (HI_S32*)pstClassRoiNum->u64VirAddr;
    HI_U32 u32ClassNum = pstClassRoiNum->unShape.stWhc.u32Width;
    HI_S32 s32XMin = 0,s32YMin= 0,s32XMax = 0,s32YMax = 0;

    u32RoiNumBias += ps32ClassRoiNum[0];
    for (i = 1; i < u32ClassNum; i++)
    {
        u32ScoreBias = u32RoiNumBias;
        u32BboxBias = u32RoiNumBias * SAMPLE_SVP_NNIE_COORDI_NUM;
        /*if the confidence score greater than result threshold, the result will be printed*/
        /* The count is tested first: for a class without ROIs the score slot at
         * u32ScoreBias does not belong to this class and must not be read. */
        if(ps32ClassRoiNum[i]!=0 &&
            (HI_FLOAT)ps32Score[u32ScoreBias] / SAMPLE_SVP_NNIE_QUANT_BASE >= f32PrintResultThresh)
        {
            SAMPLE_SVP_TRACE_INFO("==== The %dth class box info====\n", i);
        }
        for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++)
        {
            f32Score = (HI_FLOAT)ps32Score[u32ScoreBias + j] / SAMPLE_SVP_NNIE_QUANT_BASE;
            /* Stops at the first ROI below the threshold; this presumably relies
             * on the scores of a class being stored in descending order - confirm. */
            if (f32Score < f32PrintResultThresh)
            {
                break;
            }
            s32XMin = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM];
            s32YMin = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM + 1];
            s32XMax = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM + 2];
            s32YMax = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM + 3];
            SAMPLE_SVP_TRACE_INFO("%d %d %d %d %f\n", s32XMin, s32YMin, s32XMax, s32YMax, f32Score);
        }
        u32RoiNumBias += ps32ClassRoiNum[i];
    }
    return HI_SUCCESS;
}