深度学习在计算机视觉等模式识别领域具有强大的能力,但也面临内存占用大、部署环境不友好等问题。这里打算写一个专栏,从模型转换讲到模型压缩,以期为深度学习的广泛应用做一些推广。
深度学习需要部署环境,而这对硬件支持提出了很多要求;相比之下,C 语言的支持则比较广泛,因此先将模型转换为 C 实现具有更广泛的适用性。这里给出深度学习各层的 C 定义方式。注意这里暂不支持反向传播部分,反向传播涉及损失计算、梯度更新以及优化策略等。
#ifdef _WINDOWS
#include "../../inc/netModel/cn_lay_infer.h"
#include "../../inc/netModel/FtCnnNetStructDef.h"
#else
#include "cn_lay_infer.h"
#include "FtCnnNetStructDef.h"
#endif
#ifndef TEST_INTEGER
/*
 * Copy every channel plane of `input` into the interior of the (larger)
 * planes of `output`, leaving `up_padding` rows above and `left_padding`
 * columns to the left of the copied data.
 *
 * Only the interior is written; border elements of `output` keep whatever
 * value they already hold.
 *
 * up_padding:   rows skipped at the top of each output plane (hPadding)
 * left_padding: columns skipped at the left of each output row (wPadding)
 *
 * Returns 0 on success, -1 when either data buffer is NULL, and -2 when the
 * geometry is invalid (empty plane, channel mismatch, negative padding, or
 * the shifted input block does not fit inside an output plane).
 */
SINT32 Infer_padding_layInout(ST_CnnLayInOut input, ST_CnnLayInOut output, SINT32 up_padding, SINT32 left_padding)
{
    SINT32 c = 0, row = 0;
    SINT32 inPlane = 0, outPlane = 0;
    SINT32 srcPos = 0, dstPos = 0;
    SINT32 rowBytes = 0;

    if (output.dataBuff == NULL || input.dataBuff == NULL)
    {
        LOGE("%s... address = NULL\n", __FUNCTION__);
        return -1;
    }

    inPlane = input.imgW * input.imgH;
    outPlane = output.imgW * output.imgH;
    if (inPlane <= 0 || outPlane <= 0 || (output.imgChannel != input.imgChannel))
    {
        LOGE("%s... param is error\n", __FUNCTION__);
        return -2;
    }

    /* The copied block must lie completely inside each output plane,
     * otherwise the row-wise memcpy below would run past the row or plane. */
    if (up_padding < 0 || left_padding < 0 ||
        left_padding + input.imgW > output.imgW ||
        up_padding + input.imgH > output.imgH)
    {
        LOGE("%s... param is error\n", __FUNCTION__);
        return -2;
    }

    rowBytes = input.imgW * sizeof(FP32);
    for (c = 0; c < input.imgChannel; c++)
    {
        srcPos = inPlane * c;
        /* first destination element of this plane: skip the top rows and the left columns */
        dstPos = outPlane * c + up_padding * output.imgW + left_padding;
        for (row = 0; row < input.imgH; row++)
        {
            memcpy(output.dataBuff + dstPos, input.dataBuff + srcPos, rowBytes);
            srcPos += input.imgW;
            dstPos += output.imgW;
        }
    }
    return 0;
}
//SINT32 Infer_conv2d_one_channel(ST_CnnLayInOut input, ST_CnnLayInOut output, SINT32 in_c_idx, SINT32 out_c_idx,\
// LayValueSt laySt, FP32* netParam)
//{
// SINT32 ret = 0, src_i, src_j, dst_r, dst_col_idx,kr,kc;
// SINT32 input_row_offset, input_offset;
/* NOTE(review): unlike the surrounding lines this declaration is not commented
 * out, so it declares a file-scope FP64 object named `sum`. It looks like a
 * leftover from the disabled implementation around it - confirm nothing
 * depends on it before removing. */
FP64 sum;
// FP64 sum = 0.f;
// FP32*weightPtr = NULL;
// SINT32 inputPos = 0;
// SINT32 output_offset = 0;
// // 参数检查
// if (output.dataBuff == NULL || NULL == netParam || input.dataBuff == NULL)
// {
// ret = -1;
// goto Infer_conv2d_one_channel;
// }
// weightPtr = netParam;
// inputPos = input.imgW*input.imgH* in_c_idx;
// output_offset = output.imgW*output.imgH*out_c_idx;
// //滑窗双循环
// for (src_i =0,dst_r = 0; dst_r < output.imgH; src_i+= laySt.hStride, dst_r++)//R
// {
// for (src_j=0,dst_col_idx = 0; dst_col_idx < output.imgW; src_j += laySt.wStride, dst_col_idx++)// COL
// {
// sum = 0;
// weightPtr = netParam;
// input_offset = src_i * input.imgW + src_j;
// for (kr = 0; kr < laySt.kerH; kr++)
// {
// for (kc = 0; kc < laySt.kerW; kc++)
// {
// //窗口求和
// sum += *(input.dataBuff + inputPos + input_offset + kc*laySt.wDilate) **(weightPtr+ kc);//*(FP64)*(weightPtr+ kc);
// }
// weightPtr += laySt.kerW;
// input_offset += input.imgW*laySt.hDilate;
// }
// // 填到目标地址
// *(output.dataBuff + output_offset + dst_col_idx) = sum;
// }
// output_offset += output.imgW;
// }
//Infer_conv2d_one_channel:
// return ret;
//}
/*
 * Cross-correlate one input plane with one kerH x kerW weight plane and write
 * the result to one output plane. No bias is added and no padding is applied
 * here: the first window starts at element (0,0) of the input plane.
 *
 * input     : source planes; plane `in_c_idx` (imgW*imgH elements) is read.
 * output    : destination planes; plane `out_c_idx` is overwritten.
 *             output.imgW / output.imgH decide how many windows are evaluated.
 * in_c_idx  : index of the input plane to read.
 * out_c_idx : index of the output plane to write.
 * laySt     : kerW/kerH, wStride/hStride and wDilate/hDilate are used.
 * netParam  : kerH*kerW weights, row-major.
 *
 * Returns 0 on success and -1 when a buffer pointer is NULL.
 */
SINT32 Infer_conv2d_one_channel(ST_CnnLayInOut input, ST_CnnLayInOut output, SINT32 in_c_idx, SINT32 out_c_idx,
                                LayValueSt laySt, FP32* netParam)
{
    /* Largest kernel served by the precomputed offset table (5x7 = 35 taps). */
    enum { MAX_FAST_TAPS = 35 };
    SINT32 tapOffset[MAX_FAST_TAPS] = {0};
    SINT32 tapCount = 0, tap = 0;
    SINT32 src_i = 0, src_j = 0, dst_r = 0, dst_c = 0, kr = 0, kc = 0;
    SINT32 rowStep = 0, windowPos = 0;
    SINT32 inputPos = 0, output_offset = 0;
    FP32 sum = 0.f;
    FP32 *weightPtr = NULL;
    FP32 *pdst = NULL;
    FP32 *psrc = NULL;

    if (output.dataBuff == NULL || NULL == netParam || input.dataBuff == NULL)
    {
        return -1;
    }

    inputPos = input.imgW * input.imgH * in_c_idx;
    output_offset = output.imgW * output.imgH * out_c_idx;
    /* distance between two vertically adjacent kernel taps in the input plane */
    rowStep = input.imgW * laySt.hDilate;

    if (laySt.kerW >= 1 && laySt.kerH >= 1 &&
        laySt.kerW <= MAX_FAST_TAPS && laySt.kerH <= MAX_FAST_TAPS &&
        laySt.kerW * laySt.kerH <= MAX_FAST_TAPS)
    {
        /* Small kernels: compute each tap's offset relative to the window
         * origin once, in the same row-major order as the weights. */
        for (kr = 0; kr < laySt.kerH; kr++)
        {
            for (kc = 0; kc < laySt.kerW; kc++)
            {
                tapOffset[tapCount++] = kr * rowStep + kc * laySt.wDilate;
            }
        }

        pdst = output.dataBuff + output_offset;
        for (src_i = 0, dst_r = 0; dst_r < output.imgH; src_i += laySt.hStride, dst_r++)
        {
            /* Re-anchor on the source row for every output row so that the
             * vertical stride is honoured for every kernel size, 1x1 included. */
            psrc = input.dataBuff + inputPos + src_i * input.imgW;
            for (dst_c = 0; dst_c < output.imgW; dst_c++)
            {
                /* accumulate taps left to right, starting from the first product */
                sum = psrc[tapOffset[0]] * netParam[0];
                for (tap = 1; tap < tapCount; tap++)
                {
                    sum += psrc[tapOffset[tap]] * netParam[tap];
                }
                *pdst++ = sum;
                psrc += laySt.wStride;
            }
        }
        return 0;
    }

    /* Fallback for kernels that do not fit the offset table: plain
     * sliding-window double loop. The kernel size is printed as a diagnostic. */
    printf("\n%d-%d;",laySt.kerW,laySt.kerH);
    for (src_i = 0, dst_r = 0; dst_r < output.imgH; src_i += laySt.hStride, dst_r++)
    {
        for (src_j = 0, dst_c = 0; dst_c < output.imgW; src_j += laySt.wStride, dst_c++)
        {
            sum = 0;
            weightPtr = netParam;
            windowPos = src_i * input.imgW + src_j;
            for (kr = 0; kr < laySt.kerH; kr++)
            {
                for (kc = 0; kc < laySt.kerW; kc++)
                {
                    /* accumulate the window */
                    sum += *(input.dataBuff + inputPos + windowPos + kc * laySt.wDilate) * *(weightPtr + kc);
                }
                weightPtr += laySt.kerW;
                windowPos += rowStep;
            }
            /* store the result for this window */
            *(output.dataBuff + output_offset + dst_c) = sum;
        }
        output_offset += output.imgW;
    }
    return 0;
}
/*
 * Max-pool one plane: for every output element take the maximum over a
 * kerH x kerW window of plane `channel` of `input` and store it in plane
 * `channel` of `output`.
 *
 * Windows start at (dst_r*hStride, dst_c*wStride) and cover adjacent
 * elements; laySt.wDilate / laySt.hDilate are not used by this function.
 *
 * Returns 0 on success and -1 when a data buffer is NULL.
 */
SINT32 Infer_maxpool2d_one_channel(ST_CnnLayInOut input, ST_CnnLayInOut output, SINT32 channel,
                                   LayValueSt laySt)
{
    /* Most negative finite single-precision value (-FLT_MAX). Used as the
     * starting maximum so that windows whose values are all very negative
     * are still reduced correctly (the former -1e7 seed was not low enough). */
    const FP32 lowest = -3.402823466e+38F;
    SINT32 src_r = 0, src_c = 0, dst_r = 0, dst_c = 0, kr = 0, k_c = 0;
    SINT32 inputPos = 0, outputPos = 0, rowPos = 0;
    FP32 best = 0.f, cand = 0.f;

    if (input.dataBuff == NULL || output.dataBuff == NULL)
    {
        return -1;
    }

    inputPos = input.imgW * input.imgH * channel;
    outputPos = output.imgW * output.imgH * channel;

    /* sliding-window double loop over the output plane */
    for (src_r = 0, dst_r = 0; dst_r < output.imgH; src_r += laySt.hStride, dst_r++)
    {
        for (src_c = 0, dst_c = 0; dst_c < output.imgW; src_c += laySt.wStride, dst_c++)
        {
            best = lowest; /* reset for every window */
            rowPos = inputPos + src_r * input.imgW + src_c;
            for (kr = 0; kr < laySt.kerH; kr++)
            {
                for (k_c = 0; k_c < laySt.kerW; k_c++)
                {
                    cand = input.dataBuff[rowPos + k_c];
                    if (cand > best)
                    {
                        best = cand;
                    }
                }
                rowPos += input.imgW;
            }
            /* store the window maximum */
            output.dataBuff[outputPos + dst_c] = best;
        }
        outputPos += output.imgW;
    }
    return 0;
}
/*
 * Average-pool one plane: every output element is the mean of a
 * kerH x kerW window of plane `channel` of `input`, written to plane
 * `channel` of `output`. The divisor is always kerH*kerW.
 *
 * Windows start at (dst_r*hStride, dst_c*wStride) and cover adjacent
 * elements; laySt.wDilate / laySt.hDilate are not used by this function.
 *
 * Returns 0 on success and -1 when a data buffer is NULL.
 */
SINT32 Infer_AvgPool2d_one_channel(ST_CnnLayInOut input, ST_CnnLayInOut output, SINT32 channel,
                                   LayValueSt laySt)
{
    SINT32 src_r = 0, src_c = 0, dst_r = 0, dst_c = 0, kr = 0, k_c = 0;
    SINT32 inputPos = 0, outputPos = 0, rowPos = 0;
    FP32 sumVal = 0;

    if (output.dataBuff == NULL || input.dataBuff == NULL)
    {
        return -1;
    }

    inputPos = input.imgW * input.imgH * channel;
    outputPos = output.imgW * output.imgH * channel;

    /* sliding-window double loop over the output plane */
    for (src_r = 0, dst_r = 0; dst_r < output.imgH; src_r += laySt.hStride, dst_r++)
    {
        for (src_c = 0, dst_c = 0; dst_c < output.imgW; src_c += laySt.wStride, dst_c++)
        {
            sumVal = 0; /* reset for every window */
            for (kr = 0; kr < laySt.kerH; kr++)
            {
                /* Rows of the input plane are input.imgW elements apart; the
                 * kernel width must not be used as the row stride. */
                rowPos = inputPos + (kr + src_r) * input.imgW + src_c;
                for (k_c = 0; k_c < laySt.kerW; k_c++)
                {
                    sumVal += *(input.dataBuff + rowPos + k_c);
                }
            }
            /* store the window mean */
            *(output.dataBuff + outputPos + dst_r * output.imgW + dst_c) = sumVal / (laySt.kerH * laySt.kerW);
        }
    }
    return 0;
}
#endif
基本层的定义:
#ifdef _WINDOWS
#include "../../inc/netModel/cn_BaseNet.h"
#include "../../inc/netModel/cn_lay_infer.h"
#include "../../inc/netModel/FtCnnNetStructDef.h"
#include "../../inc/netModel/FtNetMemoryManger.h"
#define LOGE(...)
#define LOGD(...)
#else
#include "FtCommon.h"
#include "cn_BaseNet.h"
#include "cn_lay_infer.h"
#include "FtCnnNetStructDef.h"
#include "FtNetMemoryManger.h"
#endif
#ifndef TEST_INTEGER
/*
 * Run a Conv2d layer.
 *
 * input    : input feature map.
 * output   : receives imgW/imgH/imgChannel and the convolution result; its
 *            dataBuff must already be allocated by the caller.
 * layPtr   : serialized LayValueSt describing the layer.
 * laylen   : not used.
 * paramPtr : kernel weights, consumed one kerH*kerW plane at a time, followed
 *            by outChannel bias values when laySt.BiasSate is set.
 * paramlen : receives the number of FP32 parameters this layer consumes.
 *
 * Steps: copy the input into a zero-initialised padded scratch buffer (as
 * returned by Ft_SafeAlloc_ST_CnnLayInOut - presumably zeroed, TODO confirm),
 * then for every output channel add the bias and accumulate the per-input-
 * channel convolutions computed into a one-plane scratch buffer.
 *
 * Returns 0 on success, S_INVALID_ADDRESS for NULL pointers, -2 for invalid
 * layer parameters, -3 when scratch allocation fails, -4 when padding fails,
 * or the error of Infer_conv2d_one_channel.
 *
 * NOTE(review): the two inner loop headers and the call to
 * Infer_conv2d_one_channel were unreadable in the reviewed text and have been
 * reconstructed from the surrounding statements - verify the call arguments
 * against the original source.
 */
SINT32 Net_Conv2d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0, posIdx = 0;
    SINT32 tmp = 0;
    ST_CnnLayInOut _padding, tmpLayInOut;
    LayValueSt laySt;
    SINT32 channel_in_idx = 0, out_c_idx = 0, channel_in_start = 0, channel_in_end = 0;
    SINT32 each_group_channel = 0, output_offset = 0;
    SINT32 outBatch = 0, kerBatch = 0;
    SINT32 biasPos = 0;
    FP32* pKernel = paramPtr;
    FP32* pkerBias = NULL;

    /* zero the scratch descriptors first so the exit path can always free them */
    Ft_Zeros_ST_CnnLayInOut(&_padding);
    Ft_Zeros_ST_CnnLayInOut(&tmpLayInOut);

    if (input.dataBuff == NULL || output == NULL || layPtr == NULL || paramPtr == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto CONV2D_EXIT;
    }
    if (output->dataBuff == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto CONV2D_EXIT;
    }

    memcpy(&laySt, layPtr, sizeof(LayValueSt));
    if (input.imgChannel != laySt.inChannel)
    {
        ret = -2; /* the layer's input channel count must match the input */
        goto CONV2D_EXIT;
    }
    if (laySt.Groups <= 0 || laySt.wStride <= 0 || laySt.hStride <= 0)
    {
        ret = -2; /* these values are used as divisors below */
        goto CONV2D_EXIT;
    }

    /* padded input size: padding is symmetric, hence *2 */
    _padding.imgW = input.imgW + laySt.wPadding * 2;
    _padding.imgH = input.imgH + laySt.hPadding * 2;
    _padding.imgChannel = input.imgChannel;

    /* output size: (in + 2*P - D*(K-1) - 1) / S + 1 */
    output->imgW = (input.imgW + laySt.wPadding * 2 - laySt.wDilate * (laySt.kerW - 1) - 1) / laySt.wStride + 1;
    output->imgH = (input.imgH + laySt.hPadding * 2 - laySt.hDilate * (laySt.kerH - 1) - 1) / laySt.hStride + 1;
    output->imgChannel = laySt.outChannel;

    /* NOTE(review): a too-small output buffer is only logged, not rejected. */
    tmp = output->imgW * output->imgH * output->imgChannel * sizeof(FP32);
    if (tmp > output->validLen)
    {
        LOGE("*** output is error %d, %d!", tmp, output->validLen);
    }

    /* one-plane scratch buffer that receives each single-channel convolution */
    tmpLayInOut.imgW = output->imgW;
    tmpLayInOut.imgH = output->imgH;
    tmpLayInOut.imgChannel = 1;

    kerBatch = laySt.kerH * laySt.kerW;                       /* weights per kernel plane */
    outBatch = tmpLayInOut.imgW * tmpLayInOut.imgH;           /* elements per output plane */
    each_group_channel = _padding.imgChannel / laySt.Groups;  /* input channels per group */

    ret = Ft_SafeAlloc_ST_CnnLayInOut(&_padding);
    ret |= Ft_SafeAlloc_ST_CnnLayInOut(&tmpLayInOut);
    if (ret != 0)
    {
        ret = -3;
        goto CONV2D_EXIT;
    }

    ret = Infer_padding_layInout(input, _padding, laySt.hPadding, laySt.wPadding);
    if (ret != 0)
    {
        ret = -4;
        goto CONV2D_EXIT;
    }

    /* parameter accounting: all kernel planes first, then the optional bias */
    *paramlen = (laySt.inChannel * kerBatch * laySt.outChannel) / laySt.Groups;
    biasPos = *paramlen;
    if (laySt.BiasSate)
    {
        *paramlen += laySt.outChannel;
        pkerBias = paramPtr + biasPos;
    }

    memset(output->dataBuff, 0x00, output->imgChannel * outBatch * sizeof(FP32));

    channel_in_start = 0;
    channel_in_end = each_group_channel;
    output_offset = 0;
    for (out_c_idx = 0; out_c_idx < output->imgChannel; out_c_idx++)
    {
        /* bias term */
        if (pkerBias)
        {
            for (posIdx = 0; posIdx < outBatch; posIdx++)
            {
                *(output->dataBuff + output_offset + posIdx) += *(pkerBias + out_c_idx);
            }
        }

        for (channel_in_idx = channel_in_start; channel_in_idx < channel_in_end; channel_in_idx++)
        {
            /* convolve one input plane into plane 0 of the scratch buffer */
            ret = Infer_conv2d_one_channel(_padding, tmpLayInOut, channel_in_idx, 0, laySt, pKernel);
            if (ret != 0)
            {
                goto CONV2D_EXIT;
            }
            for (posIdx = 0; posIdx < outBatch; posIdx++)
            {
                *(output->dataBuff + output_offset + posIdx) += *(tmpLayInOut.dataBuff + posIdx);
            }
            /* clear the scratch plane for the next input channel */
            memset(tmpLayInOut.dataBuff, 0x00, tmpLayInOut.validLen);
            pKernel += kerBatch;
        }

        /* Advance to the next input-channel group, wrapping around after the
         * last one.
         * NOTE(review): the group advances after every output channel; this
         * matches depthwise layers (Groups == inChannel == outChannel) -
         * confirm the intended mapping for 1 < Groups < outChannel. */
        if (channel_in_end >= input.imgChannel)
        {
            channel_in_start = 0;
            channel_in_end = each_group_channel;
        }
        else
        {
            channel_in_start = channel_in_end;
            channel_in_end += each_group_channel;
        }
        output_offset += outBatch;
    }

CONV2D_EXIT:
    Ft_SafeFree_ST_CnnLayInOut(&tmpLayInOut);
    Ft_SafeFree_ST_CnnLayInOut(&_padding);
    return ret;
}
//2022 01 11
/*
 * Compute the output geometry of a Conv2d layer and the scratch memory size
 * reported for it.
 *
 * output     : receives imgW, imgH and imgChannel.
 * layPtr     : serialized LayValueSt describing the layer.
 * laylen     : not used.
 * retBufSize : receives (outW*outH + paddedW*paddedH*inChannel) * sizeof(FP32).
 * bAlloc     : when non-zero, Ft_SafeAlloc_ST_CnnLayInOut(output) is called
 *              and its result is returned.
 *
 * Returns 0 on success or S_INVALID_ADDRESS when a required pointer is NULL.
 */
SINT32 Net_Conv2d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    LayValueSt cfg;
    SINT32 paddedW = 0, paddedH = 0;
    SINT32 floatCount = 0;

    if (NULL == layPtr || NULL == output || NULL == retBufSize)
    {
        return S_INVALID_ADDRESS;
    }

    memcpy(&cfg, layPtr, sizeof(LayValueSt));

    paddedW = input.imgW + cfg.wPadding * 2;
    paddedH = input.imgH + cfg.hPadding * 2;

    /* out = (in + 2*P - D*(K-1) - 1) / S + 1 */
    output->imgW = (paddedW - cfg.wDilate * (cfg.kerW - 1) - 1) / cfg.wStride + 1;
    output->imgH = (paddedH - cfg.hDilate * (cfg.kerH - 1) - 1) / cfg.hStride + 1;
    output->imgChannel = cfg.outChannel;

    /* one output plane plus the complete padded input, counted in floats */
    floatCount = output->imgW * output->imgH;
    floatCount += paddedW * paddedH * input.imgChannel;
    *retBufSize = floatCount * sizeof(FP32);

    if (0 == bAlloc)
    {
        return 0;
    }
    return Ft_SafeAlloc_ST_CnnLayInOut(output);
}
/*
 * Run a MaxPool2d layer.
 *
 * input    : input feature map.
 * output   : receives imgW/imgH/imgChannel and the pooled result.
 * layPtr   : serialized LayValueSt describing the layer.
 * laylen   : not used.
 * paramPtr : not used.
 * paramlen : set to 0 on success (the layer has no parameters).
 *
 * The input is first copied into a padded scratch buffer; in ceil mode the
 * scratch buffer is additionally enlarged on the right/bottom.
 *
 * Returns 0 on success, -1 when `output` or `layPtr` is NULL, or the error
 * code of the first helper that fails (allocation, padding, pooling).
 */
SINT32 Net_MaxPool2d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    SINT32 c_idx = 0;
    LayValueSt laySt;
    ST_CnnLayInOut _padding;
    SINT32 rows = 0, cols = 0;

    Ft_Zeros_ST_CnnLayInOut(&_padding);

    if (output == NULL || layPtr == NULL)
    {
        ret = -1;
        goto MAXPOOL2D_OUTSIZE_EXIT;
    }

    memcpy(&laySt, layPtr, sizeof(LayValueSt));
    _padding.imgW = input.imgW + laySt.wPadding * 2;
    _padding.imgH = input.imgH + laySt.hPadding * 2;
    _padding.imgChannel = input.imgChannel;

    /* Ceil mode: grow the scratch plane so a trailing partial window is kept.
     * NOTE(review): a full stride is added even when the span is already a
     * multiple of the stride; Net_MaxPool2d_OutSize_Infer uses the same
     * formula, so the two must be changed together if this is unintended. */
    if (laySt.ceilMode > 0)
    {
        cols = _padding.imgW - 1 - laySt.wDilate * (laySt.kerW - 1);
        rows = _padding.imgH - 1 - laySt.hDilate * (laySt.kerH - 1);
        _padding.imgW += laySt.wStride - (cols % (laySt.wStride));
        _padding.imgH += laySt.hStride - (rows % (laySt.hStride));
    }

    output->imgW = (_padding.imgW - 1 - laySt.wDilate * (laySt.kerW - 1)) / laySt.wStride + 1;
    output->imgH = (_padding.imgH - 1 - laySt.hDilate * (laySt.kerH - 1)) / laySt.hStride + 1;
    output->imgChannel = _padding.imgChannel;

    ret = Ft_SafeAlloc_ST_CnnLayInOut(&_padding);
    if (ret != 0)
    {
        LOGE("%s[%05d]...Ft_SafeAlloc_ST_CnnLayInOut(),ret = %d;\n", __FUNCTION__, __LINE__, ret);
        goto MAXPOOL2D_OUTSIZE_EXIT;
    }

    /* a failed copy would leave the scratch buffer without input data */
    ret = Infer_padding_layInout(input, _padding, laySt.hPadding, laySt.wPadding);
    if (ret != 0)
    {
        goto MAXPOOL2D_OUTSIZE_EXIT;
    }

    /* pool every channel independently */
    for (c_idx = 0; c_idx < input.imgChannel; c_idx++)
    {
        ret = Infer_maxpool2d_one_channel(_padding, *output, c_idx, laySt);
        if (ret != 0)
        {
            goto MAXPOOL2D_OUTSIZE_EXIT;
        }
    }
    *paramlen = 0;

MAXPOOL2D_OUTSIZE_EXIT:
    Ft_SafeFree_ST_CnnLayInOut(&_padding);
    return ret;
}
/*
 * Compute the output geometry of a MaxPool2d layer and the size of the padded
 * scratch buffer reported for it.
 *
 * output     : receives imgW, imgH and imgChannel.
 * layPtr     : serialized LayValueSt describing the layer.
 * laylen     : not used.
 * retBufSize : receives paddedW*paddedH*inChannel*sizeof(FP32).
 * bAlloc     : when non-zero, Ft_SafeAlloc_ST_CnnLayInOut(output) is called
 *              and its result is returned.
 *
 * Returns 0 on success or S_INVALID_ADDRESS when a required pointer is NULL.
 */
SINT32 Net_MaxPool2d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    LayValueSt cfg;
    SINT32 paddedW = 0, paddedH = 0;
    SINT32 lastStartW = 0, lastStartH = 0;

    if (NULL == layPtr || NULL == output || NULL == retBufSize)
    {
        return S_INVALID_ADDRESS;
    }

    memcpy(&cfg, layPtr, sizeof(LayValueSt));

    /* symmetric padding on both sides */
    paddedW = input.imgW + cfg.wPadding * 2;
    paddedH = input.imgH + cfg.hPadding * 2;

    /* ceil mode enlarges the padded plane by (stride - span % stride) */
    if (cfg.ceilMode > 0)
    {
        lastStartW = paddedW - 1 - cfg.wDilate * (cfg.kerW - 1);
        lastStartH = paddedH - 1 - cfg.hDilate * (cfg.kerH - 1);
        paddedW += cfg.wStride - (lastStartW % (cfg.wStride));
        paddedH += cfg.hStride - (lastStartH % (cfg.hStride));
    }

    /* memory used internally for the padded copy */
    *retBufSize = paddedW * paddedH * input.imgChannel * sizeof(FP32);

    output->imgW = (paddedW - cfg.wDilate * (cfg.kerW - 1) - 1) / cfg.wStride + 1;
    output->imgH = (paddedH - cfg.hDilate * (cfg.kerH - 1) - 1) / cfg.hStride + 1;
    output->imgChannel = input.imgChannel;

    if (0 == bAlloc)
    {
        return 0;
    }
    return Ft_SafeAlloc_ST_CnnLayInOut(output);
}
/*
 * Run an AvgPool2d layer.
 *
 * input    : input feature map.
 * output   : receives imgW/imgH/imgChannel and the pooled result.
 * layPtr   : serialized LayValueSt describing the layer.
 * laylen   : not used.
 * paramPtr : not used.
 * paramlen : set to 0 on success (the layer has no parameters).
 *
 * The padded size and the output size use the same formulas as
 * Net_MaxPool2d_OutSize_Infer, which Net_AvgPool2d_OutSize_Infer delegates
 * to, so the geometry written here matches what was sized beforehand.
 *
 * Returns 0 on success, S_INVALID_ADDRESS when `output` or `layPtr` is NULL,
 * or the error code of the first helper that fails.
 */
SINT32 Net_AvgPool2d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    SINT32 c_idx = 0;
    SINT32 rows = 0, cols = 0;
    LayValueSt laySt;
    ST_CnnLayInOut _padding;

    Ft_Zeros_ST_CnnLayInOut(&_padding);

    if (output == NULL || layPtr == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto AVGPOOL2D_OUTSIZE_EXIT;
    }

    memcpy(&laySt, layPtr, sizeof(LayValueSt));
    _padding.imgW = input.imgW + laySt.wPadding * 2;
    _padding.imgH = input.imgH + laySt.hPadding * 2;
    _padding.imgChannel = input.imgChannel;

    /* ceil mode: same enlargement rule as the max-pool size computation */
    if (laySt.ceilMode > 0)
    {
        cols = _padding.imgW - 1 - laySt.wDilate * (laySt.kerW - 1);
        rows = _padding.imgH - 1 - laySt.hDilate * (laySt.kerH - 1);
        _padding.imgW += laySt.wStride - (cols % (laySt.wStride));
        _padding.imgH += laySt.hStride - (rows % (laySt.hStride));
    }

    /* out = (padded - D*(K-1) - 1) / S + 1 */
    output->imgW = (_padding.imgW - 1 - laySt.wDilate * (laySt.kerW - 1)) / laySt.wStride + 1;
    output->imgH = (_padding.imgH - 1 - laySt.hDilate * (laySt.kerH - 1)) / laySt.hStride + 1;
    output->imgChannel = input.imgChannel;

    /* NOTE(review): a too-small output buffer is only logged, not rejected. */
    if (output->imgW * output->imgH * output->imgChannel * sizeof(FP32) > output->validLen)
    {
        LOGE("***harvey output is error!\n");
    }

    ret = Ft_SafeAlloc_ST_CnnLayInOut(&_padding);
    if (ret != 0)
    {
        LOGE("%s[%05d]...Ft_SafeAlloc_ST_CnnLayInOut(),ret = %d;\n", __FUNCTION__, __LINE__, ret);
        goto AVGPOOL2D_OUTSIZE_EXIT;
    }

    ret = Infer_padding_layInout(input, _padding, laySt.hPadding, laySt.wPadding);
    if (ret != 0)
    {
        goto AVGPOOL2D_OUTSIZE_EXIT;
    }

    /* average-pool every channel independently */
    for (c_idx = 0; c_idx < input.imgChannel; c_idx++)
    {
        ret = Infer_AvgPool2d_one_channel(_padding, *output, c_idx, laySt);
        if (ret != 0)
        {
            goto AVGPOOL2D_OUTSIZE_EXIT;
        }
    }
    *paramlen = 0;

AVGPOOL2D_OUTSIZE_EXIT:
    Ft_SafeFree_ST_CnnLayInOut(&_padding);
    return ret;
}
/*
 * Output geometry and scratch size of an AvgPool2d layer. The computation is
 * identical to max pooling, so it is forwarded to
 * Net_MaxPool2d_OutSize_Infer with all arguments unchanged.
 */
SINT32 Net_AvgPool2d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    SINT32 status = Net_MaxPool2d_OutSize_Infer(input, output, layPtr, laylen, retBufSize, bAlloc);

    return status;
}
/*
 * Apply a per-channel affine normalisation: out = (in - mean) * coe + bias.
 *
 * input    : input feature map.
 * output   : receives the input geometry and the normalised values; its
 *            dataBuff must already be allocated.
 * layPtr   : not used.
 * laylen   : not used.
 * paramPtr : 3 * imgChannel values laid out as [mean | coe | bias], each
 *            section holding one value per channel.
 * paramlen : receives imgChannel * 3.
 *
 * Returns 0 on success, -1 when `input.dataBuff`, `output` or `paramPtr` is
 * NULL, and -2 when `output->dataBuff` is NULL.
 *
 * NOTE(review): the header of the inner per-element loop was unreadable in
 * the reviewed text and has been reconstructed from the surrounding
 * statements - verify against the original source.
 */
SINT32 Net_BatchNorm2d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    SINT32 c = 0, idx = 0;
    SINT32 batchSize = 0, offset = 0;
    FP32 mean = 0.f, coe = 0.f, bias = 0.f;

    if (input.dataBuff == NULL || output == NULL || paramPtr == NULL)
    {
        ret = -1;
        goto BATCHNORM2D_EXIT;
    }
    if (output->dataBuff == NULL)
    {
        ret = -2;
        goto BATCHNORM2D_EXIT;
    }

    output->imgW = input.imgW;
    output->imgH = input.imgH;
    output->imgChannel = input.imgChannel;

    batchSize = input.imgW * input.imgH; /* elements per channel plane */
    for (c = 0; c < input.imgChannel; c++)
    {
        mean = *(paramPtr + c);
        coe = *(paramPtr + c + input.imgChannel);
        bias = *(paramPtr + c + input.imgChannel * 2);
        for (idx = 0; idx < batchSize; idx++)
        {
            *(output->dataBuff + offset + idx) = (*(input.dataBuff + offset + idx) - mean) * coe + bias;
        }
        offset += batchSize;
    }
    *paramlen = input.imgChannel * 3;

BATCHNORM2D_EXIT:
    return ret;
}
/*
 * Output geometry of a BatchNorm2d layer: identical to the input geometry.
 *
 * output     : receives imgW, imgH and imgChannel of `input`.
 * layPtr     : not used.
 * laylen     : not used.
 * retBufSize : set to 0 (no scratch memory is reported).
 * bAlloc     : when non-zero, Ft_SafeAlloc_ST_CnnLayInOut(output) is called
 *              and its result is returned.
 *
 * Returns 0 on success or -1 when `output` or `retBufSize` is NULL.
 */
SINT32 Net_BatchNorm2d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    if (NULL == output || NULL == retBufSize)
    {
        return -1;
    }

    output->imgChannel = input.imgChannel;
    output->imgH = input.imgH;
    output->imgW = input.imgW;
    *retBufSize = 0;

    if (0 == bAlloc)
    {
        return 0;
    }
    return Ft_SafeAlloc_ST_CnnLayInOut(output);
}
/*
 * Apply LeakyReLU element-wise with a fixed negative slope of 0.01:
 * negative values are multiplied by the slope, all others are copied.
 *
 * input    : input feature map.
 * output   : receives the input geometry and the activated values; its
 *            dataBuff must already be allocated.
 * layPtr, laylen, paramPtr : not used.
 * paramlen : set to 0 (the layer has no parameters).
 *
 * Returns 0 on success, -1 when `input.dataBuff` or `output` is NULL, and -2
 * when `output->dataBuff` is NULL.
 */
SINT32 Net_LeakyReLU_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    FP32 negativeSlope = 0.01f;
    FP32 value = 0.f;
    SINT32 count = 0;
    SINT32 i = 0;

    if (NULL == input.dataBuff || NULL == output)
    {
        return -1;
    }
    if (NULL == output->dataBuff)
    {
        return -2;
    }

    output->imgChannel = input.imgChannel;
    output->imgH = input.imgH;
    output->imgW = input.imgW;

    /* the whole tensor is processed as one flat array */
    count = input.imgChannel * input.imgW * input.imgH;
    for (i = 0; i < count; i++)
    {
        value = input.dataBuff[i];
        output->dataBuff[i] = (value < 0) ? (negativeSlope * value) : value;
    }

    *paramlen = 0;
    return 0;
}
/*
 * Output geometry of a LeakyReLU layer: identical to the input geometry.
 *
 * output     : receives imgW, imgH and imgChannel of `input`.
 * layPtr     : expected to be NULL (see note below).
 * laylen     : expected to be 0 (see note below).
 * retBufSize : set to 0 (no scratch memory is reported).
 * bAlloc     : when non-zero, Ft_SafeAlloc_ST_CnnLayInOut(output) is called
 *              and its result is returned.
 *
 * Returns 0 on success and -1 when `output` or `retBufSize` is NULL or when a
 * non-empty layer descriptor is supplied.
 *
 * NOTE(review): the call is rejected when layPtr is non-NULL *and* laylen is
 * non-zero. That rule is kept as found (only the integer is now compared with
 * 0 instead of NULL) - confirm that rejecting a supplied descriptor is the
 * intended contract.
 */
SINT32 Net_LeakyReLU_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    SINT32 ret = 0;

    if (output == NULL || retBufSize == NULL)
    {
        ret = -1;
        goto LEAKYRELU_OUTSIZE_EXIT;
    }
    if (layPtr != NULL && laylen != 0)
    {
        ret = -1;
        goto LEAKYRELU_OUTSIZE_EXIT;
    }

    output->imgChannel = input.imgChannel;
    output->imgH = input.imgH;
    output->imgW = input.imgW;
    *retBufSize = 0;

    if (bAlloc != 0)
    {
        ret = Ft_SafeAlloc_ST_CnnLayInOut(output);
    }

LEAKYRELU_OUTSIZE_EXIT:
    return ret;
}
/*
 * Run a fully connected (Linear) layer: out[i] = sum_j W[i][j] * in[j] (+ b[i]).
 *
 * input    : input tensor; all imgW*imgH*imgChannel values are used as one
 *            flat vector whose length must equal laySt.inChannel.
 * output   : receives geometry 1 x 1 x outChannel and the result; its dataBuff
 *            must already be allocated. It may share its buffer with `input`,
 *            in which case the input is copied to a scratch buffer first.
 * layPtr   : serialized LayValueSt describing the layer.
 * laylen   : not used.
 * paramPtr : outChannel rows of inputlen weights, followed by outChannel bias
 *            values when laySt.BiasSate is set.
 * paramlen : receives the number of FP32 parameters consumed.
 *
 * Returns 0 on success, S_INVALID_ADDRESS for NULL pointers, S_INVALID_PARM
 * when the input length does not match the layer, or the allocation error.
 */
SINT32 Net_Linear_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    SINT32 i = 0, inputlen = 0, j = 0;
    FP32 curValue = 0.0f;
    FP32* pParaW = NULL;
    FP32* pParaBias = NULL;
    LayValueSt laySt;
    ST_CnnLayInOut tmpInOut;

    Ft_Zeros_ST_CnnLayInOut(&tmpInOut);

    /* layPtr and paramPtr are dereferenced below, so reject NULL up front */
    if (input.dataBuff == NULL || output == NULL || layPtr == NULL || paramPtr == NULL)
    {
        ret = S_INVALID_ADDRESS;
        LOGE("%s :code %d", __FUNCTION__, __LINE__);
        goto LINEAR_EXIT;
    }
    if (output->dataBuff == NULL)
    {
        ret = S_INVALID_ADDRESS;
        LOGE("*************harvey:\n");
        LOGE("%s :code %d", __FUNCTION__, __LINE__);
        goto LINEAR_EXIT;
    }

    memcpy(&laySt, layPtr, sizeof(LayValueSt));

    if (input.dataBuff == output->dataBuff)
    {
        /* in-place call: work on a private copy so results do not overwrite
         * input values that are still needed */
        tmpInOut.imgH = input.imgH;
        tmpInOut.imgW = input.imgW;
        tmpInOut.imgChannel = input.imgChannel;
        ret = Ft_SafeAlloc_ST_CnnLayInOut(&tmpInOut);
        if (ret != 0)
        {
            LOGE("%s[%05d]...Ft_SafeAlloc_ST_CnnLayInOut() is error,ret = %d;", __FUNCTION__, __LINE__, ret);
            goto LINEAR_EXIT;
        }
        memcpy(tmpInOut.dataBuff, input.dataBuff, tmpInOut.validLen);
    }
    else
    {
        /* no aliasing: read the caller's buffer directly */
        memcpy(&tmpInOut, &input, sizeof(ST_CnnLayInOut));
    }

    inputlen = tmpInOut.imgW * tmpInOut.imgH * tmpInOut.imgChannel;
    output->imgChannel = laySt.outChannel;
    output->imgH = 1;
    output->imgW = 1;
    if (inputlen != laySt.inChannel)
    {
        ret = S_INVALID_PARM;
        LOGE("%s :code %d;%d,%d\n", __FUNCTION__, __LINE__,inputlen, laySt.inChannel);
        goto LINEAR_EXIT;
    }

    pParaW = paramPtr;
    if (laySt.BiasSate)
    {
        /* bias values follow the complete weight matrix */
        pParaBias = paramPtr + output->imgChannel * inputlen;
    }

    for (i = 0; i < output->imgChannel; i++)
    {
        curValue = 0.0f;
        /* pParaW walks the weight matrix row by row across iterations of i */
        for (j = 0; j < inputlen; j++, pParaW++)
        {
            curValue += *pParaW * (*(tmpInOut.dataBuff + j));
        }
        if (pParaBias != NULL)
        {
            curValue += *(pParaBias + i);
        }
        *(output->dataBuff + i) = curValue;
    }

    *paramlen = output->imgChannel * inputlen;
    if (laySt.BiasSate)
    {
        *paramlen += output->imgChannel;
    }

LINEAR_EXIT:
    /* only release the scratch descriptor when it does not borrow the
     * caller's input buffer */
    if (tmpInOut.dataBuff != input.dataBuff)
    {
        Ft_SafeFree_ST_CnnLayInOut(&tmpInOut);
    }
    return ret;
}
/*
 * Output geometry of a Linear layer and the scratch memory reported for it.
 *
 * input      : its imgW*imgH*imgChannel must equal laySt.inChannel.
 * output     : receives geometry 1 x 1 x outChannel.
 * layPtr     : serialized LayValueSt describing the layer.
 * laylen     : not used.
 * retBufSize : receives inChannel * sizeof(FP32).
 * bAlloc     : when non-zero, Ft_SafeAlloc_ST_CnnLayInOut(output) is called
 *              and its result is returned.
 *
 * Returns 0 on success, S_INVALID_ADDRESS when a required pointer is NULL,
 * or S_INVALID_PARM when the input length does not match the layer.
 */
SINT32 Net_Linear_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    SINT32 ret = 0;
    SINT32 tmp = 0;
    LayValueSt laySt;

    /* output and retBufSize are written below, so they are checked too */
    if (layPtr == NULL || output == NULL || retBufSize == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto LINEAR_OUTSIZE_EXIT;
    }

    memcpy(&laySt, layPtr, sizeof(LayValueSt));

    tmp = input.imgW * input.imgH * input.imgChannel;
    if (tmp != laySt.inChannel)
    {
        ret = S_INVALID_PARM;
        goto LINEAR_OUTSIZE_EXIT;
    }

    output->imgW = 1;
    output->imgH = 1;
    output->imgChannel = laySt.outChannel;

    /* largest amount of memory needed: one copy of the input vector */
    *retBufSize = laySt.inChannel * sizeof(FP32);

    if (bAlloc != 0)
    {
        ret = Ft_SafeAlloc_ST_CnnLayInOut(output);
    }

LINEAR_OUTSIZE_EXIT:
    return ret;
}
/*
 * Apply a per-channel affine normalisation to a 1 x 1 x C tensor:
 * out[c] = (in[c] - mean[c]) * coe[c] + bias[c].
 *
 * input    : must have imgH == 1 and imgW == 1.
 * output   : must already have the same geometry as `input` and an allocated
 *            dataBuff.
 * layPtr, laylen : not used.
 * paramPtr : 3 * imgChannel values laid out as [mean | coe | bias].
 * paramlen : receives imgChannel * 3.
 *
 * Returns 0 on success, S_INVALID_ADDRESS for NULL pointers, or
 * S_INVALID_PARM for a geometry mismatch.
 */
SINT32 Net_BatchNorm1d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ch = 0;
    SINT32 channels = input.imgChannel;
    FP32* meanTab = NULL;
    FP32* coeTab = NULL;
    FP32* biasTab = NULL;

    if (NULL == output || NULL == input.dataBuff)
    {
        return S_INVALID_ADDRESS;
    }
    if (NULL == output->dataBuff)
    {
        return S_INVALID_ADDRESS;
    }
    if (!(input.imgH == 1 && input.imgW == 1))
    {
        return S_INVALID_PARM;
    }
    if (output->imgH != input.imgH || output->imgW != input.imgW ||
        output->imgChannel != input.imgChannel)
    {
        return S_INVALID_PARM;
    }

    /* the three parameter sections follow each other, one value per channel */
    meanTab = paramPtr;
    coeTab = paramPtr + channels;
    biasTab = paramPtr + channels * 2;

    for (ch = 0; ch < channels; ch++)
    {
        output->dataBuff[ch] = (input.dataBuff[ch] - meanTab[ch]) * coeTab[ch] + biasTab[ch];
    }

    *paramlen = channels * 3;
    return 0;
}
/*
 * Output geometry of a BatchNorm1d layer: identical to the 1 x 1 x C input.
 *
 * input      : must have imgH == 1 and imgW == 1.
 * output     : receives imgW, imgH and imgChannel of `input`.
 * layPtr     : expected to be NULL (see note below).
 * laylen     : expected to be 0 (see note below).
 * retBufSize : set to 0 (no scratch memory is reported).
 * bAlloc     : when non-zero, Ft_SafeAlloc_ST_CnnLayInOut(output) is called
 *              and its result is returned.
 *
 * Returns 0 on success, S_INVALID_ADDRESS when `output` or `retBufSize` is
 * NULL or a non-empty layer descriptor is supplied, and S_INVALID_PARM when
 * the input is not 1 x 1.
 *
 * NOTE(review): the call is rejected when layPtr is non-NULL *and* laylen is
 * non-zero. That rule is kept as found (only the integer is now compared with
 * 0 instead of NULL) - confirm that rejecting a supplied descriptor is the
 * intended contract.
 */
SINT32 Net_BatchNorm1d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    SINT32 ret = 0;

    if (output == NULL || retBufSize == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto BATCHNORM1D_OUTSIZE_EXIT;
    }
    if (layPtr != NULL && laylen != 0)
    {
        ret = S_INVALID_ADDRESS;
        goto BATCHNORM1D_OUTSIZE_EXIT;
    }
    if (input.imgH != 1 || input.imgW != 1)
    {
        ret = S_INVALID_PARM;
        goto BATCHNORM1D_OUTSIZE_EXIT;
    }

    output->imgChannel = input.imgChannel;
    output->imgH = input.imgH;
    output->imgW = input.imgW;
    *retBufSize = 0;

    if (bAlloc != 0)
    {
        ret = Ft_SafeAlloc_ST_CnnLayInOut(output);
    }

BATCHNORM1D_OUTSIZE_EXIT:
    return ret;
}
/*
 * Run an AdaptiveAvgPool2d layer. The requested output size is taken from
 * laySt.kerH / laySt.kerW; the averaging window is then
 * (inH + 1 - outH) x (inW + 1 - outW) and slides with a step of 1.
 *
 * input    : input feature map.
 * output   : receives geometry kerH x kerW x imgChannel and the result; its
 *            dataBuff must already be allocated with at least that capacity.
 * layPtr   : serialized LayValueSt describing the layer.
 * laylen, paramPtr : not used.
 * paramlen : set to 0 (the layer has no parameters).
 *
 * The input is copied to a scratch buffer before the output is written.
 *
 * Returns 0 on success, S_INVALID_ADDRESS for NULL pointers, S_ERROR_LEAK
 * when the output buffer is too small, S_INVALID_PARM when the requested
 * output is larger than the input, or the allocation error.
 */
SINT32 Net_AdaptiveAvgPool2d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    LayValueSt laySt;
    SINT32 kerW = 0, kerH = 0;
    SINT32 channel = 0, rows = 0, cols = 0;
    SINT32 k_row = 0, k_col = 0;
    SINT32 inBatch = 0, outBatch = 0, kerArea = 0;
    FP32 avgVal = 0.f;
    SINT32 input_offset = 0, input_offset_2 = 0;
    SINT32 output_offset = 0, output_offset_2 = 0;
    ST_CnnLayInOut tmpInOut;

    Ft_Zeros_ST_CnnLayInOut(&tmpInOut);

    if (layPtr == NULL || output == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto ADAPTIVEAVGPOOL2D_EXIT;
    }
    if (input.dataBuff == NULL || output->dataBuff == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto ADAPTIVEAVGPOOL2D_EXIT;
    }

    tmpInOut.imgH = input.imgH;
    tmpInOut.imgW = input.imgW;
    tmpInOut.imgChannel = input.imgChannel;
    ret = Ft_SafeAlloc_ST_CnnLayInOut(&tmpInOut);
    if (ret != 0)
    {
        LOGE("%s[%05d]...Ft_SafeAlloc_ST_CnnLayInOut(),ret = %d;", __FUNCTION__, __LINE__, ret);
        goto ADAPTIVEAVGPOOL2D_EXIT;
    }
    /* Copy exactly as many bytes as the scratch buffer holds; using the
     * caller's validLen could write past the end of the scratch buffer. */
    memcpy(tmpInOut.dataBuff, input.dataBuff, tmpInOut.validLen);

    *paramlen = 0; /* no layer parameters are consumed */
    memcpy(&laySt, layPtr, sizeof(LayValueSt));
    output->imgH = laySt.kerH;
    output->imgW = laySt.kerW;
    output->imgChannel = input.imgChannel;
    if (output->validLen < laySt.kerH * laySt.kerW * input.imgChannel * sizeof(FP32))
    {
        ret = S_ERROR_LEAK;
        LOGE("%s[%05d]...has error memory leak(),ret = %d;", __FUNCTION__, __LINE__, ret);
        goto ADAPTIVEAVGPOOL2D_EXIT;
    }

    /* window size derived from input and requested output size */
    kerH = tmpInOut.imgH + 1 - output->imgH;
    kerW = tmpInOut.imgW + 1 - output->imgW;
    if (kerH <= 0 || kerW <= 0)
    {
        /* requested output larger than the input: the window would be empty */
        ret = S_INVALID_PARM;
        goto ADAPTIVEAVGPOOL2D_EXIT;
    }
    inBatch = tmpInOut.imgH * tmpInOut.imgW;
    outBatch = output->imgH * output->imgW;
    kerArea = kerH * kerW;

    for (channel = 0; channel < input.imgChannel; channel++)
    {
        output_offset_2 = output_offset;
        for (rows = 0; rows < output->imgH; rows++)
        {
            for (cols = 0; cols < output->imgW; cols++)
            {
                avgVal = 0;
                input_offset_2 = input_offset + rows * input.imgW;
                for (k_row = 0; k_row < kerH; k_row++)
                {
                    for (k_col = 0; k_col < kerW; k_col++)
                    {
                        avgVal += *(tmpInOut.dataBuff + input_offset_2 + cols + k_col);
                    }
                    input_offset_2 += input.imgW;
                }
                *(output->dataBuff + output_offset_2 + cols) = avgVal / kerArea;
            }
            output_offset_2 += output->imgW;
        }
        input_offset += inBatch;
        output_offset += outBatch;
    }

ADAPTIVEAVGPOOL2D_EXIT:
    Ft_SafeFree_ST_CnnLayInOut(&tmpInOut);
    return ret;
}
/*
 * Output geometry of an AdaptiveAvgPool2d layer and the scratch memory
 * reported for it. For this layer type laySt.kerH / laySt.kerW hold the
 * requested output height and width.
 *
 * output     : receives geometry kerH x kerW x input.imgChannel.
 * layPtr     : serialized LayValueSt describing the layer.
 * laylen     : not used.
 * retBufSize : receives the byte size of the whole input tensor.
 * bAlloc     : when non-zero, Ft_SafeAlloc_ST_CnnLayInOut(output) is called
 *              and its result is returned.
 *
 * Returns 0 on success or S_INVALID_ADDRESS when `layPtr` or `output` is NULL.
 */
SINT32 Net_AdaptiveAvgPool2d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    LayValueSt cfg;

    if (NULL == layPtr || NULL == output)
    {
        return S_INVALID_ADDRESS;
    }

    memcpy(&cfg, layPtr, sizeof(LayValueSt));

    output->imgH = cfg.kerH;
    output->imgW = cfg.kerW;
    output->imgChannel = input.imgChannel;

    /* largest amount of memory used: one copy of the input */
    *retBufSize = input.imgH * input.imgChannel * input.imgW * sizeof(FP32);

    if (0 == bAlloc)
    {
        return 0;
    }
    return Ft_SafeAlloc_ST_CnnLayInOut(output);
}
/*
 * Concatenate two feature maps along the channel axis: the bytes of `input`
 * are copied to the start of the output buffer and the bytes of `input2`
 * directly behind them.
 *
 * input, input2 : must have the same imgH and imgW; validLen bytes of each
 *                 are copied.
 * output        : receives the shared imgH/imgW and the summed channel count;
 *                 its dataBuff must already be allocated.
 * layPtr, laylen, paramPtr : not used.
 * paramlen      : set to 0 (the layer has no parameters).
 *
 * The capacity of the output buffer is not verified here.
 *
 * Returns 0 on success, S_INVALID_ADDRESS when `output` or its buffer is
 * NULL, or S_INVALID_PARM when the spatial sizes differ.
 */
SINT32 Net_My_cat_2_Infer(ST_CnnLayInOut input,ST_CnnLayInOut input2,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    UINT8* dstBytes = NULL;

    if (NULL == output || NULL == output->dataBuff)
    {
        return S_INVALID_ADDRESS;
    }
    if (!(input.imgH == input2.imgH && input.imgW == input2.imgW))
    {
        return S_INVALID_PARM;
    }

    output->imgH = input.imgH;
    output->imgW = input.imgW;
    output->imgChannel = input.imgChannel + input2.imgChannel;

    /* first tensor at the front, second one directly behind it */
    memcpy(output->dataBuff, input.dataBuff, input.validLen);
    dstBytes = (UINT8*)output->dataBuff + input.validLen;
    memcpy(dstBytes, input2.dataBuff, input2.validLen);

    *paramlen = 0;
    return 0;
}
/*
 * Output geometry of a two-input channel concatenation.
 *
 * input, input2 : must have the same imgH and imgW.
 * output        : receives the shared imgH/imgW and the summed channel count.
 * layPtr        : must not be NULL; its contents are not read.
 * laylen        : not used.
 * retBufSize    : set to 0 (no scratch memory is reported).
 * bAlloc        : when non-zero, Ft_SafeAlloc_ST_CnnLayInOut(output) is
 *                 called and its result is returned.
 *
 * Returns 0 on success, -1 when a required pointer is NULL, or
 * S_INVALID_PARM when the spatial sizes differ.
 */
SINT32 Net_My_cat_2_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut input2,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    if (NULL == output || NULL == layPtr || NULL == retBufSize)
    {
        return -1;
    }
    if (!(input.imgH == input2.imgH && input.imgW == input2.imgW))
    {
        return S_INVALID_PARM;
    }

    output->imgH = input.imgH;
    output->imgW = input.imgW;
    output->imgChannel = input.imgChannel + input2.imgChannel;
    *retBufSize = 0;

    if (0 == bAlloc)
    {
        return 0;
    }
    return Ft_SafeAlloc_ST_CnnLayInOut(output);
}
/*
 * Scale every channel plane of `input` by one factor per channel taken from
 * `input2`: out[ch][i] = input[ch][i] * input2[ch].
 *
 * input    : feature map to scale.
 * input2   : 1 x 1 x C tensor with one factor per channel of `input`.
 * output   : must already have the geometry of `input` and an allocated
 *            dataBuff.
 * layPtr, laylen, paramPtr : not used.
 * paramlen : set to 0 (the layer has no parameters).
 *
 * Returns 0 on success, S_INVALID_ADDRESS when `output` or its buffer is
 * NULL, or S_INVALID_PARM for a geometry mismatch.
 */
SINT32 Net_My_linear_mul_channel_Infer(ST_CnnLayInOut input,ST_CnnLayInOut input2,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 plane = 0;
    SINT32 ch = 0, i = 0, pos = 0;

    if (NULL == output || NULL == output->dataBuff)
    {
        return S_INVALID_ADDRESS;
    }
    if (!(1 == input2.imgH && 1 == input2.imgW && input.imgChannel == input2.imgChannel))
    {
        return S_INVALID_PARM;
    }
    if (!(input.imgH == output->imgH && input.imgW == output->imgW && input.imgChannel == output->imgChannel))
    {
        return S_INVALID_PARM;
    }

    plane = input.imgH * input.imgW; /* elements per channel */
    for (ch = 0; ch < input.imgChannel; ch++)
    {
        for (i = 0; i < plane; i++)
        {
            /* pos runs through the tensor as one flat array */
            output->dataBuff[pos] = input.dataBuff[pos] * input2.dataBuff[ch];
            pos++;
        }
    }

    *paramlen = 0;
    return 0;
}
/*
 * Output geometry of the per-channel scaling layer: identical to `input`.
 *
 * input      : feature map to scale.
 * input2     : must be 1 x 1 with the same channel count as `input`.
 * output     : receives imgW, imgH and imgChannel of `input`.
 * layPtr     : must not be NULL; its contents are not read.
 * laylen     : not used.
 * retBufSize : set to 0 (no scratch memory is reported).
 * bAlloc     : when non-zero, Ft_SafeAlloc_ST_CnnLayInOut(output) is called
 *              and its result is returned.
 *
 * Returns 0 on success, S_INVALID_ADDRESS when a required pointer is NULL,
 * or S_INVALID_PARM for a geometry mismatch.
 */
SINT32 Net_My_linear_mul_channel_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut input2,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    if (NULL == output || NULL == layPtr || NULL == retBufSize)
    {
        return S_INVALID_ADDRESS;
    }
    if (!(1 == input2.imgH && 1 == input2.imgW && input.imgChannel == input2.imgChannel))
    {
        return S_INVALID_PARM;
    }

    output->imgH = input.imgH;
    output->imgW = input.imgW;
    output->imgChannel = input.imgChannel;
    *retBufSize = 0;

    if (0 == bAlloc)
    {
        return 0;
    }
    return Ft_SafeAlloc_ST_CnnLayInOut(output);
}
#endif