模型转换需要用到的一些基本层的定义

深度学习在计算机视觉等模式识别领域具有强大的能力,但是也遭遇内存占用大、部署环境不友好问题,这里打算写一个专栏,从模型转换到模型压缩,以期可以为深度学习的广泛使用做推广。

深度学习需要部署环境,但是这对硬件支持提出了很多要求,而C的支持则相对比较广泛,因此将模型先转换为C实现具有更广泛的适用性。这里给出深度学习层的C定义方式,注意这里暂不支持反向传播部分,反向传播涉及损失计算、梯度更新以及优化策略等。

#ifdef _WINDOWS
#include "../../inc/netModel/cn_lay_infer.h"
#include "../../inc/netModel/FtCnnNetStructDef.h"
#else
#include "cn_lay_infer.h"
#include "FtCnnNetStructDef.h"
#endif

#ifndef TEST_INTEGER
/**
 * Copy every channel plane of `input` into the interior of the (larger)
 * matching plane of `output`, shifted down by `up_padding` rows and right by
 * `left_padding` columns.
 *
 * Only the interior is written. The border elements of `output` are not
 * touched here, so the padding value is whatever the buffer already holds.
 *
 * @param input         source feature map, stored plane after plane
 * @param output        destination feature map; must have the same channel
 *                      count and be large enough to hold the shifted input
 * @param up_padding    rows skipped at the top of each output plane (hPadding)
 * @param left_padding  columns skipped at the left of each output row (wPadding)
 * @return 0 on success, -1 if a data buffer is NULL, -2 if the dimensions are
 *         inconsistent or the shifted input would not fit in the output plane
 */
SINT32 Infer_padding_layInout(ST_CnnLayInOut input, ST_CnnLayInOut output, SINT32 up_padding, SINT32 left_padding)
{
    SINT32 c = 0, row = 0;
    SINT32 srcPos = 0, dstPos = 0;
    SINT32 inPlane = 0, outPlane = 0;
    SINT32 rowBytes = 0;

    if (input.dataBuff == NULL || output.dataBuff == NULL)
    {
        LOGE("%s... address = NULL\n", __FUNCTION__);
        return -1;
    }

    inPlane = input.imgW * input.imgH;
    outPlane = output.imgW * output.imgH;
    if (inPlane <= 0 || outPlane <= 0 || (output.imgChannel != input.imgChannel))
    {
        LOGE("%s... param is error\n", __FUNCTION__);
        return -2;
    }

    /* The shifted input rectangle has to lie inside one output plane;
     * otherwise the row copies below would run past the plane (or start
     * before it when a padding is negative). */
    if (up_padding < 0 || left_padding < 0 ||
        left_padding + input.imgW > output.imgW ||
        up_padding + input.imgH > output.imgH)
    {
        LOGE("%s... param is error\n", __FUNCTION__);
        return -2;
    }

    rowBytes = input.imgW * sizeof(FP32);
    for (c = 0; c < input.imgChannel; c++)
    {
        srcPos = inPlane * c;
        /* First written element of this plane: skip the top border rows and
         * the left border columns. */
        dstPos = outPlane * c + up_padding * output.imgW + left_padding;
        for (row = 0; row < input.imgH; row++)
        {
            memcpy(output.dataBuff + dstPos, input.dataBuff + srcPos, rowBytes);
            srcPos += input.imgW;
            dstPos += output.imgW;
        }
    }
    return 0;
}


//SINT32 Infer_conv2d_one_channel(ST_CnnLayInOut input, ST_CnnLayInOut output, SINT32 in_c_idx, SINT32 out_c_idx,\
//                                LayValueSt laySt, FP32* netParam)
//{
//    SINT32 ret = 0, src_i, src_j, dst_r, dst_col_idx,kr,kc;
//    SINT32 input_row_offset, input_offset;
    /* NOTE(review): this is the only line of the commented-out implementation
     * around it that is still live, so it defines a file-scope variable named
     * `sum`. It looks like a leftover from commenting the old code out; the
     * active Infer_conv2d_one_channel below declares its own local `sum`.
     * Confirm nothing else relies on this definition before removing it. */
    FP64 sum;
//    FP64 sum = 0.f;
//    FP32*weightPtr = NULL;
//    SINT32 inputPos = 0;
//    SINT32 output_offset = 0;

//    // 参数检查
//    if (output.dataBuff == NULL || NULL == netParam || input.dataBuff == NULL)
//    {
//        ret = -1;
//        goto Infer_conv2d_one_channel;
//    }
//    weightPtr = netParam;
//    inputPos = input.imgW*input.imgH* in_c_idx;
//    output_offset = output.imgW*output.imgH*out_c_idx;

//    //滑窗双循环
//    for (src_i =0,dst_r = 0; dst_r < output.imgH; src_i+= laySt.hStride, dst_r++)//R
//    {
//        for (src_j=0,dst_col_idx = 0; dst_col_idx < output.imgW; src_j += laySt.wStride, dst_col_idx++)// COL
//        {
//            sum = 0;
//            weightPtr = netParam;
//            input_offset = src_i * input.imgW + src_j;
//            for (kr = 0; kr < laySt.kerH; kr++)
//            {
//                for (kc = 0; kc < laySt.kerW; kc++)
//                {
//                    //窗口求和
//                    sum += *(input.dataBuff + inputPos + input_offset + kc*laySt.wDilate) **(weightPtr+ kc);//*(FP64)*(weightPtr+ kc);
//                }
//                weightPtr += laySt.kerW;
//                input_offset += input.imgW*laySt.hDilate;
//            }
//            // 填到目标地址
//            *(output.dataBuff + output_offset + dst_col_idx) = sum;
//        }
//        output_offset += output.imgW;
//    }
//Infer_conv2d_one_channel:
//    return ret;
//}
/**
 * Convolve one input channel plane with one 2-D kernel and store the result
 * in one output channel plane. The result overwrites the plane; it is not
 * accumulated onto existing content.
 *
 * No padding is applied here: window origins start at (0, 0) of the input
 * plane and advance by hStride / wStride, and output.imgH x output.imgW
 * windows are evaluated.
 *
 * @param input      source feature map (plane `in_c_idx` is read)
 * @param output     destination feature map (plane `out_c_idx` is written)
 * @param in_c_idx   index of the input plane to read
 * @param out_c_idx  index of the output plane to write
 * @param laySt      layer settings; kerW, kerH, wStride, hStride, wDilate and
 *                   hDilate are used
 * @param netParam   kerH*kerW weights, row by row (index kr*kerW + kc)
 * @return 0 on success, -1 if any buffer pointer is NULL
 */
SINT32 Infer_conv2d_one_channel(ST_CnnLayInOut input, ST_CnnLayInOut output, SINT32 in_c_idx, SINT32 out_c_idx,\
                                LayValueSt laySt, FP32* netParam)
{
    SINT32 src_i, src_j, dst_r, dst_col_idx, kr, kc, tap;
    SINT32 input_offset;
    SINT32 tapCnt = 0;
    SINT32 rowStep = 0;
    SINT32 inputPos = 0;
    SINT32 output_offset = 0;
    SINT32 sf[35] = {0};   /* per-tap source offsets; 35 = largest table-driven kernel (5x7) */
    FP32 sum = 0.f;
    FP32 *weightPtr = NULL;
    FP32 *pdst = NULL;
    FP32 *psrc = NULL;

    /* Parameter check. */
    if (output.dataBuff == NULL || NULL == netParam || input.dataBuff == NULL)
    {
        return -1;
    }

    weightPtr = netParam;
    inputPos = input.imgW * input.imgH * in_c_idx;
    output_offset = output.imgW * output.imgH * out_c_idx;

    if (((laySt.kerW == 3) && (laySt.kerH == 3)) ||
        ((laySt.kerW == 3) && (laySt.kerH == 5)) ||
        ((laySt.kerW == 5) && (laySt.kerH == 3)) ||
        ((laySt.kerW == 5) && (laySt.kerH == 7)) ||
        ((laySt.kerW == 1) && (laySt.kerH == 1)))
    {
        /* Fast path for the common kernel shapes: the offset of every tap
         * relative to the window origin is computed once, so the pixel loop
         * only does table look-ups and multiply-adds. */
        tapCnt = laySt.kerW * laySt.kerH;
        rowStep = input.imgW * laySt.hDilate;
        for (kr = 0; kr < laySt.kerH; kr++)
        {
            for (kc = 0; kc < laySt.kerW; kc++)
            {
                sf[kr * laySt.kerW + kc] = kr * rowStep + kc * laySt.wDilate;
            }
        }

        pdst = output.dataBuff + output_offset;
        for (src_i = 0, dst_r = 0; dst_r < output.imgH; src_i += laySt.hStride, dst_r++)
        {
            /* Re-derive the row start from src_i on every output row so that
             * hStride and the input row width are honoured for every kernel
             * shape, including 1x1. */
            psrc = input.dataBuff + inputPos + src_i * input.imgW;
            for (dst_col_idx = 0; dst_col_idx < output.imgW; dst_col_idx++)
            {
                /* Taps are summed in index order, starting from tap 0. */
                sum = *(psrc + sf[0]) * *(weightPtr);
                for (tap = 1; tap < tapCnt; tap++)
                {
                    sum += *(psrc + sf[tap]) * *(weightPtr + tap);
                }
                *pdst = sum;
                pdst++;
                psrc += laySt.wStride;
            }
        }
    }
    else
    {
        /* Generic sliding-window path for any other kernel shape. The kernel
         * size is printed so that shapes missing from the fast path show up
         * in the output. */
        printf("\n%d-%d;",laySt.kerW,laySt.kerH);

        for (src_i = 0, dst_r = 0; dst_r < output.imgH; src_i += laySt.hStride, dst_r++)
        {
            for (src_j = 0, dst_col_idx = 0; dst_col_idx < output.imgW; src_j += laySt.wStride, dst_col_idx++)
            {
                sum = 0;
                weightPtr = netParam;
                input_offset = src_i * input.imgW + src_j;
                for (kr = 0; kr < laySt.kerH; kr++)
                {
                    for (kc = 0; kc < laySt.kerW; kc++)
                    {
                        /* Accumulate the window (single-precision accumulator). */
                        sum += *(input.dataBuff + inputPos + input_offset + kc * laySt.wDilate) * *(weightPtr + kc);
                    }
                    weightPtr += laySt.kerW;
                    input_offset += input.imgW * laySt.hDilate;
                }
                /* Store to the destination plane. */
                *(output.dataBuff + output_offset + dst_col_idx) = sum;
            }
            output_offset += output.imgW;
        }
    }
    return 0;
}

/**
 * Max-pool one channel plane: for every output position take the largest
 * value inside a kerH x kerW window of the input plane.
 *
 * Window origins start at (0, 0) and advance by hStride / wStride; window
 * elements are adjacent (the dilation fields of laySt are not used here).
 *
 * @param input    source feature map (plane `channel` is read)
 * @param output   destination feature map (plane `channel` is written)
 * @param channel  index of the plane to process in both maps
 * @param laySt    layer settings; kerW, kerH, wStride and hStride are used
 * @return 0 on success, -1 if a data buffer is NULL
 */
SINT32 Infer_maxpool2d_one_channel(ST_CnnLayInOut input, ST_CnnLayInOut output, SINT32 channel,\
                                   LayValueSt laySt)
{
    SINT32 src_r, src_c, dst_r, dst_c, kr, k_c;
    SINT32 rowPos;
    SINT32 inputPos, outputPos;
    FP32 best, cur;

    if (input.dataBuff == NULL || output.dataBuff == NULL)
    {
        return -1;
    }

    inputPos = input.imgW * input.imgH * channel;
    outputPos = output.imgW * output.imgH * channel;

    /* Slide the window over the plane; outputPos tracks the current output row. */
    for (src_r = 0, dst_r = 0; dst_r < output.imgH; src_r += laySt.hStride, dst_r++, outputPos += output.imgW)
    {
        for (src_c = 0, dst_c = 0; dst_c < output.imgW; src_c += laySt.wStride, dst_c++)
        {
            rowPos = inputPos + src_r * input.imgW + src_c;
            /* Seed the running maximum with the window's first element rather
             * than a fixed sentinel, so windows whose values are all very
             * negative still yield their true maximum. */
            best = *(input.dataBuff + rowPos);
            for (kr = 0; kr < laySt.kerH; kr++, rowPos += input.imgW)
            {
                for (k_c = 0; k_c < laySt.kerW; k_c++)
                {
                    cur = *(input.dataBuff + rowPos + k_c);
                    if (cur > best)
                    {
                        best = cur;
                    }
                }
            }
            /* Store to the destination plane. */
            *(output.dataBuff + outputPos + dst_c) = best;
        }
    }
    return 0;
}

/**
 * Average-pool one channel plane: every output element is the mean of a
 * kerH x kerW window of the input plane.
 *
 * Window origins start at (0, 0) and advance by hStride / wStride. The sum is
 * always divided by kerH*kerW.
 *
 * @param input    source feature map (plane `channel` is read)
 * @param output   destination feature map (plane `channel` is written)
 * @param channel  index of the plane to process in both maps
 * @param laySt    layer settings; kerW, kerH, wStride and hStride are used
 * @return 0 on success, -1 if a data buffer is NULL
 */
SINT32 Infer_AvgPool2d_one_channel(ST_CnnLayInOut input, ST_CnnLayInOut output, SINT32 channel,\
                                                 LayValueSt laySt)
{
    SINT32 src_r, src_c, dst_r, dst_c, kr, k_c;
    SINT32 rowPos;
    SINT32 inputPos, outputPos;
    FP32 sumVal = 0;

    if (output.dataBuff == NULL || input.dataBuff == NULL)
    {
        return -1;
    }

    inputPos = input.imgW * input.imgH * channel;
    outputPos = output.imgW * output.imgH * channel;

    /* Slide the window over the plane. */
    for (src_r = 0, dst_r = 0; dst_r < output.imgH; src_r += laySt.hStride, dst_r++)
    {
        for (src_c = 0, dst_c = 0; dst_c < output.imgW; src_c += laySt.wStride, dst_c++)
        {
            sumVal = 0;   /* restart the sum for each window */
            for (kr = 0; kr < laySt.kerH; kr++)
            {
                /* Rows of the plane are input.imgW elements apart, so that is
                 * the stride used to reach window row (src_r + kr). */
                rowPos = inputPos + (src_r + kr) * input.imgW + src_c;
                for (k_c = 0; k_c < laySt.kerW; k_c++)
                {
                    sumVal += *(input.dataBuff + rowPos + k_c);
                }
            }
            /* Store the window mean to the destination plane. */
            *(output.dataBuff + outputPos + dst_r * output.imgW + dst_c) = sumVal / (laySt.kerH * laySt.kerW);
        }
    }
    return 0;
}
#endif

基本层的定义:

#ifdef _WINDOWS
#include "../../inc/netModel/cn_BaseNet.h"
#include "../../inc/netModel/cn_lay_infer.h"
#include "../../inc/netModel/FtCnnNetStructDef.h"
#include "../../inc/netModel/FtNetMemoryManger.h"
#define LOGE(...)
#define LOGD(...)

#else
#include "FtCommon.h"
#include "cn_BaseNet.h"
#include "cn_lay_infer.h"
#include "FtCnnNetStructDef.h"
#include "FtNetMemoryManger.h"
#endif

#ifndef TEST_INTEGER
/**
 * 2-D convolution layer, inference only.
 *
 * Steps: read the layer settings from `layPtr`, copy the input into a
 * temporary buffer enlarged by the layer padding, then for every output
 * channel add the bias (if enabled) and accumulate the single-channel
 * convolutions of the input channels belonging to the current group.
 *
 * Parameter layout read from `paramPtr`: all kernels first (kerH*kerW values
 * each, consumed in the order the loops below visit them), followed by
 * outChannel bias values when laySt.BiasSate is non-zero.
 *
 * @param input     input feature map
 * @param output    output feature map; dataBuff must already be allocated,
 *                  imgW/imgH/imgChannel are filled in here
 * @param layPtr    serialized LayValueSt for this layer
 * @param laylen    not used by this function
 * @param paramPtr  weights (and optional bias) of this layer
 * @param paramlen  receives the number of FP32 parameters this layer consumed
 * @return 0 on success; S_INVALID_ADDRESS for NULL pointers; S_INVALID_PARM
 *         for non-positive stride or group count; -2 on channel mismatch;
 *         -3 if a temporary buffer cannot be allocated; -4 if padding fails;
 *         otherwise the non-zero code of the per-channel convolution
 */
SINT32 Net_Conv2d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0, posIdx = 0;
    SINT32 tmp = 0;
    ST_CnnLayInOut _padding, tmpLayInOut;
    LayValueSt laySt;

    SINT32 channel_in_idx = 0, out_c_idx = 0, channel_in_start = 0, channel_in_end = 0;
    SINT32 each_group_channel = 0, output_offset = 0;
    SINT32 outBatch = 0, kerBatch = 0;
    SINT32 biasPos = 0;
    FP32* pKernel = paramPtr;
    FP32* pkerBias = NULL;

    /* Zero both scratch descriptors before any early exit, so CONV2D_EXIT
     * always hands initialised descriptors to the free routine. */
    Ft_Zeros_ST_CnnLayInOut(&_padding);
    Ft_Zeros_ST_CnnLayInOut(&tmpLayInOut);
    if (input.dataBuff == NULL || output == NULL || layPtr == NULL ||
        paramPtr == NULL || paramlen == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto CONV2D_EXIT;
    }
    if (output->dataBuff == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto CONV2D_EXIT;
    }

    memcpy(&laySt, layPtr, sizeof(LayValueSt));
    if (input.imgChannel != laySt.inChannel)
    {
        ret = -2;  /* the layer's input channel count must match the input */
        goto CONV2D_EXIT;
    }
    /* These three values are used as divisors below. */
    if (laySt.wStride <= 0 || laySt.hStride <= 0 || laySt.Groups <= 0)
    {
        ret = S_INVALID_PARM;
        goto CONV2D_EXIT;
    }

    /* Size of the input after padding; padding is symmetric, hence *2.
     * Padding does not change the channel count. */
    _padding.imgW = input.imgW + laySt.wPadding*2;
    _padding.imgH = input.imgH + laySt.hPadding*2;
    _padding.imgChannel = input.imgChannel;

    /* Output size: (in + 2*P - D*(K-1) - 1) / S + 1 */
    output->imgW = (input.imgW + laySt.wPadding *2 - laySt.wDilate *(laySt.kerW - 1) -1)/laySt.wStride + 1;
    output->imgH = (input.imgH + laySt.hPadding *2 - laySt.hDilate *(laySt.kerH - 1) -1)/laySt.hStride +1;
    output->imgChannel = laySt.outChannel;

    tmp = output->imgW*output->imgH*output->imgChannel *sizeof(FP32);  /* bytes the output needs */
    if (tmp > output->validLen)
    {
        /* NOTE(review): an undersized output buffer is only logged and the
         * function still writes `tmp` bytes below — confirm whether this
         * should abort instead. */
        LOGE("*** output is error %d, %d!", tmp, output->validLen);
    }

    /* Scratch plane that receives one single-channel convolution result. */
    tmpLayInOut.imgW = output->imgW;
    tmpLayInOut.imgH = output->imgH;
    tmpLayInOut.imgChannel = 1;

    kerBatch = laySt.kerH*laySt.kerW;                         /* weights in one 2-D kernel */
    outBatch = tmpLayInOut.imgW*tmpLayInOut.imgH;             /* elements in one output plane */
    each_group_channel = _padding.imgChannel / laySt.Groups;  /* input channels per group */

    /* Allocate the padded input and the scratch plane; both calls return 0
     * on success, so OR-ing the results detects a failure of either. */
    ret = Ft_SafeAlloc_ST_CnnLayInOut(&_padding);
    ret |= Ft_SafeAlloc_ST_CnnLayInOut(&tmpLayInOut);
    if (ret != 0)
    {
        ret = -3;
        goto CONV2D_EXIT;
    }

    ret = Infer_padding_layInout(input, _padding, laySt.hPadding,laySt.wPadding);
    if (ret != 0)
    {
        ret = -4;
        goto CONV2D_EXIT;
    }

    /* Number of parameters this layer consumes: kernels, then optional bias. */
    *paramlen =(laySt.inChannel*kerBatch * laySt.outChannel)/laySt.Groups;
    biasPos = *paramlen;
    if (laySt.BiasSate)
    {
        *paramlen += laySt.outChannel;
        pkerBias = paramPtr + biasPos;
    }
    memset(output->dataBuff, 0x00, output->imgChannel*outBatch*sizeof(FP32));

    /* Convolution. */
    channel_in_start = 0;
    channel_in_end = each_group_channel;
    output_offset = 0;
    for (out_c_idx = 0; out_c_idx < output->imgChannel; out_c_idx++)
    {
        /* Bias term: the output plane starts out as the bias value. */
        if (pkerBias)
        {
            for (posIdx = 0; posIdx < outBatch; posIdx++)
            {
                *(output->dataBuff + output_offset + posIdx) += *(pkerBias + out_c_idx);
            }
        }

        for (channel_in_idx = channel_in_start; channel_in_idx < channel_in_end; channel_in_idx++)
        {
            /* NOTE(review): this call was lost in the published listing and is
             * reconstructed from context (the scratch plane has one channel,
             * hence output index 0) — verify against the original source. */
            ret = Infer_conv2d_one_channel(_padding, tmpLayInOut, channel_in_idx, 0, laySt, pKernel);
            if (ret != 0)
            {
                goto CONV2D_EXIT;
            }
            /* Accumulate this input channel's contribution. */
            for (posIdx = 0; posIdx < outBatch; posIdx++)
            {
                *(output->dataBuff + output_offset + posIdx) += *(tmpLayInOut.dataBuff + posIdx);
            }
            /* Clear the scratch plane and move on to the next kernel. */
            memset(tmpLayInOut.dataBuff, 0x00, tmpLayInOut.validLen);
            pKernel += kerBatch;
        }

        /* Select the input-channel range for the next output channel: wrap to
         * the first group once the last input channel has been reached,
         * otherwise advance by one group (grouped / depthwise convolution). */
        if (channel_in_end >= input.imgChannel)
        {
            channel_in_start = 0;
            channel_in_end = each_group_channel;
        }
        else
        {
            channel_in_start = channel_in_end;
            channel_in_end += each_group_channel;
        }
        output_offset += outBatch;
    }

CONV2D_EXIT:
    Ft_SafeFree_ST_CnnLayInOut(&tmpLayInOut);
    Ft_SafeFree_ST_CnnLayInOut(&_padding);
    return ret;
}

//2022 01 11
/**
 * Compute the output shape of a convolution layer and the amount of scratch
 * memory the layer needs, optionally allocating the output buffer.
 *
 * @param input       input feature map (only its dimensions are read)
 * @param output      receives imgW / imgH / imgChannel of the layer output
 * @param layPtr      serialized LayValueSt for this layer
 * @param laylen      not used by this function
 * @param retBufSize  receives the scratch size in bytes: one output plane plus
 *                    the padded input, in FP32 elements
 * @param bAlloc      non-zero to allocate `output` via Ft_SafeAlloc_ST_CnnLayInOut
 * @return S_INVALID_ADDRESS if layPtr, output or retBufSize is NULL; otherwise
 *         0, or the allocator's result when bAlloc is non-zero
 */
SINT32 Net_Conv2d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    LayValueSt cfg;
    SINT32 paddedW = 0, paddedH = 0;
    SINT32 floatCnt = 0;

    if (NULL == layPtr || NULL == output || NULL == retBufSize)
    {
        return S_INVALID_ADDRESS;
    }

    /* Deserialize the layer settings. */
    memcpy(&cfg, layPtr, sizeof(LayValueSt));

    paddedW = input.imgW + cfg.wPadding * 2;
    paddedH = input.imgH + cfg.hPadding * 2;

    /* out = (in + 2*P - D*(K-1) - 1) / S + 1 */
    output->imgW = (paddedW - cfg.wDilate * (cfg.kerW - 1) - 1) / cfg.wStride + 1;
    output->imgH = (paddedH - cfg.hDilate * (cfg.kerH - 1) - 1) / cfg.hStride + 1;
    output->imgChannel = cfg.outChannel;

    /* Scratch need: one output plane + the whole padded input. */
    floatCnt = output->imgW * output->imgH;
    floatCnt += paddedW * paddedH * input.imgChannel;
    *retBufSize = floatCnt * sizeof(FP32);

    if (bAlloc != 0)
    {
        return Ft_SafeAlloc_ST_CnnLayInOut(output);
    }
    return 0;
}


/**
 * 2-D max-pooling layer, inference only.
 *
 * The input is copied into a temporary buffer enlarged by the layer padding
 * (and by the ceil-mode extension when laySt.ceilMode > 0), then every
 * channel is pooled into `output`.
 *
 * @param input     input feature map
 * @param output    output feature map; imgW/imgH/imgChannel are filled in here
 * @param layPtr    serialized LayValueSt for this layer
 * @param laylen    not used by this function
 * @param paramPtr  not used (the layer has no parameters)
 * @param paramlen  if non-NULL, set to 0 on success
 * @return 0 on success; -1 if output or layPtr is NULL; -2 if a stride is not
 *         positive; otherwise the non-zero code of the failing allocation,
 *         padding or per-channel pooling step
 */
SINT32 Net_MaxPool2d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    SINT32 c_idx;
    LayValueSt laySt;
    ST_CnnLayInOut _padding;
    SINT32 rows = 0, cols = 0;

    Ft_Zeros_ST_CnnLayInOut(&_padding);
    /* Parameter check. */
    if (output == NULL || layPtr == NULL)
    {
        ret = -1;
        goto MAXPOOL2D_EXIT;
    }
    memcpy(&laySt, layPtr, sizeof(LayValueSt));
    /* Strides are used as divisors below. */
    if (laySt.wStride <= 0 || laySt.hStride <= 0)
    {
        ret = -2;
        goto MAXPOOL2D_EXIT;
    }

    _padding.imgW = input.imgW + laySt.wPadding*2;
    _padding.imgH = input.imgH + laySt.hPadding*2;
    _padding.imgChannel = input.imgChannel;
    /* Ceil mode: enlarge the working plane so that the integer division below
     * rounds up. NOTE(review): when the remainder is 0 a full stride is still
     * added, which yields one extra output row/column; this matches
     * Net_MaxPool2d_OutSize_Infer and is left unchanged here. */
    if (laySt.ceilMode > 0)
    {
        cols = _padding.imgW -1 -laySt.wDilate*(laySt.kerW -1);
        rows = _padding.imgH -1 -laySt.hDilate*(laySt.kerH -1);
        _padding.imgW += laySt.wStride - (cols%(laySt.wStride));
        _padding.imgH += laySt.hStride - (rows%(laySt.hStride));
    }
    ret = Ft_SafeAlloc_ST_CnnLayInOut(&_padding);
    if (ret != 0)
    {
        LOGE("%s[%05d]...Ft_SafeAlloc_ST_CnnLayInOut(),ret = %d;\n", __FUNCTION__, __LINE__, ret);
        goto MAXPOOL2D_EXIT;
    }
    ret = Infer_padding_layInout(input, _padding, laySt.hPadding, laySt.wPadding);
    if (ret != 0)
    {
        goto MAXPOOL2D_EXIT;
    }

    output->imgW = (_padding.imgW -1 - laySt.wDilate*(laySt.kerW -1))/laySt.wStride +1;
    output->imgH = (_padding.imgH -1 - laySt.hDilate*(laySt.kerH -1))/laySt.hStride +1;
    output->imgChannel = _padding.imgChannel;

    /* Max-pool every channel; stop at the first failing channel. */
    for (c_idx = 0; c_idx < input.imgChannel; c_idx++)
    {
        ret = Infer_maxpool2d_one_channel(_padding, *output, c_idx, laySt);
        if (ret != 0)
        {
            goto MAXPOOL2D_EXIT;
        }
    }
    if (paramlen != NULL)
    {
        *paramlen = 0;
    }

MAXPOOL2D_EXIT:
    Ft_SafeFree_ST_CnnLayInOut(&_padding);
    return ret;
}

/**
 * Compute the output shape of a pooling layer and the size of the padded
 * working buffer it needs, optionally allocating the output buffer.
 *
 * @param input       input feature map (only its dimensions are read)
 * @param output      receives imgW / imgH / imgChannel of the layer output
 * @param layPtr      serialized LayValueSt for this layer
 * @param laylen      not used by this function
 * @param retBufSize  receives the working-buffer size in bytes
 * @param bAlloc      non-zero to allocate `output` via Ft_SafeAlloc_ST_CnnLayInOut
 * @return S_INVALID_ADDRESS if layPtr, output or retBufSize is NULL; otherwise
 *         0, or the allocator's result when bAlloc is non-zero
 */
SINT32 Net_MaxPool2d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    LayValueSt cfg;
    SINT32 workW = 0, workH = 0;     /* working plane: input plus padding (plus ceil-mode extension) */
    SINT32 reachW = 0, reachH = 0;   /* extent covered by one window: D*(K-1)+1 */

    if (NULL == layPtr || NULL == output || NULL == retBufSize)
    {
        return S_INVALID_ADDRESS;
    }

    memcpy(&cfg, layPtr, sizeof(LayValueSt));

    workW = input.imgW + cfg.wPadding * 2;
    workH = input.imgH + cfg.hPadding * 2;
    reachW = cfg.wDilate * (cfg.kerW - 1) + 1;
    reachH = cfg.hDilate * (cfg.kerH - 1) + 1;

    /* Ceil mode: grow the working plane by (stride - remainder). */
    if (cfg.ceilMode > 0)
    {
        workW += cfg.wStride - ((workW - reachW) % cfg.wStride);
        workH += cfg.hStride - ((workH - reachH) % cfg.hStride);
    }

    /* Memory the layer uses internally for the padded copy. */
    *retBufSize = workW * workH * input.imgChannel * sizeof(FP32);

    output->imgW = (workW - reachW) / cfg.wStride + 1;
    output->imgH = (workH - reachH) / cfg.hStride + 1;
    output->imgChannel = input.imgChannel;

    if (bAlloc != 0)
    {
        return Ft_SafeAlloc_ST_CnnLayInOut(output);
    }
    return 0;
}


/**
 * 2-D average-pooling layer, inference only.
 *
 * The input is copied into a temporary buffer enlarged by the layer padding
 * (and by the ceil-mode extension when laySt.ceilMode > 0), then every
 * channel is pooled into `output`. The output shape is computed with the same
 * formula as Net_MaxPool2d_OutSize_Infer, which Net_AvgPool2d_OutSize_Infer
 * delegates to, so the shape written here matches the size reported there.
 *
 * @param input     input feature map
 * @param output    output feature map; imgW/imgH/imgChannel are filled in here
 * @param layPtr    serialized LayValueSt for this layer
 * @param laylen    not used by this function
 * @param paramPtr  not used (the layer has no parameters)
 * @param paramlen  if non-NULL, set to 0 on success
 * @return 0 on success; S_INVALID_ADDRESS if output or layPtr is NULL;
 *         S_INVALID_PARM if a stride is not positive; otherwise the non-zero
 *         code of the failing allocation, padding or per-channel pooling step
 */
SINT32 Net_AvgPool2d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    SINT32 c_idx = 0;
    SINT32 rows = 0, cols = 0;
    LayValueSt laySt;
    ST_CnnLayInOut _padding;

    Ft_Zeros_ST_CnnLayInOut(&_padding);
    /* Parameter check. */
    if (output == NULL || layPtr == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto AVGPOOL2D_EXIT;
    }
    memcpy(&laySt, layPtr, sizeof(LayValueSt));
    /* Strides are used as divisors below. */
    if (laySt.wStride <= 0 || laySt.hStride <= 0)
    {
        ret = S_INVALID_PARM;
        goto AVGPOOL2D_EXIT;
    }

    _padding.imgW = input.imgW + laySt.wPadding*2;
    _padding.imgH = input.imgH + laySt.hPadding*2;
    _padding.imgChannel = input.imgChannel;
    /* Ceil mode: same extension as Net_MaxPool2d_OutSize_Infer. */
    if (laySt.ceilMode > 0)
    {
        cols = _padding.imgW -1 -laySt.wDilate*(laySt.kerW -1);
        rows = _padding.imgH -1 -laySt.hDilate*(laySt.kerH -1);
        _padding.imgW += laySt.wStride - (cols%(laySt.wStride));
        _padding.imgH += laySt.hStride - (rows%(laySt.hStride));
    }

    /* The number of windows depends on the kernel extent, not only on the
     * stride: out = (padded - D*(K-1) - 1) / S + 1. */
    output->imgW = (_padding.imgW -1 - laySt.wDilate*(laySt.kerW -1))/laySt.wStride +1;
    output->imgH = (_padding.imgH -1 - laySt.hDilate*(laySt.kerH -1))/laySt.hStride +1;
    output->imgChannel = input.imgChannel;
    if (output->imgW*output->imgH*output->imgChannel *sizeof(FP32) > output->validLen)
    {
        /* NOTE(review): an undersized output buffer is only logged — confirm
         * whether this should abort instead. */
        LOGE("***harvey output is error!\n");
    }

    ret = Ft_SafeAlloc_ST_CnnLayInOut(&_padding);
    if (ret != 0)
    {
        goto AVGPOOL2D_EXIT;
    }
    ret = Infer_padding_layInout(input, _padding, laySt.hPadding, laySt.wPadding);
    if (ret != 0)
    {
        goto AVGPOOL2D_EXIT;
    }

    /* Average-pool every channel; stop at the first failing channel. */
    for (c_idx = 0; c_idx < input.imgChannel; c_idx++)
    {
        ret = Infer_AvgPool2d_one_channel(_padding, *output, c_idx, laySt);
        if (ret != 0)
        {
            goto AVGPOOL2D_EXIT;
        }
    }
    if (paramlen != NULL)
    {
        *paramlen = 0;
    }

AVGPOOL2D_EXIT:
    Ft_SafeFree_ST_CnnLayInOut(&_padding);
    return ret;
}

/*
 * Output-size query for the average-pooling layer.
 *
 * Forwards every argument unchanged to Net_MaxPool2d_OutSize_Infer and
 * returns its result, so average pooling reports the same output shape and
 * buffer size as max pooling for the same layer settings.
 */
SINT32 Net_AvgPool2d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    return Net_MaxPool2d_OutSize_Infer(input, output, layPtr, laylen, retBufSize, bAlloc);
}

/**
 * 2-D batch-normalization layer, inference only.
 *
 * Every element of channel c is transformed as (x - mean[c]) * coe[c] + bias[c].
 * Parameter layout in `paramPtr`, with C = input.imgChannel:
 *   [0, C)    mean per channel
 *   [C, 2C)   multiplicative coefficient per channel
 *   [2C, 3C)  additive bias per channel
 *
 * @param input     input feature map
 * @param output    output feature map with an allocated dataBuff; its
 *                  dimensions are set to those of the input
 * @param layPtr    not used by this function
 * @param laylen    not used by this function
 * @param paramPtr  3*C parameters laid out as described above
 * @param paramlen  receives the number of parameters consumed (3*C)
 * @return 0 on success; -1 if input.dataBuff, output, paramPtr or paramlen is
 *         NULL; -2 if output->dataBuff is NULL
 */
SINT32 Net_BatchNorm2d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    SINT32 c = 0, idx = 0;
    SINT32 batchSize = 0, offset = 0;
    FP32 mean = 0.f, coe = 0.f, bias = 0.f;

    if (input.dataBuff == NULL || output == NULL || paramPtr == NULL || paramlen == NULL)
    {
        ret = -1;
        goto BATCHNORM2D_EXIT;
    }
    if (output->dataBuff == NULL)
    {
        ret = -2;
        goto BATCHNORM2D_EXIT;
    }
    output->imgW = input.imgW;
    output->imgH = input.imgH;
    output->imgChannel = input.imgChannel;

    batchSize = input.imgW*input.imgH;   /* elements in one channel plane */
    for (c = 0; c < input.imgChannel; c++)
    {
        mean = *(paramPtr + c);
        coe = *(paramPtr + c + input.imgChannel);
        bias = *(paramPtr + c + input.imgChannel*2);
        /* NOTE(review): the loop header was garbled in the published listing
         * and is reconstructed as a pass over one channel plane. */
        for (idx = 0; idx < batchSize; idx++)
        {
            *(output->dataBuff + offset + idx) = (*(input.dataBuff + offset + idx) - mean)*coe + bias;
        }
        offset += batchSize;
    }
    *paramlen = input.imgChannel * 3;

BATCHNORM2D_EXIT:
    return ret;
}

/**
 * Output-size query for the 2-D batch-normalization layer.
 *
 * The output has the same shape as the input and the layer reports no extra
 * scratch memory (*retBufSize = 0).
 *
 * @param input       input feature map (only its dimensions are read)
 * @param output      receives the input's imgW / imgH / imgChannel
 * @param layPtr      not used by this function
 * @param laylen      not used by this function
 * @param retBufSize  set to 0
 * @param bAlloc      non-zero to allocate `output` via Ft_SafeAlloc_ST_CnnLayInOut
 * @return -1 if output or retBufSize is NULL; otherwise 0, or the allocator's
 *         result when bAlloc is non-zero
 */
SINT32 Net_BatchNorm2d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    if (NULL == output || NULL == retBufSize)
    {
        return -1;
    }

    /* Shape is passed through unchanged. */
    output->imgW = input.imgW;
    output->imgH = input.imgH;
    output->imgChannel = input.imgChannel;
    *retBufSize = 0;

    if (bAlloc == 0)
    {
        return 0;
    }
    return Ft_SafeAlloc_ST_CnnLayInOut(output);
}


/**
 * LeakyReLU activation, inference only: negative elements are multiplied by a
 * fixed slope of 0.01, all other elements are copied unchanged.
 *
 * @param input     input feature map
 * @param output    output feature map with an allocated dataBuff; its
 *                  dimensions are set to those of the input
 * @param layPtr    not used by this function
 * @param laylen    not used by this function
 * @param paramPtr  not used (the layer has no parameters)
 * @param paramlen  set to 0
 * @return 0 on success; -1 if input.dataBuff or output is NULL; -2 if
 *         output->dataBuff is NULL
 */
SINT32 Net_LeakyReLU_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    const FP32 negSlope = 0.01f;
    SINT32 total = 0;
    SINT32 k = 0;

    if (NULL == input.dataBuff || NULL == output)
    {
        return -1;
    }
    if (NULL == output->dataBuff)
    {
        return -2;
    }

    output->imgW = input.imgW;
    output->imgH = input.imgH;
    output->imgChannel = input.imgChannel;

    /* The activation is element-wise, so the map is walked as one flat array. */
    total = input.imgChannel*input.imgW*input.imgH;
    for (k = 0; k < total; k++)
    {
        *(output->dataBuff + k) = (*(input.dataBuff + k) < 0)
                                      ? negSlope * (*(input.dataBuff + k))
                                      : *(input.dataBuff + k);
    }

    *paramlen = 0;
    return 0;
}

/**
 * Output-size query for the LeakyReLU layer.
 *
 * The output has the same shape as the input and the layer reports no extra
 * scratch memory (*retBufSize = 0).
 *
 * @param input       input feature map (only its dimensions are read)
 * @param output      receives the input's imgW / imgH / imgChannel
 * @param layPtr      layer descriptor; see the note in the body
 * @param laylen      length of the layer descriptor; see the note in the body
 * @param retBufSize  set to 0
 * @param bAlloc      non-zero to allocate `output` via Ft_SafeAlloc_ST_CnnLayInOut
 * @return -1 if output or retBufSize is NULL, or if a non-empty descriptor is
 *         passed; otherwise 0, or the allocator's result when bAlloc is non-zero
 */
SINT32 Net_LeakyReLU_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    SINT32 ret = 0;

    if (output == NULL || retBufSize == NULL)
    {
        ret = -1;
        goto LEAKYRELU_OUTSIZE_EXIT;
    }
    /* NOTE(review): the call is rejected when a non-NULL descriptor with a
     * non-zero length is supplied. This condition is kept as found (laylen is
     * an integer, so it is compared with 0 rather than NULL); presumably the
     * layer is expected to carry no descriptor — confirm against the callers. */
    if (layPtr != NULL && laylen != 0)
    {
        ret = -1;
        goto LEAKYRELU_OUTSIZE_EXIT;
    }
    output->imgChannel = input.imgChannel;
    output->imgH = input.imgH;
    output->imgW = input.imgW;

    *retBufSize = 0;
    if (bAlloc != 0)
    {
        ret = Ft_SafeAlloc_ST_CnnLayInOut(output);
    }

LEAKYRELU_OUTSIZE_EXIT:
    return ret;
}


/**
 * Fully-connected (linear) layer, inference only.
 *
 * The whole input map is treated as one flat vector of
 * imgW*imgH*imgChannel values, which must equal laySt.inChannel. Output
 * element i is the dot product of that vector with weight row i, plus
 * bias[i] when laySt.BiasSate is non-zero.
 *
 * Parameter layout in `paramPtr`: outChannel rows of inputlen weights,
 * followed by outChannel bias values when the bias is enabled.
 *
 * When input and output share the same buffer, the input is first copied to
 * a temporary buffer so results do not overwrite values still to be read.
 *
 * @param input     input feature map
 * @param output    output with an allocated dataBuff; set to
 *                  imgChannel = outChannel, imgH = imgW = 1
 * @param layPtr    serialized LayValueSt for this layer
 * @param laylen    not used by this function
 * @param paramPtr  weights (and optional bias) of this layer
 * @param paramlen  receives the number of FP32 parameters consumed
 * @return 0 on success; S_INVALID_ADDRESS for NULL pointers; S_INVALID_PARM if
 *         the flattened input length differs from laySt.inChannel; otherwise
 *         the allocator's error code
 */
SINT32 Net_Linear_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    SINT32 i = 0, inputlen = 0, j = 0;
    FP32  curValue = 0.0f;
    FP32* pParaW = NULL;
    FP32* pParaBias = NULL;

    LayValueSt laySt;
    ST_CnnLayInOut tmpInOut;

    Ft_Zeros_ST_CnnLayInOut(&tmpInOut);
    if(input.dataBuff == NULL || output== NULL)
    {
        ret = S_INVALID_ADDRESS;
        LOGE("%s :code %d", __FUNCTION__, __LINE__);
        goto LINEAR_EXIT;
    }

    if(output->dataBuff == NULL)
    {
        ret = S_INVALID_ADDRESS;
        LOGE("*************harvey:\n");
        LOGE("%s :code %d", __FUNCTION__, __LINE__);
        goto LINEAR_EXIT;
    }
    /* The descriptor, the weights and the length out-parameter are all
     * dereferenced below. */
    if(layPtr == NULL || paramPtr == NULL || paramlen == NULL)
    {
        ret = S_INVALID_ADDRESS;
        LOGE("%s :code %d", __FUNCTION__, __LINE__);
        goto LINEAR_EXIT;
    }
    memcpy(&laySt, layPtr, sizeof(LayValueSt));
    if(input.dataBuff == output->dataBuff)
    {
        /* In-place call: work on a private copy of the input. */
        tmpInOut.imgH = input.imgH;
        tmpInOut.imgW = input.imgW;
        tmpInOut.imgChannel = input.imgChannel;
        ret = Ft_SafeAlloc_ST_CnnLayInOut(&tmpInOut);
        if(ret != 0)
        {
            LOGE("%s[%05d]...Ft_SafeAlloc_ST_CnnLayInOut() is error,ret = %d;", __FUNCTION__, __LINE__, ret);
            goto LINEAR_EXIT;
        }
        memcpy(tmpInOut.dataBuff, input.dataBuff, tmpInOut.validLen);
    }
    else
    {
        /* Distinct buffers: tmpInOut simply aliases the input descriptor. */
        memcpy(&tmpInOut, &input, sizeof(ST_CnnLayInOut));
    }
    inputlen = tmpInOut.imgW *tmpInOut.imgH*tmpInOut.imgChannel;
    /* The output vector length is stored in imgChannel; imgH = imgW = 1. */
    output->imgChannel = laySt.outChannel;
    output->imgH = 1;
    output->imgW = 1;

    if(inputlen != laySt.inChannel)
    {
        ret = S_INVALID_PARM;
        LOGE("%s :code %d;%d,%d\n", __FUNCTION__, __LINE__,inputlen, laySt.inChannel);
        goto LINEAR_EXIT;
    }

    pParaW = paramPtr;
    if(laySt.BiasSate)
    {
        /* Bias values follow the complete weight matrix. */
        pParaBias = paramPtr + output->imgChannel*inputlen;
    }

    for (i = 0; i < output->imgChannel; i++)
    {
        curValue = 0.0f;
        /* pParaW keeps advancing, so it walks the weight matrix row by row. */
        for (j = 0; j < inputlen; j++, pParaW++)
        {
            curValue += *pParaW * (*(tmpInOut.dataBuff + j));
        }
        if (pParaBias != NULL)
        {
            curValue += *(pParaBias + i);
        }
        *(output->dataBuff + i) = curValue;
    }

    *paramlen = output->imgChannel*inputlen;
    if(laySt.BiasSate)
    {
        *paramlen += output->imgChannel;
    }

LINEAR_EXIT:
    /* Free tmpInOut only when it does not alias the caller's input buffer. */
    if(tmpInOut.dataBuff != input.dataBuff)
    {
        Ft_SafeFree_ST_CnnLayInOut(&tmpInOut);
    }
    return ret;
}
/**
 * Output-size query for the fully-connected (linear) layer.
 *
 * The flattened input length (imgW*imgH*imgChannel) must equal
 * laySt.inChannel. The output is a 1 x 1 map with laySt.outChannel channels.
 *
 * @param input       input feature map (only its dimensions are read)
 * @param output      receives imgW = imgH = 1, imgChannel = outChannel
 * @param layPtr      serialized LayValueSt for this layer
 * @param laylen      not used by this function
 * @param retBufSize  receives the largest scratch need in bytes
 *                    (inChannel FP32 values)
 * @param bAlloc      non-zero to allocate `output` via Ft_SafeAlloc_ST_CnnLayInOut
 * @return S_INVALID_ADDRESS if layPtr, output or retBufSize is NULL;
 *         S_INVALID_PARM on an input length mismatch; otherwise 0, or the
 *         allocator's result when bAlloc is non-zero
 */
SINT32 Net_Linear_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    SINT32 ret = 0;
    SINT32 tmp = 0;
    LayValueSt laySt;

    if(layPtr == NULL || output == NULL || retBufSize == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto LINEAR_OUTSIZE_EXIT;
    }
    memcpy(&laySt, layPtr, sizeof(LayValueSt));

    tmp = input.imgW *input.imgH*input.imgChannel;
    if(tmp != laySt.inChannel)
    {
        ret = S_INVALID_PARM;
        goto LINEAR_OUTSIZE_EXIT;
    }

    output->imgW = 1;
    output->imgH = 1;
    output->imgChannel = laySt.outChannel;
    /* Largest amount of scratch memory the layer may need. */
    *retBufSize = laySt.inChannel*sizeof(FP32);

    if(bAlloc != 0)
    {
        ret = Ft_SafeAlloc_ST_CnnLayInOut(output);
    }

LINEAR_OUTSIZE_EXIT:
    return ret;
}

/*
 * Inference for a 1-D batch-normalization layer on a 1 x 1 x C tensor.
 *
 * With C = input.imgChannel and p = paramPtr, every channel c is computed as
 *     out[c] = (in[c] - p[c]) * p[C + c] + p[2 * C + c]
 * i.e. result = (input - mean) * coe + bias, with the three parameter runs
 * stored back to back.
 *
 * input    : source tensor; imgH and imgW must both be 1.
 * output   : destination; must already have a data buffer and the same
 *            imgH / imgW / imgChannel as input.
 * layPtr   : not read by this function.
 * laylen   : not read by this function.
 * paramPtr : layer parameters, 3 * C FP32 values laid out as described above.
 * paramlen : receives the number of FP32 parameters consumed (3 * C).
 *
 * Returns 0 on success, S_INVALID_ADDRESS when a required pointer is NULL,
 * S_INVALID_PARM when the shapes are not acceptable.
 */
SINT32 Net_BatchNorm1d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    SINT32 c = 0;
    SINT32 channels = 0;
    const FP32* pMean = NULL;
    const FP32* pCoe = NULL;
    const FP32* pBias = NULL;

    /* paramPtr and paramlen are dereferenced below, so reject NULL up front. */
    if(output == NULL || input.dataBuff == NULL || paramPtr == NULL || paramlen == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto BATCHNORM1D_EXIT;
    }
    if(output->dataBuff == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto BATCHNORM1D_EXIT;
    }
    if(input.imgH != 1 || input.imgW != 1)
    {
        ret = S_INVALID_PARM;
        goto BATCHNORM1D_EXIT;
    }

    /* The layer works element-wise, so output must have the input's shape. */
    if((output->imgH != input.imgH) || (output->imgW != input.imgW) ||
            (output->imgChannel != input.imgChannel))
    {
        ret = S_INVALID_PARM;
        goto BATCHNORM1D_EXIT;
    }

    channels = input.imgChannel;
    pMean = paramPtr;
    pCoe = paramPtr + channels;
    pBias = paramPtr + channels * 2;
    for(c = 0; c < channels; c++)
    {
        // result = (input - mean) * coe + bias
        *(output->dataBuff + c) = (*(input.dataBuff + c) - *(pMean + c)) * *(pCoe + c) + *(pBias + c);
    }
    *paramlen = channels * 3;

    //-- exit--
BATCHNORM1D_EXIT:
    return ret;
}

/*
 * Shape inference for the 1-D batch-normalization layer.
 *
 * The output has exactly the input's shape and the layer reports no extra
 * scratch memory.
 *
 * input      : input tensor descriptor; imgH and imgW must both be 1.
 * output     : receives the output shape; when bAlloc is non-zero it is also
 *              handed to Ft_SafeAlloc_ST_CnnLayInOut().
 * layPtr     : not read by this function.
 * laylen     : not read by this function.
 * retBufSize : receives 0.
 * bAlloc     : non-zero to allocate the output buffer.
 *
 * Returns 0 on success, S_INVALID_ADDRESS when output or retBufSize is NULL,
 * S_INVALID_PARM when the input is not 1 x 1 spatially, or the value returned
 * by Ft_SafeAlloc_ST_CnnLayInOut().
 */
SINT32 Net_BatchNorm1d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    SINT32 ret = 0;

    /*
     * layPtr / laylen are deliberately not validated: nothing here reads the
     * layer description, so a call must not fail because of it.
     */
    (void)layPtr;
    (void)laylen;

    /* Both pointers are written below. */
    if(output == NULL || retBufSize == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto BATCHNORM1D_OUTSIZE_EXIT;
    }

    if(input.imgH != 1 || input.imgW != 1)
    {
        ret = S_INVALID_PARM;
        goto BATCHNORM1D_OUTSIZE_EXIT;
    }
    output->imgChannel = input.imgChannel;
    output->imgH = input.imgH;
    output->imgW = input.imgW;
    *retBufSize = 0;

    if(bAlloc != 0)
    {
        ret = Ft_SafeAlloc_ST_CnnLayInOut(output);
    }

    //-- exit--
BATCHNORM1D_OUTSIZE_EXIT:
    return ret;
}

/*
 * Adaptive average pooling applied independently to every channel.
 *
 * The requested output size is taken from the layer description
 * (LayValueSt.kerH x LayValueSt.kerW). Each output element (row, col) is the
 * mean of an input window of (inH + 1 - outH) x (inW + 1 - outW) values whose
 * top-left corner is at input position (row, col); the window therefore
 * advances by one input element per output element.
 *
 * NOTE(review): this window rule gives the whole-plane mean for a 1 x 1
 * output, but it is not the bin rule usually used for adaptive pooling
 * (e.g. 4 -> 2 yields windows [0,3) and [1,4) here). Confirm against the
 * framework the model was exported from before relying on other sizes.
 *
 * input    : source tensor; copied into a scratch buffer before pooling.
 * output   : destination; imgH / imgW / imgChannel are overwritten, and
 *            validLen must cover outH * outW * channels FP32 values.
 * layPtr   : serialized layer description, copied into a LayValueSt.
 * laylen   : not read by this function.
 * paramPtr : not read by this function.
 * paramlen : receives 0 (the layer consumes no parameters).
 *
 * Returns 0 on success, S_INVALID_ADDRESS when a required pointer is NULL,
 * S_ERROR_LEAK when the output buffer is too small, S_INVALID_PARM when the
 * requested output is larger than the input, or the value returned by
 * Ft_SafeAlloc_ST_CnnLayInOut().
 */
SINT32 Net_AdaptiveAvgPool2d_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    LayValueSt laySt;
    SINT32 kerW = 0, kerH = 0;
    SINT32 channel = 0, rows = 0, cols = 0;
    SINT32 k_row = 0, k_col = 0;
    SINT32 inBatch = 0, outBatch = 0, kerArea = 0;
    FP32 avgVal = 0.f;
    SINT32 input_offset = 0, input_offset_2 = 0;
    SINT32 output_offset = 0, output_offset_2 = 0;
    ST_CnnLayInOut tmpInOut;

    /* Zero first so the common exit can always release tmpInOut. */
    Ft_Zeros_ST_CnnLayInOut(&tmpInOut);
    if(layPtr == NULL || output == NULL || paramlen == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto ADAPTIVEAVGPOOL2D_EXIT;
    }
    if(input.dataBuff == NULL || output->dataBuff == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto ADAPTIVEAVGPOOL2D_EXIT;
    }

    /* Work on a private copy of the input. */
    tmpInOut.imgH = input.imgH;
    tmpInOut.imgW = input.imgW;
    tmpInOut.imgChannel = input.imgChannel;
    ret = Ft_SafeAlloc_ST_CnnLayInOut(&tmpInOut);
    if(ret != 0)
    {
        LOGE("%s[%05d]...Ft_SafeAlloc_ST_CnnLayInOut(),ret = %d;", __FUNCTION__, __LINE__, ret);
        goto ADAPTIVEAVGPOOL2D_EXIT;
    }

    /*
     * NOTE(review): the copy length is the caller's input.validLen, not the
     * size of the scratch buffer; this assumes input.validLen never exceeds
     * what Ft_SafeAlloc_ST_CnnLayInOut() allocated for the same shape - confirm.
     */
    memcpy(tmpInOut.dataBuff, input.dataBuff, input.validLen);

    *paramlen = 0; /* no convolution-style parameters are consumed */
    memcpy(&laySt, layPtr, sizeof(LayValueSt));
    output->imgH = laySt.kerH;
    output->imgW = laySt.kerW;
    output->imgChannel = input.imgChannel;
    if(output->validLen < laySt.kerH*laySt.kerW*input.imgChannel*sizeof(FP32))
    {
        ret = S_ERROR_LEAK;
        LOGE("%s[%05d]...has error memory leak(),ret = %d;", __FUNCTION__, __LINE__, ret);
        goto ADAPTIVEAVGPOOL2D_EXIT;
    }

    /* Derive the pooling window from the input and output sizes. */
    kerH = tmpInOut.imgH + 1 - output->imgH;
    kerW = tmpInOut.imgW + 1 - output->imgW;
    if(kerH <= 0 || kerW <= 0)
    {
        /* Output larger than input: the window would be empty and the mean
         * below would divide by a zero or negative area. */
        ret = S_INVALID_PARM;
        LOGE("%s[%05d]...pooling window is empty,ret = %d;", __FUNCTION__, __LINE__, ret);
        goto ADAPTIVEAVGPOOL2D_EXIT;
    }
    inBatch = tmpInOut.imgH * tmpInOut.imgW;
    outBatch = output->imgH * output->imgW;
    kerArea = kerH * kerW;

    for(channel = 0; channel < input.imgChannel; channel++)
    {
        /* input_offset / output_offset point at the start of this channel's plane. */
        output_offset_2 = output_offset;
        for(rows = 0; rows < output->imgH; rows++)
        {
            for(cols = 0; cols < output->imgW; cols++)
            {
                avgVal = 0;
                /* First row of the window for output element (rows, cols). */
                input_offset_2 = input_offset + rows * input.imgW;
                for(k_row = 0; k_row < kerH; k_row++)
                {
                    for(k_col = 0; k_col < kerW; k_col++)
                    {
                        avgVal += *(tmpInOut.dataBuff + input_offset_2 + cols + k_col);
                    }
                    input_offset_2 += input.imgW;
                }
                *(output->dataBuff + output_offset_2 + cols) = avgVal / kerArea;
            }
            output_offset_2 += output->imgW;
        }
        input_offset += inBatch;
        output_offset += outBatch;
    }

    //-- exit--
ADAPTIVEAVGPOOL2D_EXIT:
    Ft_SafeFree_ST_CnnLayInOut(&tmpInOut);
    return ret;
}

/*
 * Shape inference for the adaptive average pooling layer.
 *
 * input      : input tensor descriptor.
 * output     : receives the output shape: LayValueSt.kerH x LayValueSt.kerW
 *              with the input's channel count. When bAlloc is non-zero it is
 *              also handed to Ft_SafeAlloc_ST_CnnLayInOut().
 * layPtr     : serialized layer description, copied into a LayValueSt.
 * laylen     : not read by this function.
 * retBufSize : receives imgH * imgW * imgChannel * sizeof(FP32) of the input,
 *              the maximum amount of memory (in bytes) reported for this layer.
 * bAlloc     : non-zero to allocate the output buffer.
 *
 * Returns 0 on success, S_INVALID_ADDRESS when a required pointer is NULL, or
 * the value returned by Ft_SafeAlloc_ST_CnnLayInOut().
 */
SINT32 Net_AdaptiveAvgPool2d_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    SINT32 ret = 0;
    LayValueSt laySt;

    /* retBufSize is written below, so it must be valid as well. */
    if(layPtr == NULL || output == NULL || retBufSize == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto ADAPTIVEAVGPOOL2D_OUTSIZE_EXIT;
    }
    memcpy(&laySt, layPtr, sizeof(LayValueSt));
    /* For AdaptiveAvgPool2d the kernel size fields hold the final output size. */
    output->imgH = laySt.kerH;
    output->imgW = laySt.kerW;
    output->imgChannel = input.imgChannel;
    /* Maximum memory used, in bytes. */
    *retBufSize = input.imgH*input.imgChannel*input.imgW*sizeof(FP32);
    if(bAlloc != 0)
    {
        ret = Ft_SafeAlloc_ST_CnnLayInOut(output);
    }

    //-- exit--
ADAPTIVEAVGPOOL2D_OUTSIZE_EXIT:
    return ret;
}

/*
 * Concatenates two tensors into output: input's bytes first, input2's bytes
 * directly after them.
 *
 * input    : first tensor; input.validLen bytes are copied to the start of
 *            output->dataBuff.
 * input2   : second tensor; must have the same imgH and imgW as input.
 *            input2.validLen bytes are copied to byte offset input.validLen.
 * output   : destination; must already have a data buffer. Its shape is set
 *            to imgH x imgW with input.imgChannel + input2.imgChannel channels.
 * layPtr   : not read by this function.
 * laylen   : not read by this function.
 * paramPtr : not read by this function.
 * paramlen : receives 0 (the layer consumes no parameters).
 *
 * Returns 0 on success, S_INVALID_ADDRESS when a required pointer is NULL,
 * S_INVALID_PARM when the two inputs differ in height or width.
 */
SINT32 Net_My_cat_2_Infer(ST_CnnLayInOut input,ST_CnnLayInOut input2,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;

    /* output and paramlen are both written below. */
    if(output == NULL || paramlen == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto MY_CAT_2_EXIT;
    }
    /* All three buffers take part in the memcpy calls below. */
    if(output->dataBuff == NULL || input.dataBuff == NULL || input2.dataBuff == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto MY_CAT_2_EXIT;
    }

    if(input.imgH != input2.imgH || input.imgW != input2.imgW)
    {
        ret = S_INVALID_PARM;
        goto MY_CAT_2_EXIT;
    }

    /*
     * NOTE(review): no capacity check is performed here. The guard
     * "input.validLen + input2.validLen > output->validLen -> S_ERROR_LEAK"
     * is intentionally left disabled, so the caller must size output for
     * both inputs. Confirm why it was turned off before re-enabling it.
     */
    output->imgH = input.imgH;
    output->imgW = input.imgW;
    output->imgChannel = input.imgChannel + input2.imgChannel;

    memcpy(output->dataBuff, input.dataBuff, input.validLen);
    /* validLen is a byte count, hence the byte-pointer arithmetic. */
    memcpy(((UINT8*)output->dataBuff + input.validLen), input2.dataBuff, input2.validLen);
    *paramlen = 0;

    //-- exit--
MY_CAT_2_EXIT:
    return ret;
}

/*
 * Shape inference for the two-input concatenation layer.
 *
 * The output keeps the inputs' height and width and has as many channels as
 * both inputs together. No scratch memory is reported (*retBufSize = 0).
 *
 * Returns 0 on success, -1 when output, layPtr or retBufSize is NULL,
 * S_INVALID_PARM when the inputs differ in height or width, or the value
 * returned by Ft_SafeAlloc_ST_CnnLayInOut() when bAlloc is non-zero.
 */
SINT32 Net_My_cat_2_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut input2,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    SINT32 status = 0;

    /* This function reports a missing pointer as a plain -1. */
    if(NULL == output || NULL == layPtr || NULL == retBufSize)
    {
        return -1;
    }

    /* Both inputs must share the same spatial size. */
    if(!(input.imgH == input2.imgH && input.imgW == input2.imgW))
    {
        return S_INVALID_PARM;
    }

    output->imgChannel = input.imgChannel + input2.imgChannel;
    output->imgW = input.imgW;
    output->imgH = input.imgH;
    *retBufSize = 0;

    if(bAlloc == 0)
    {
        return status;
    }

    status = Ft_SafeAlloc_ST_CnnLayInOut(output);
    return status;
}

/*
 * Per-channel scaling: every element of channel ch in input is multiplied by
 * the single value input2.dataBuff[ch].
 *
 * input    : tensor to scale.
 * input2   : scale factors; must be 1 x 1 spatially with the same channel
 *            count as input.
 * output   : destination; must already have a data buffer and the same
 *            imgH / imgW / imgChannel as input.
 * layPtr   : not read by this function.
 * laylen   : not read by this function.
 * paramPtr : not read by this function.
 * paramlen : receives 0 (the layer consumes no parameters).
 *
 * Returns 0 on success, S_INVALID_ADDRESS when a required pointer is NULL,
 * S_INVALID_PARM when the shapes are not acceptable.
 */
SINT32 Net_My_linear_mul_channel_Infer(ST_CnnLayInOut input,ST_CnnLayInOut input2,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,FP32* paramPtr,SINT32* paramlen)
{
    SINT32 ret = 0;
    SINT32 ch = 0, batchsize = 0;
    SINT32 idx = 0, output_offset = 0;

    /* output and paramlen are both written below. */
    if(output == NULL || paramlen == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto MY_LINEAR_MUL_CHANNEL_EXIT;
    }
    /* All three buffers are dereferenced in the loop below. */
    if(output->dataBuff == NULL || input.dataBuff == NULL || input2.dataBuff == NULL)
    {
        ret = S_INVALID_ADDRESS;
        goto MY_LINEAR_MUL_CHANNEL_EXIT;
    }

    if(1 != input2.imgH || 1 != input2.imgW || input.imgChannel != input2.imgChannel)
    {
        ret = S_INVALID_PARM;
        goto MY_LINEAR_MUL_CHANNEL_EXIT;
    }

    if(input.imgH != output->imgH || input.imgW != output->imgW || input.imgChannel != output->imgChannel)
    {
        ret = S_INVALID_PARM;
        goto MY_LINEAR_MUL_CHANNEL_EXIT;
    }

    output_offset = 0;
    batchsize = input.imgH * input.imgW; /* elements in one channel plane */
    for(ch = 0; ch < input.imgChannel; ch++)
    {
        for(idx = 0; idx < batchsize; idx++)
        {
            *(output->dataBuff + output_offset + idx) = *(input.dataBuff + output_offset + idx) * *(input2.dataBuff + ch);
        }
        output_offset += batchsize;
    }
    *paramlen = 0;

    //-- exit--
MY_LINEAR_MUL_CHANNEL_EXIT:
    return ret;
}

/*
 * Shape inference for the per-channel scaling layer.
 *
 * The output has exactly the shape of input, and no scratch memory is
 * reported (*retBufSize = 0).
 *
 * Returns 0 on success, S_INVALID_ADDRESS when output, layPtr or retBufSize
 * is NULL, S_INVALID_PARM when input2 is not 1 x 1 spatially or its channel
 * count differs from input's, or the value returned by
 * Ft_SafeAlloc_ST_CnnLayInOut() when bAlloc is non-zero.
 */
SINT32 Net_My_linear_mul_channel_OutSize_Infer(ST_CnnLayInOut input,ST_CnnLayInOut input2,ST_CnnLayInOut* output,SINT16* layPtr,SINT32 laylen,SINT32* retBufSize,SINT32 bAlloc)
{
    SINT32 status = 0;

    if(NULL == output || NULL == layPtr || NULL == retBufSize)
    {
        return S_INVALID_ADDRESS;
    }

    /* input2 must supply one value per channel of input. */
    if(!(input2.imgH == 1 && input2.imgW == 1 && input2.imgChannel == input.imgChannel))
    {
        return S_INVALID_PARM;
    }

    output->imgChannel = input.imgChannel;
    output->imgW = input.imgW;
    output->imgH = input.imgH;
    *retBufSize = 0;

    if(bAlloc == 0)
    {
        return status;
    }

    status = Ft_SafeAlloc_ST_CnnLayInOut(output);
    return status;
}

#endif

 

你可能感兴趣的:(模型压缩-转换与部署,算法,深度学习,linux,c++,c语言)