快速傅氏变换之旅(二) 七种FFT算法速度比较(含代码)

转载请标明是引用于 http://blog.csdn.net/chenyujing1234

 例子代码:(编译工具:VS2005)

http://www.rayfile.com/zh-cn/files/76968e5e-7bde-11e1-8c13-0015c55db73d/

由于公司要做FFT算法,具体是做高尔夫球的弹道分析,至今还没把算法敲定。

今天网上看了算法,自己建立工程,进行了比较。

现在按算法速度从快到慢的顺序介绍:(先搭个框架,具体算法解释,请听下回分解)

1、

//*******************************************************************
// 将data[]中的元素进行快速傅立叶变换,当变换长度大于L2 cache,有较快的速度
BOOL FFT2(CMPL data[], size_t n)
//*******************************************************************
{
	unsigned long i;
	//------------------------------------
	if (n<4)
	{
		printf("too small transpos length\n");
		return FALSE;
	}	
	
	Init_OMAGE_ARRAY(n);
	if (g_w.arr==NULL)
	{
		printf("no enough memory\n");
		return FALSE;
	}
	reverseOrder(data,n,Log2(n)); //反序
    
	if (n<=MAX_IN_CACHE_TRANS_LEN)
	{
		fft_sub(data,n,2,n);
		return TRUE;
	}
	
	for (i=0;i<n;i+=MAX_IN_CACHE_TRANS_LEN)
	{
		fft_sub(data+i,MAX_IN_CACHE_TRANS_LEN,2,MAX_IN_CACHE_TRANS_LEN);
	}
	
	fft_sub(data,n,MAX_IN_CACHE_TRANS_LEN*2,n);
	return TRUE;
}


 

//*******************************************************************
// 将data[]中的元素进行快速傅立叶变换
BOOL FFT1(CMPL data[], size_t n)
//*******************************************************************
{
	unsigned long i,i1,i2,j1,j2,d;
	unsigned long groupBase,groupLen,omageBase;
	CMPL t,t1,t2;
	double *pW=NULL;
	char fileName[3209];
	if (n<4)
	{
		printf("too small transpos length\n");
		return FALSE;
	}	
	
	Init_OMAGE_ARRAY(n);
	if (g_w.arr==NULL)
	{
		printf("no enough memory\n");
		return FALSE;
	}
	
	reverseOrder(data,n,Log2(n)); //反序
    
	for (i=0;i<n;i+=2) // d: 翅间距=1, buffterfly group length =2, group number =n/2
	{
		t=data[i+1];
		CMPL_SUB(data[i+1],data[i],t);
		CMPL_ADD(data[i],  data[i],t);
	}
	
	#ifdef OUT_INTER_RESULT
			sprintf(fileName,"../test result/data1_%08d_FFT1.bin",2);
			dumpData(data,n,fileName);
	#endif

	for (i=0;i<n;i+=4)    // 翅间距=2, buffterfly group length =4 ,group number =n/4
	{
		t1=data[i+2];
		
		t2.Re=  data[i+3].Im;	//t2=(0,-1)*(data[i+3].Re,data[i+3].Im)
		t2.Im= -data[i+3].Re;

		CMPL_SUB(data[i+2],data[i],t1);
		CMPL_ADD(data[i],  data[i],t1);

		CMPL_SUB(data[i+3],data[i+1],t2);
		CMPL_ADD(data[i+1],data[i+1],t2);
	}
	#ifdef OUT_INTER_RESULT
			sprintf(fileName,"../test result/data1_%08d__FFT1.bin",4);
			dumpData(data,n,fileName);
	#endif

	// 翅间距>=4, buffterfly group length >=8 ,group number =n/group length
	for (groupLen=8;groupLen<=n;groupLen*=2 )
	{
 		d=groupLen/2;	//d: 翅间距
		omageBase=g_w.tabIndex[Log2(groupLen)-2].offset; 
		pW= (double *)(g_w.arr)+omageBase;	//omage array address
		for ( groupBase = 0; groupBase<n; groupBase+=groupLen)
		{
			DWORD r,t;
			i1=groupBase;				// butterfly 1 left-up index
			i2=groupBase+d/2;

			j1=i1+d;				// butterfly 1 left-down index	
			j2=i2+d;

			t1 =    data[j1];	 	//t1= e^(-2*PI*0 i) * data[j1]
			t2.Re=  data[j2].Im;	//t2= e^(-2*PI/4 i) * data[j2]
			t2.Im= -data[j2].Re;    // complex:(0,-1) * data[j2]

			CMPL_SUB(data[j1],data[i1],t1);
			CMPL_ADD(data[i1],data[i1],t1);
		    //-------------------------------
			
			CMPL_SUB(data[j2],data[i2],t2);
			CMPL_ADD(data[i2],data[i2],t2);	
			
			i1++;		i2++;
			j1++;		j2++;
			
			for (r=1; r<d/2; i1++,i2++,j1++,j2++,r++)
			{
           		t=groupLen/4-r;

				//w1.Re= pW[r];
				//w1.Im= -pW[t];
				//w2.Re= -pw[t];
				//w2.Im= -pw[r];
				
				t1.Re = ( pW[r]  * data[j1].Re +  pW[t] * data[j1].Im);
				t1.Im = ( -pW[t] * data[j1].Re + pW[r]  * data[j1].Im);
	
				t2.Re = ( -pW[t] * data[j2].Re +  pW[r]  * data[j2].Im);
				t2.Im = ( -pW[r]  * data[j2].Re -  pW[t] * data[j2].Im);
	
				CMPL_SUB(data[j1], data[i1], t1);
				CMPL_ADD(data[i1], data[i1], t1);

				CMPL_SUB(data[j2], data[i2], t2);
				CMPL_ADD(data[i2], data[i2], t2);
            }
		}
	#ifdef OUT_INTER_RESULT
		sprintf(fileName,"../test result/data1_%08d_FFT1.bin",groupLen);
		dumpData(data,n,fileName);
	
	#endif
	}
	return TRUE;

}


2、

void fft(int n, double xRe[], double xIm[],
                double yRe[], double yIm[])
{
    int   sofarRadix[maxFactorCount], 
          actualRadix[maxFactorCount], 
          remainRadix[maxFactorCount];
    int   nFactor;
    int   count;

    pi = 4*atan(1);    

    transTableSetup(sofarRadix, actualRadix, remainRadix, &nFactor, &n);
    permute(n, nFactor, actualRadix, remainRadix, xRe, xIm, yRe, yIm);

    for (count=1; count<=nFactor; count++)
      twiddleTransf(sofarRadix[count], actualRadix[count], remainRadix[count], 
                    yRe, yIm);
}   /* fft */


 

3、

void four1(double data[], int nn, int isign)
{
    int n,j,i,m,mmax,istep;
    double tempr,tempi,theta,wpr,wpi,wr,wi,wtemp;
    n = 2 * nn;
    j = 1;
    for (i = 1; i<=n; i=i+2)
    {
        if(j > i)
        {
            tempr = data[j];
            tempi = data[j + 1];
            data[j] = data[i];
            data[j + 1] = data[i + 1];
            data[i] = tempr;
            data[i + 1] = tempi;
        }
        m = n / 2;
        while (m >= 2 && j > m)
        {
            j = j - m;
            m = m / 2;
        }
        j = j + m;
    }
    mmax = 2;
    while(n > mmax)
    {
        istep = 2 * mmax;
        theta = 6.28318530717959 / (isign * mmax);
        wpr = -2.0 * sin(0.5 * theta)*sin(0.5 * theta);
        wpi = sin(theta);
        wr = 1.0;
        wi = 0.0;
        for(m = 1; m<=mmax; m=m+2)
        {
            for (i = m; i<=n; i=i+istep)
            {
                j = i + mmax;
                tempr=double(wr)*data[j]-double(wi)*data[j+1];
                tempi=double(wr)*data[j+1]+double(wi)*data[j];
                data[j] = data[i] - tempr;
                data[j + 1] = data[i + 1] - tempi;
                data[i] = data[i] + tempr;
                data[i + 1] = data[i + 1] + tempi;
            }
            wtemp = wr;
            wr = wr * wpr - wi * wpi + wr;
            wi = wi * wpr + wtemp * wpi + wi;
        }
        mmax = istep;
    }
}


4、

void fft(COMPLEX in[],COMPLEX out[], COMPLEX omega[],int n)
{
	int s,k,m,l,nv,t,j;
	COMPLEX podd,ret;

	k = (int)(log(n) / log(2) + 0.5);
	nv = n;
	m = 1;
	for ( l = k-1 ; l >= 0 ; l-- )
	{
		for ( t = 0 ; t < m * nv ; t+=nv )
			for ( j = 0 ; j < nv/2 ; j++ )
			{
				s = (t+j) / (int)pow(2,l);
				s = reverse(s,k);

				ret = omega[s];
				mul(&ret,&in[t+j+nv/2],&podd);
				sub(&in[t+j],&podd,&in[t+j+nv/2]);
				add(&in[t+j],&podd,&in[t+j]);
			}
		m *= 2;
		nv /= 2;
	}

	for ( t = 0 ; t < n ; t++ )
	{
		s = reverse(t,k);
		out[t] = in[s];
	}
}


5、

void cdft(int n, int isgn, double *a)
{
    void cftfsub(int n, double *a);
    void cftbsub(int n, double *a);
    
    if (isgn >= 0) {
        cftfsub(n, a);
    } else {
        cftbsub(n, a);
    }
}


 

void cftfsub(int n, double *a)
{
    void bitrv2(int n, double *a);
    void bitrv216(double *a);
    void bitrv208(double *a);
    void cftmdl1(int n, double *a);
    void cftrec4(int n, double *a);
    void cftleaf(int n, int isplt, double *a);
    void cftfx41(int n, double *a);
    void cftf161(double *a);
    void cftf081(double *a);
    void cftf040(double *a);
    void cftx020(double *a);
#ifdef USE_CDFT_THREADS
    void cftrec4_th(int n, double *a);
#endif /* USE_CDFT_THREADS */
    
    if (n > 8) {
        if (n > 32) {
            cftmdl1(n, a);
#ifdef USE_CDFT_THREADS
            if (n > CDFT_THREADS_BEGIN_N) {
                cftrec4_th(n, a);
            } else 
#endif /* USE_CDFT_THREADS */
            if (n > 512) {
                cftrec4(n, a);
            } else if (n > 128) {
                cftleaf(n, 1, a);
            } else {
                cftfx41(n, a);
            }
            bitrv2(n, a);
        } else if (n == 32) {
            cftf161(a);
            bitrv216(a);
        } else {
            cftf081(a);
            bitrv208(a);
        }
    } else if (n == 8) {
        cftf040(a);
    } else if (n == 4) {
        cftx020(a);
    }
}


void cftbsub(int n, double *a)
{
    void bitrv2conj(int n, double *a);
    void bitrv216neg(double *a);
    void bitrv208neg(double *a);
    void cftb1st(int n, double *a);
    void cftrec4(int n, double *a);
    void cftleaf(int n, int isplt, double *a);
    void cftfx41(int n, double *a);
    void cftf161(double *a);
    void cftf081(double *a);
    void cftb040(double *a);
    void cftx020(double *a);
#ifdef USE_CDFT_THREADS
    void cftrec4_th(int n, double *a);
#endif /* USE_CDFT_THREADS */
    
    if (n > 8) {
        if (n > 32) {
            cftb1st(n, a);
#ifdef USE_CDFT_THREADS
            if (n > CDFT_THREADS_BEGIN_N) {
                cftrec4_th(n, a);
            } else 
#endif /* USE_CDFT_THREADS */
            if (n > 512) {
                cftrec4(n, a);
            } else if (n > 128) {
                cftleaf(n, 1, a);
            } else {
                cftfx41(n, a);
            }
            bitrv2conj(n, a);
        } else if (n == 32) {
            cftf161(a);
            bitrv216neg(a);
        } else {
            cftf081(a);
            bitrv208neg(a);
        }
    } else if (n == 8) {
        cftb040(a);
    } else if (n == 4) {
        cftx020(a);
    }
}


6、

//*************************************************************************
// *
// * 函数名称:
// *   FFT()
// *
// * 参数:
// *   complex<double> * TD- 指向时域数组的指针
// *   complex<double> * FD- 指向频域数组的指针
// *   r-2的幂数,即迭代次数
// *
// * 返回值:
// *   无。
// *
// * 说明:
// *   该函数用来实现快速付立叶变换。
// *
// ************************************************************************/
void FFT(	complex<double> *TD,  complex<double> *FD, 
			complex<double> *X1,  complex<double> *X2,
			complex<double> *Omega,	  int r)
{
	// 付立叶变换点数
	long count;

	// 循环变量
	int i,j,k;

	// 中间变量
	int bfsize,p;

	// 角度
	double angle;

	complex<double> *X;

	// 计算付立叶变换点数
	count = 1 << r;

	// 分配运算所需存储器
	//Omega  = new complex<double>[count / 2];
	//X1 = new complex<double>[count];
	//X2 = new complex<double>[count];

	// 计算加权系数
	for(i = 0; i < count / 2; i++)
	{
		angle = -i * 3.1415926 * 2 / count;
		Omega[i] = complex<double>(cos(angle), sin(angle));
	}

	// 将时域点写入X1
	memcpy(X1, TD, sizeof(complex<double>) * count);

	// 采用蝶形算法进行快速付立叶变换
	for(k = 0; k < r; k++)
	{
		for(j = 0; j < 1 << k; j++)
		{
			bfsize = 1 << (r-k);
			for(i = 0; i < bfsize / 2; i++)
			{
				p = j * bfsize;
				X2[i + p] = X1[i + p] + X1[i + p + bfsize / 2];
				X2[i + p + bfsize / 2] = (X1[i + p] - X1[i + p + bfsize / 2]) * Omega[i * (1<<k)];
			}
		}
		X  = X1;
		X1 = X2;
		X2 = X;
	}

	// 重新排序
	for(j = 0; j < count; j++)
	{
		p = 0;
		for(i = 0; i < r; i++)
		{
			if (j&(1<<i))
			{
				p+=1<<(r-i-1);
			}
		}
		FD[j]=X1[p];
	}

	// 释放内存
	//delete Omega;
	//delete X1;
	//delete X2;
}


 

你可能感兴趣的:(算法,框架,cache,null,ini,fft)