96年的夏天,我刚上大一下学期,那时学习的语言是fortan 77,上机是VME中型机,操作系统是Novell,教课的是颇有名气的薛学勤教授,我利用业余时间用这门语言做了两个程序,一个是类似autocad的全屏二维平面设计程序,另一个就是求一百位PI值,很不幸,在写一百位pi值的时候,被薛老师发现了,他告诫我说应该把基础打好,不要一上来就写复杂的程序,这样对自己发展不好。最后的结果是我的写完了这个程序,总共1000行左右,并且把它稍微改动了一下,又求出了100位自然对数的值,并把结果打印到了最后留给我们的打印纸上,同时我也因此被迫写了一份大学唯一的检查。呵呵。 我的那份程序的源代码早已丢失,好在其算法还记得,就是利用计算PI的级数公式: atan(X)=X-X^3/3+X^5/5-... PI=4*(atan(1)) atan(1)=4*atan(1/5)-atan(1/239) 这个公式是我高中的时候看一本《实数》的小册子读到的,很不错,没用多少步(少于30步)就得到了想要的精度。 为了达到100位的精度,四则运算过程都要保持一百位以上的精度,为了简单,特规定了几组30长的整数数组,(乘法需要加倍),每个整数保存10进制的4位值。(Fortan77中整形是16位的) 比如两个数相乘,(fortan语法忘光了,故用C++描述): vector<int> bigmul(vector<int> A, vector<int> B) // a*b { vector<int> result(A.size()+B.size()+1); int i,j; for(i=0; i<B.size(); i++) { if(B[i]==0) continue; for(j=0; j<A.size(); j++) { if(A[j]==0) continue; result[i+j]+=A[j]*B[i]; result[i+j+1]+=result[i+j]/10000; result[i+j]%=10000; } } return result; } 计算pi所用的公式很落后。当然,这是受当时的知识水平所局限。 现代计算PI常用的公式有: 426880 * sqrt(10005) / PI = ∑(0,N)(0,N,(6n)!(545140134n+13591409)/((n!)^3)(3n)!(-640320)^3n) PI=∑(0,N)(1,N,(1+1(4n*n-1)))/sum(1,N,1/(4*n*n-1)) PI=∑(0,N)(4/(8n+1)-2/(8n+4)-1/(8n+5)-1/(8n+6))/16^n 数字切分则有更高效的二进制切分算法: /* * Compute the numerator P and denominator Q of * P/Q = 1/(N0+1) + 1/(N0+1)/(N0+2) + ... + 1/(N0+1)/.../N1 */ void BinarySplittingE(long N0, long N1, BigInt * P, BigInt * Q) { BigInt PP, QQ; long NMid; if (N1-N0==1) { P->Size = Q->Size = 1; P->Coef[0] = 1.; Q->Coef[0] = (double) N1; UpdateBigInt(P); UpdateBigInt(Q); return; } NMid = (N0+N1)/2; BinarySplittingE(N0,NMid,P,Q); /* To save memory, take the non used coefficients of P and Q for coefficient of the BigInt used in the second splitting part */ PP.Coef = P->Coef + P->Size; PP.SizeMax = P->SizeMax-P->Size; QQ.Coef = Q->Coef + Q->Size; QQ.SizeMax = Q->SizeMax-Q->Size; BinarySplittingE(NMid,N1,&PP,&QQ); MulBigInt(P,&QQ,&tmpBigInt); AddBigInt(&tmpBigInt,&PP,P); MulBigInt(Q,&QQ,Q); } 如果大家对大数运算有了解的话,就会很清楚,这个算法太弱了, 利用对大数做fft变换后,利用其系数正交性,运算复杂度会从O(n^2)降低到O(nLog(n)). #include <stdlib.h> #include <stdio.h> #include <math.h> #define PI 3.1415926535897932384626 typedef struct ComplexTag { Real R,I; } Complex; long FFTLengthMax; Complex * OmegaFFT; Complex * ArrayFFT0, * ArrayFFT1; Complex * ComplexCoef; double FFTSquareWorstError; long AllocatedMemory; void InitializeFFT(long MaxLength) { long i; Real Step; FFTLengthMax = MaxLength; OmegaFFT = (Complex *) malloc(MaxLength/2*sizeof(Complex)); ArrayFFT0 = (Complex *) malloc(MaxLength*sizeof(Complex)); ArrayFFT1 = (Complex *) malloc(MaxLength*sizeof(Complex)); ComplexCoef = (Complex *) malloc(MaxLength*sizeof(Complex)); Step = 2.*PI/(double) MaxLength; for (i=0; 2*i<MaxLength; i++) { OmegaFFT[i].R = cos(Step*(double)i); OmegaFFT[i].I = sin(Step*(double)i); } FFTSquareWorstError=0.; AllocatedMemory = 7*MaxLength*sizeof(Complex)/2; } void RecursiveFFT(Complex * Coef, Complex * FFT, long Length, long Step, long Sign) { long i, OmegaStep; Complex * FFT0, * FFT1, * Omega; Real tmpR, tmpI; if (Length==2) { FFT[0].R = Coef[0].R + Coef[Step].R; FFT[0].I = Coef[0].I + Coef[Step].I; FFT[1].R = Coef[0].R - Coef[Step].R; FFT[1].I = Coef[0].I - Coef[Step].I; return; } FFT0 = FFT; RecursiveFFT(Coef ,FFT0,Length/2,Step*2,Sign); FFT1 = FFT+Length/2; RecursiveFFT(Coef+Step,FFT1,Length/2,Step*2,Sign); Omega = OmegaFFT; OmegaStep = FFTLengthMax/Length; for (i=0; 2*i<Length; i++, Omega += OmegaStep) { /* Recursion formula for FFT : FFT[i] <- FFT0[i] + Omega*FFT1[i] FFT[i+Length/2] <- FFT0[i] - Omega*FFT1[i], Omega = exp(2*I*PI*i/Length) */ tmpR = Omega[0].R*FFT1[i].R-Sign*Omega[0].I*FFT1[i].I; tmpI = Omega[0].R*FFT1[i].I+Sign*Omega[0].I*FFT1[i].R; FFT1[i].R = FFT0[i].R - tmpR; FFT1[i].I = FFT0[i].I - tmpI; FFT0[i].R = FFT0[i].R + tmpR; FFT0[i].I = FFT0[i].I + tmpI; } } /* Compute the complex Fourier Transform of Coef into FFT */ void FFT(Real * Coef, long Length, Complex * FFT, long NFFT) { long i; /* Transform array of real coefficient into array of complex */ for (i=0; i<Length; i++) { ComplexCoef[i].R = Coef[i]; ComplexCoef[i].I = 0.; } for (; i<NFFT; i++) ComplexCoef[i].R = ComplexCoef[i].I = 0.; RecursiveFFT(ComplexCoef,FFT,NFFT,1,1); } /* Compute the inverse Fourier Transform of FFT into Coef */ void InverseFFT(Complex * FFT, long NFFT, Real * Coef, long Length) { long i; Real invNFFT = 1./(Real) NFFT, tmp; RecursiveFFT(FFT, ComplexCoef, NFFT, 1, -1); for (i=0; i<Length; i++) { /* Closest integer to ComplexCoef[i].R/NFFT */ tmp = invNFFT*ComplexCoef[i].R; Coef[i] = floor(0.5+tmp); if ((tmp-Coef[i])*(tmp-Coef[i])>FFTSquareWorstError) FFTSquareWorstError = (tmp-Coef[i])*(tmp-Coef[i]); } } void Convolution(Complex * A, Complex * B, long NFFT, Complex * C) { long i; Real tmpR, tmpI; for (i=0; i<NFFT; i++) { tmpR = A[i].R*B[i].R-A[i].I*B[i].I; tmpI = A[i].R*B[i].I+A[i].I*B[i].R; C[i].R = tmpR; C[i].I = tmpI; } } void MulWithFFT(Real * ACoef, long ASize, Real * BCoef, long BSize, Real * CCoef) { long NFFT = 2; while (NFFT<ASize+BSize) NFFT *= 2; if (NFFT>FFTLengthMax) { printf("Error, FFT Size is too big in MulWithFFT\n"); return; } FFT(ACoef, ASize, ArrayFFT0, NFFT); FFT(BCoef, BSize, ArrayFFT1, NFFT); Convolution(ArrayFFT0,ArrayFFT1,NFFT,ArrayFFT0); InverseFFT(ArrayFFT0,NFFT,CCoef, ASize+BSize-1); } NTT(Number theoretic transforms)则巧妙的使用了孫子定理和质数性质, 将运算过程限定在一个特定的整数范围,避免了FFT快速乘法因浮点运算而带来的误差。基本思想则和上者相同。 // The core of the apfloat multiplication // Return a new apstruct apstruct *apmul (apstruct *a, apstruct *b) { int i, sign; size_t n, prec, size, rsize; apstruct *ap; size_t trueasize, truebsize; assert (a); // Won't work on uninitialized apfloats assert (b); sign = a->sign * b->sign; if (!sign) return new apstruct; // Zero prec = min (a->prec, b->prec); size = min (prec, a->size + b->size); trueasize = min (a->size, prec); truebsize = min (b->size, prec); n = rnd23up (trueasize + truebsize); rsize = min (n, size + 2); if (a == b) ap = autoconvolution (a, rsize, trueasize, n, &i); else ap = convolution (a, b, rsize, trueasize, truebsize, n, &i); size -= lastzeros (ap, size); ap->relocate (DEFAULT, size); ap->sign = sign; ap->prec = prec; ap->exp = a->exp + b->exp - 1 + i; return ap; } // Round up to the nearest power of two or three times a power of two size_t rnd23up (size_t x) { size_t r = 1; if (!x) return 0; while (r < x) { if (r == 1) r = 2; else if (r == (r & -r)) r = r / 2 * 3; else r = r / 3 * 4; } return r; } // Convolution of the mantissas in s1 and s2 // Use sizes s1size and s2size correspondingly // Result size is rsize // Transform length is n // *i = 1 if right shift occurred, otherwise 0 apstruct *convolution (apstruct *s1, apstruct *s2, size_t rsize, size_t s1size, size_t s2size, size_t n, int *i) { int location = (n > Maxblocksize ? DISK : MEMORY); apstruct *tmp1; apstruct *tmp2; apstruct *tmp3; apstruct *tmp4; if (MAXTRANSFORMLENGTH) assert (n <= MAXTRANSFORMLENGTH); // Otherwise it won't work setmodulus (moduli[2]); tmp2 = new apstruct (*s2, s2size, location, n); if (location != MEMORY) { fstream &fs = tmp2->openstream (); tabletwopassfnttrans (fs, primitiveroots[2], 1, n); tmp2->closestream (); } else { modint *data = tmp2->getdata (0, n); tablesixstepfnttrans (data, primitiveroots[2], 1, n); tmp2->putdata (); tmp2->relocate (DEFAULT); } tmp1 = new apstruct (*s1, s1size, location, n); if (location != MEMORY) { fstream &fs = tmp1->openstream (); tabletwopassfnttrans (fs, primitiveroots[2], 1, n); tmp1->closestream (); } else { modint *data = tmp1->getdata (0, n); tablesixstepfnttrans (data, primitiveroots[2], 1, n); tmp1->putdata (); } multiplyinplace (tmp1, tmp2); delete tmp2; if (location != MEMORY) { fstream &fs = tmp1->openstream (); itabletwopassfnttrans (fs, primitiveroots[2], -1, n); tmp1->closestream (); } else { modint *data = tmp1->getdata (0, n); itablesixstepfnttrans (data, primitiveroots[2], -1, n); tmp1->putdata (); tmp1->relocate (DEFAULT); } setmodulus (moduli[1]); tmp3 = new apstruct (*s2, s2size, location, n); if (location != MEMORY) { fstream &fs = tmp3->openstream (); tabletwopassfnttrans (fs, primitiveroots[1], 1, n); tmp3->closestream (); } else { modint *data = tmp3->getdata (0, n); tablesixstepfnttrans (data, primitiveroots[1], 1, n); tmp3->putdata (); tmp3->relocate (DEFAULT); } tmp2 = new apstruct (*s1, s1size, location, n); if (location != MEMORY) { fstream &fs = tmp2->openstream (); tabletwopassfnttrans (fs, primitiveroots[1], 1, n); tmp2->closestream (); } else { modint *data = tmp2->getdata (0, n); tablesixstepfnttrans (data, primitiveroots[1], 1, n); tmp2->putdata (); } multiplyinplace (tmp2, tmp3); delete tmp3; if (location != MEMORY) { fstream &fs = tmp2->openstream (); itabletwopassfnttrans (fs, primitiveroots[1], -1, n); tmp2->closestream (); } else { modint *data = tmp2->getdata (0, n); itablesixstepfnttrans (data, primitiveroots[1], -1, n); tmp2->putdata (); tmp2->relocate (DEFAULT); } setmodulus (moduli[0]); tmp4 = new apstruct (*s2, s2size, location, n); if (location != MEMORY) { fstream &fs = tmp4->openstream (); tabletwopassfnttrans (fs, primitiveroots[0], 1, n); tmp4->closestream (); } else { modint *data = tmp4->getdata (0, n); tablesixstepfnttrans (data, primitiveroots[0], 1, n); tmp4->putdata (); tmp4->relocate (DEFAULT); } tmp3 = new apstruct (*s1, s1size, location, n); if (location != MEMORY) { fstream &fs = tmp3->openstream (); tabletwopassfnttrans (fs, primitiveroots[0], 1, n); tmp3->closestream (); } else { modint *data = tmp3->getdata (0, n); tablesixstepfnttrans (data, primitiveroots[0], 1, n); tmp3->putdata (); } multiplyinplace (tmp3, tmp4); delete tmp4; if (location != MEMORY) { fstream &fs = tmp3->openstream (); itabletwopassfnttrans (fs, primitiveroots[0], -1, n); tmp3->closestream (); } else { modint *data = tmp3->getdata (0, n); itablesixstepfnttrans (data, primitiveroots[0], -1, n); tmp3->putdata (); } *i = carrycrt (tmp3, tmp2, tmp1, rsize); delete tmp1; delete tmp2; // Return value can remain in memory return tmp3; } // Linear multiplication in the number theoretic domain // Assume that ds1 will be in memory if possible void multiplyinplace (apstruct *ds1, apstruct *s2) { size_t t, p = ds1->size, l, r = 0; modint *buf1, *buf2; while (p) { l = (p < Blocksize ? p : Blocksize); p -= l; buf1 = ds1->getdata (r, l); buf2 = s2->getdata (r, l); r += l; for (t = 0; t < l; t++) buf1[t] *= buf2[t]; ds1->putdata (); s2->cleardata (); } }