【洛谷P4245】【模板】 任意模数NTT(MTT)

传送门


首先是一种只用两次 $\mathrm{DFT}$ 的卷积写法

若求 $A(x)*B(x)$

$P(x)=A(x)+iB(x)$
$Q(x)=A(x)-iB(x)$

那么如果把 $P(x)$ 的 $\mathrm{DFT}$ 求出来
$P'_{[k]}=\sum_{j=0}^{l-1}\left(A_j\omega_{l}^{jk}+iB_j\omega_{l}^{jk}\right)$
$=\sum_{j=0}^{l-1}(A_j+iB_j)(\cos X+i\sin X),\quad X=\frac{2\pi jk}{l}$

$Q'_{[k]}=\sum_{j=0}^{l-1}(A_j-iB_j)(\cos X+i\sin X)$
$=\sum_{j}\left[(A_j\cos X+B_j\sin X)-i(B_j\cos X-A_j\sin X)\right]$
$=\mathrm{conj}\left(\sum_j\left[(A_j\cos X+B_j\sin X)+i(B_j\cos X-A_j\sin X)\right]\right)$
$=\mathrm{conj}\left(\sum_j\left[(A_j\cos(-X)-B_j\sin(-X))+i(B_j\cos(-X)+A_j\sin(-X))\right]\right)$
$=\mathrm{conj}\left(\sum_j(A_j+iB_j)(\cos(-X)+i\sin(-X))\right)$
$=\mathrm{conj}\left(P'_{[(l-k)\bmod l]}\right)$

于是只用一次 $\mathrm{DFT}$ 即可求出 $P',Q'$
然后 $A'=\frac{P'+Q'}{2},\ B'=i\frac{Q'-P'}{2}$
然后一次 $\mathrm{IDFT}$ 即可

另外一个关于 $\mathrm{FFT}$ 的优化
实际上对于 $\mathrm{IDFT}$,做完之后应该是只剩实数部分的
所以可以把两个多项式 $A,B$ 做成 $A+iB$ 的形式 $\mathrm{IDFT}$ 回来
这样可以在对于多个多项式 $\mathrm{IDFT}$ 的时候简化

对于任意模数,用 $\mathrm{FFT}$ 做的问题是会炸精度
于是考虑把值拆成 $A_1*2^{15}+A_2$ 的形式

相当于是对 $4$ 个多项式变换
首先 $\mathrm{DFT}$ 可以只做两次了
然后 $\mathrm{IDFT}$ 可以利用合并的技巧也只用做两次
常数不是很大

另外预处理单位根
每次乘 $\omega_n$ 也很耗费精度
所以每隔一些就重新用 $\sin,\cos$ 计算

#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstdio>
#include <iostream>
#include <utility>
#include <vector>
using namespace std;
// Shorthand macros used throughout this file.
#define cs const
#define pb push_back
// Pair of two ints. The template arguments are required: a bare `pair`
// cannot be used to declare a default-constructed variable or a parameter.
#define pii pair<int,int>
#define fi first
#define se second
#define ll long long
// Not used by the code in this file; note that the `register` storage class
// was removed in C++17, so expanding this macro there is ill-formed.
#define re register
// Number of bytes requested from stdin by each refill in gc().
const int RLEN=(1<<20)|1;

// Returns the next byte of stdin. Bytes are fetched in bulk with fread() into
// a static buffer; once a refill yields nothing, EOF (narrowed to char) is
// returned on this and every later call.
inline char gc(){
	static char ibuf[RLEN],*ib,*ob;
	if(ib==ob){
		ib=ibuf;
		ob=ibuf+std::fread(ibuf,1,RLEN,stdin);
	}
	return (ib==ob)?EOF:*ib++;
}

// Reads the next decimal integer from stdin via gc().
// Every non-digit byte before the number is skipped; each '-' among them
// toggles the sign of the result. Returns 0 if the input ends before any
// digit is seen, instead of spinning forever on the end-of-input marker.
inline int read(){
	// gc() reports end of input as EOF narrowed to char, so compare against
	// the same narrowed value (works for both signed and unsigned char).
	const char eof_mark=static_cast<char>(EOF);
	char ch=gc();
	bool positive=true;
	// isdigit() requires a value representable as unsigned char, hence the casts.
	while(ch!=eof_mark&&!std::isdigit(static_cast<unsigned char>(ch))){
		if(ch=='-')positive=!positive;
		ch=gc();
	}
	int res=0;
	while(ch!=eof_mark&&std::isdigit(static_cast<unsigned char>(ch))){
		res=res*10+(ch-'0');
		ch=gc();
	}
	return positive?res:-res;
}
// Modulus for all coefficients: assigned from the input in main() and used by
// main() and mul() to reduce values.
int mod;
// Minimal complex number over double; the element type of the FFT arrays.
struct plx{
	double x,y; // x: real part, y: imaginary part

	// Builds x + y*i; both parts default to zero.
	plx(double _x=0,double _y=0):x(_x),y(_y){}

	// Component-wise sum.
	friend inline plx operator +(const plx &lhs,const plx &rhs){
		plx sum;
		sum.x=lhs.x+rhs.x;
		sum.y=lhs.y+rhs.y;
		return sum;
	}

	// Component-wise difference.
	friend inline plx operator -(const plx &lhs,const plx &rhs){
		plx diff;
		diff.x=lhs.x-rhs.x;
		diff.y=lhs.y-rhs.y;
		return diff;
	}

	// Complex product: (a+bi)(c+di) = (ac-bd) + (ad+bc)i.
	friend inline plx operator *(const plx &lhs,const plx &rhs){
		const double real=lhs.x*rhs.x-lhs.y*rhs.y;
		const double imag=lhs.x*rhs.y+lhs.y*rhs.x;
		return plx(real,imag);
	}

	// Complex conjugate: same real part, negated imaginary part.
	inline plx conj()const{
		return plx(x,-y);
	}
};
// A polynomial in point/coefficient form: a vector of complex values.
// The element type must be spelled out; a bare `vector` cannot declare `w`.
#define poly vector<plx>
// C: log2 of the largest transform length the tables support.
// M: mask selecting the low 15 bits of a coefficient.
// N: array-capacity constant.
const int C=19,M=(1<<15)-1,N=400005;
const double pi=acos(-1);
// Root-of-unity tables filled by init_w(): w[l] holds 2^(l-1) entries and
// w[l][j] = exp(i*pi*j / 2^(l-1)). Index 0 is unused.
poly w[C+1];
// Index permutation filled by init_rev() and applied at the start of fft().
int rev[(1<<C)|5];
// Fills rev[0..lim) for a transform of length lim: each entry is built from
// rev[i>>1] shifted right by one, with the bit lim>>1 set when i is odd.
// For lim a power of two this is the bit-reversal order consumed by fft().
inline void init_rev(int lim){
	const int top_bit=lim>>1;
	for(int idx=0;idx<lim;++idx){
		int mirrored=rev[idx>>1]>>1;
		if(idx&1)mirrored|=top_bit;
		rev[idx]=mirrored;
	}
}
inline void init_w(){
	for(int i=1;i<=C;i++)w[i].resize(1<<(i-1));
	plx wn=plx(cos(pi/(1<<(C-1))),sin(pi/(1<<(C-1))));
	w[C][0]=plx(1,0);
	for(int i=1;i<(1<<(C-1));i++){
		if(i&31)w[C][i]=w[C][i-1]*wn;
		else w[C][i]=plx(cos(pi*i/(1<<(C-1))),sin(pi*i/(1<<(C-1))));
	}
	for(int i=C-1;i;i--)
	for(int j=0;j<(1<<(i-1));j++)w[i][j]=w[i+1][j<<1];
}
// In-place iterative FFT of f[0..lim).
// Requires init_rev(lim) and init_w() to have been called beforehand, since the
// routine reads rev[] for the initial reordering and w[][] for the twiddles.
// kd == -1 selects the inverse transform: the forward butterflies are run,
// then f[1..lim) is reversed and every value is divided by lim.
// Any other kd yields the plain forward transform.
inline void fft(plx *f,int lim,int kd){
	for(int i=0;i<lim;++i){
		const int partner=rev[i];
		if(i>partner)std::swap(f[i],f[partner]);
	}
	for(int half=1,lvl=1;half<lim;half<<=1,++lvl){
		const int span=half<<1;
		for(int base=0;base<lim;base+=span){
			plx *lo=f+base;
			plx *hi=lo+half;
			for(int j=0;j<half;++j){
				const plx even=lo[j];
				const plx odd=hi[j]*w[lvl][j];
				lo[j]=even+odd;
				hi[j]=even-odd;
			}
		}
	}
	if(kd!=-1)return;
	std::reverse(f+1,f+lim);
	for(int i=0;i<lim;++i){
		f[i].x/=lim;
		f[i].y/=lim;
	}
}
inline void mul(int *A,int *B,int lim,int *ret){
	static plx a[(1<<C)|5],b[(1<<C)|5],c[(1<<C)|5],d[(1<<C)|5],da,db,dc,dd;
	for(int i=0;i<lim;i++)a[i]=plx(A[i]&M,A[i]>>15),b[i]=plx(B[i]&M,B[i]>>15);
	init_rev(lim);
	fft(a,lim,1),fft(b,lim,1);
	for(int i=0;i<lim;i++){
		int j=(lim-i)&(lim-1);
		da=(a[i]+a[j].conj())*plx(0.5,0);
		db=(a[j].conj()-a[i])*plx(0,0.5);
		dc=(b[i]+b[j].conj())*plx(0.5,0);
		dd=(b[j].conj()-b[i])*plx(0,0.5);
		c[i]=(da*dc)+((da*dd)*plx(0,1));
		d[i]=(db*dd)+((db*dc)*plx(0,1));
	}
	fft(c,lim,-1),fft(d,lim,-1);
	for(int i=0;i<lim;i++){
		ll da=(ll)(d[i].x+0.5)%mod,db=(ll)(d[i].y+0.5)%mod,dc=(ll)(c[i].y+0.5)%mod,dd=(ll)(c[i].x+0.5)%mod;
		ret[i]=((da<<30)+((db+dc)<<15)+dd)%mod;
	}
}
int n,m,a[N],b[N],lim,ans[N];
int main(){
	#ifdef Stargazer
	freopen("lx.in","r",stdin);
	freopen("my.out","w",stdout);
	#endif
	init_w();
	n=read()+1,m=read()+1,mod=read();
	for(int i=0;i<n;i++)a[i]=read()%mod;
	for(int i=0;i<m;i++)b[i]=read()%mod;
	lim=1;
	while(lim<(n+m))lim<<=1;
	mul(a,b,lim,ans);
	for(int i=0;i<n+m-1;i++)cout<<ans[i]<<" ";
}

你可能感兴趣的:(多项式)