FFT与游戏开发(五)

FFT与游戏开发(五)

先放结果展示:

海浪的模拟,可以理解为一堆任意方向的正弦波的叠加,这些正弦波的频谱(相位和振幅)会随着时间而变化。

\[h(\overrightarrow x, t) = \sum_{\overrightarrow k} \tilde h (\overrightarrow k, t) e^{j \overrightarrow k \cdot \overrightarrow x} \]

\[\begin{aligned} 其中: &L_m, L_n为整个M*N个网格的边长 \\ \overrightarrow k &= (k_x, k_z) \\ k_x &= \frac{2\pi m}{L_m}, -\frac{M}{2} \leq m < \frac{M}{2} \\ k_z &= \frac{2\pi n}{L_n}, -\frac{N}{2} \leq n < \frac{N}{2} \\ \overrightarrow x &= \left( \frac{xL_m}{M}, \frac{yL_n}{N} \right) \\ & 0 \leq x < M \\ & 0 \leq y < N \\ \end{aligned} \]

这里需要说明下,这个式子和标准2D iDFT长得不太一样:

\[x(x, y) = \frac{1}{MN} \sum_{m=0}^{M-1}\sum_{n=0}^{N-1} X(m, n) e^{j2\pi (\frac{mx}{M} + \frac{ny}{N})} \]

这是因为:

  1. \(\overrightarrow k \cdot \overrightarrow x\)的结果就是\(2\pi (\frac{mx}{M} + \frac{ny}{N})\),这一点和标准公式是一样的
  2. 求和的范围不一样,标准公式是从0到M-1、0到N-1,而这里是\(-\frac{M}{2} \leq m < \frac{M}{2}\)、\(-\frac{N}{2} \leq n < \frac{N}{2}\),这里推导一下:

\[设\overrightarrow k = \left( \frac{2\pi m - \pi M}{L_m}, \frac{2\pi n - \pi N}{L_n} \right); 0\leq m < M, 0\leq n < N \]

\[\begin{aligned} h(x, y, t) &= \frac{1}{MN} \sum_{m=0}^{M-1} \sum_{n=0}^{N-1} \tilde h(m,n,t) \exp\left( j \overrightarrow k \cdot \overrightarrow x \right) \\ &= \frac{1}{MN} \sum_{m=0}^{M-1} \sum_{n=0}^{N-1} \tilde h(m,n,t) \exp\left( j \frac{2\pi mx - \pi Mx}{M} + j \frac{2\pi ny - \pi Ny}{N} \right) \\ &= \frac{1}{MN} \sum_{m=0}^{M-1} \sum_{n=0}^{N-1} \tilde h(m,n,t) \exp\left( j2\pi \left( \frac{mx}{M} + \frac{ny}{N} \right) \right) \exp{(-j\pi(x+y))} \\ &= (-1)^{x+y} \frac{1}{MN} \sum_{m=0}^{M-1} \sum_{n=0}^{N-1} \tilde h(m,n,t) \exp\left( j2\pi \left( \frac{mx}{M} + \frac{ny}{N} \right) \right) \\ \end{aligned} \]

最后相当于是在标准的iFFT上面乘了一个\((-1)^{x+y}\),之前程序没有这么写,直觉上L的参数总是不太对,L比较大的时候,海浪应该比较密集,L比较小的时候,比较平缓,改成这样就符合直觉了。

飞利浦频谱(Phillips Spectrum)

我们有了从海浪频域转到海浪时域的工具,那么海浪的频域是怎么来的呢?通过海洋统计学,可以得到海浪的频谱随着时间变化的函数:

\[\tilde h(\overrightarrow k, t) = \tilde{h_0}(\overrightarrow k) e^{it\sqrt{gk}} + \tilde{h_0^*}(-\overrightarrow k) e^{-it\sqrt{gk}} \]

\[\begin{aligned} 其中: &L为整个N*N个网格的边长 \\ &k_x = \frac{2\pi m}{L}, -\frac{N}{2} \leq m < \frac{N}{2}\\ &k_z = \frac{2\pi n}{L}, -\frac{N}{2} \leq n < \frac{N}{2}\\ &\overrightarrow k = (k_x, k_z)\\ &k = |\overrightarrow k| \\ \\ 另外: &\tilde{h_0}(\overrightarrow k) = \frac{1}{\sqrt 2} (\xi_r + i\xi_i) \sqrt{P_h(\overrightarrow k)} \\ &P_h(\overrightarrow k) = A\frac{e^{-\frac{g^2}{k^2V^4}}}{k^4} |\overrightarrow k \cdot \overrightarrow \omega| ^ 2 \\ \xi_r, \xi_i:&相互独立的标准正态分布随机数 \\ \overrightarrow \omega:&风向 \\ V:&风速 \\ \end{aligned} \]

可以看到,这个函数巨复杂无比,不过好在我们也是可以让它跑在GPU上的。

HLSL实现

得到了波浪的频谱,就可以动手开始实现了,HLSL实现如下:

#pragma kernel iFFT2x
#pragma kernel iFFT2y
#pragma kernel GenerateSpectrum
#pragma kernel GenerateTwinRandomGaussian
#pragma kernel GenerateSpectrumStepOne
#pragma kernel GenerateSpectrumStepTwo

// Number of butterfly stages in one 1D transform (log2 of the transform length).
static const uint FFT_STAGES = 8;
// Length of one 1D transform: 2^FFT_STAGES samples.
static const uint FFT_DIMENSION = 1 << FFT_STAGES;
// Butterflies per stage (half the transform length); used as the thread-group size of the iFFT kernels.
static const uint FFT_BUTTERFLYS = FFT_DIMENSION >> 1;

static const float PI = 3.14159265;
// Group-shared scratch memory holding two FFT_DIMENSION-sized halves; iFFT2 alternates
// between them (reads one half, writes the other) from stage to stage.
groupshared float2 pingPongArray[FFT_DIMENSION * 2];

// Reverses the order of the lowest `count` bits of `index`.
// reversebits() flips the whole 32-bit word, which leaves the bits of interest at the
// top of the word, so they are shifted back down by (32 - count).
uint ReverseBits(uint index, uint count) {
	uint flipped = reversebits(index);
	uint shift = 32 - count;
	return flipped >> shift;
}

// Multiplies two complex numbers stored as float2(real, imaginary).
float2 ComplexMultiply(float2 a, float2 b) {
	float re = a.x * b.x - a.y * b.y;
	float im = a.y * b.x + a.x * b.y;
	return float2(re, im);
}

// One radix-2 butterfly on complex values:
//   output0 = input0 + twiddleFactor * input1
//   output1 = input0 - twiddleFactor * input1
void ButterFlyOnce(float2 input0, float2 input1, float2 twiddleFactor, out float2 output0, out float2 output1) {
	float2 rotated = ComplexMultiply(twiddleFactor, input1);
	output1 = input0 - rotated;
	output0 = input0 + rotated;
}

// Returns e^(i * theta) as float2(cos(theta), sin(theta)).
float2 Euler(float theta) {
	float sine;
	float cosine;
	sincos(theta, sine, cosine);
	return float2(cosine, sine);
}

// Input and output of one iFFT pass; each texel is one complex value (x = real, y = imaginary).
// The element type is spelled out because RWTexture2D needs an explicit element type, and
// iFFT2 reads and writes these textures as float2.
Texture2D<float2> SrcTex;
RWTexture2D<float2> DstTex;

// Runs one 1D inverse FFT of length FFT_DIMENSION along a row (horizontal == true) or a
// column (horizontal == false) of SrcTex and writes the result, divided by FFT_DIMENSION,
// to DstTex. Every thread processes one butterfly per stage; intermediate stages exchange
// data through the two halves of pingPongArray.
// NOTE(review): the thread id along the transform axis is used directly as the butterfly
// index, so this presumably expects exactly one thread group per row/column - confirm
// against the dispatch call.
void iFFT2(uint2 id, bool horizontal)
{
	uint lane = horizontal ? id.x : id.y;

	// Load this thread's two input samples; the source coordinate along the transform
	// axis is the bit-reversed slot index.
	uint slotA = lane * 2;
	uint slotB = slotA + 1;
	uint2 coordA;
	uint2 coordB;
	if (horizontal) {
		coordA = uint2(ReverseBits(slotA, FFT_STAGES), id.y);
		coordB = uint2(ReverseBits(slotB, FFT_STAGES), id.y);
	} else {
		coordA = uint2(id.x, ReverseBits(slotA, FFT_STAGES));
		coordB = uint2(id.x, ReverseBits(slotB, FFT_STAGES));
	}
	pingPongArray[slotA] = SrcTex[coordA];
	pingPongArray[slotB] = SrcTex[coordB];

	// Base offsets of the half being read and the half being written in the current stage.
	uint readBase = 0;
	uint writeBase = FFT_DIMENSION;
	[unroll]
	for (uint stage = 1; stage <= FFT_STAGES; stage++) {
		// Wait until every thread has finished writing the previous stage (or the initial load).
		GroupMemoryBarrierWithGroupSync();

		// Width of each independent sub-transform in this stage.
		uint span = 1 << stage;
		uint halfSpan = span >> 1;
		// Position of this butterfly inside its sub-transform.
		uint k = lane % halfSpan;
		// (lane / halfSpan) selects which sub-transform the butterfly belongs to.
		slotA = k + (lane / halfSpan) * span;
		slotB = slotA + halfSpan;

		float2 upper;
		float2 lower;
		ButterFlyOnce(
			pingPongArray[readBase + slotA], pingPongArray[readBase + slotB],
			Euler(2 * PI * k / span),
			upper, lower);

		if (stage == FFT_STAGES) {
			// Last stage: apply the 1 / FFT_DIMENSION scale and write straight to the texture.
			upper /= FFT_DIMENSION;
			lower /= FFT_DIMENSION;
			if (horizontal) {
				DstTex[uint2(slotA, id.y)] = upper;
				DstTex[uint2(slotB, id.y)] = lower;
			} else {
				// Sign flip selected by the parity of id.x + id.y (the (-1)^(x+y) factor
				// that re-centres the frequency range); applied only in the vertical pass.
				bool keepSign = (id.x + id.y) % 2 == 0;
				upper = keepSign ? upper : -upper;
				lower = keepSign ? lower : -lower;
				DstTex[uint2(id.x, slotA)] = upper;
				DstTex[uint2(id.x, slotB)] = lower;
			}
		} else {
			pingPongArray[writeBase + slotA] = upper;
			pingPongArray[writeBase + slotB] = lower;
			// Swap the roles of the two halves for the next stage.
			uint previousRead = readBase;
			readBase = writeBase;
			writeBase = previousRead;
		}
	}
}

// Kernel: inverse FFT along x, with FFT_BUTTERFLYS threads per group laid out along x.
[numthreads(FFT_BUTTERFLYS, 1, 1)]
void iFFT2x(uint3 id : SV_DispatchThreadID) {
	const bool alongRows = true;
	iFFT2(id.xy, alongRows);
}

// Kernel: inverse FFT along y, with FFT_BUTTERFLYS threads per group laid out along y.
[numthreads(1, FFT_BUTTERFLYS, 1)]
void iFFT2y(uint3 id : SV_DispatchThreadID) {
	const bool alongRows = false;
	iFFT2(id.xy, alongRows);
}

// Gravitational acceleration g used in the spectrum and in the sqrt(g * k) phase term
// (presumably in m/s^2 - the unit is not stated in this file).
const static float G = 9.8;

// Returns x squared.
float Pow2(float x) {
	float squared = x * x;
	return squared;
}
// Returns x raised to the fourth power (multiplied left to right).
float Pow4(float x) {
	float cubed = x * x * x;
	return cubed * x;
}

// Initial spectrum amplitude for one wave vector: xi * sqrt(P_h(k)) / sqrt(2), where
//   P_h(k) = A * exp(-1 / (k * L)^2) / k^4 * |k_v . w|^2   and   L = V^2 / G.
// k_v:   wave vector
// k:     length of the wave vector (supplied by the caller)
// w:     wind direction
// V:     wind speed
// xi:    pair of random numbers scaling the real and imaginary parts
// sqrtA: square root of the amplitude constant A
float2 H0(float2 k_v, float k, float2 w, float V, float2 xi, float sqrtA) {
	// The zero-frequency bin would evaluate to 0 * 0 / 0 = NaN below; it carries no wave,
	// so return zero instead of letting a NaN reach the output textures.
	if (k <= 0) {
		return float2(0, 0);
	}
	// sqrt(P_h): the exponent is halved and k^4 becomes k^2 because of the square root.
	float sqrtPh = sqrtA * exp(-Pow2(G / (k * V * V)) / 2) * abs(dot(k_v, w)) / Pow2(k);
	return xi * sqrtPh / sqrt(2);
}

// Evaluates the time-dependent spectrum value for one wave vector:
//   h(k, t) = h0(k) * e^(i*t*sqrt(G*k)) + conj(h0(-k)) * e^(-i*t*sqrt(G*k))
// Returns the complex result as float2(real, imaginary); NaN components are replaced by 0.
float2 PhillipsSpectrum(
	float2 k_v, // wave vector
	float sqrtA, // square root of the amplitude constant
	float t, // time
	float2 w, // wind direction
	float V, // wind speed
	float2 xi) // pair of random numbers
{
	// This part does not depend on t and could be precomputed.
	float k = length(k_v);
	float2 h0 = H0(k_v, k, w, V, xi, sqrtA);
	// Multiplying by (1, -1) takes the complex conjugate.
	float2 h0_adj = H0(-k_v, k, w, V, xi, sqrtA) * float2(1, -1);

	// The phase grows without bound as t increases, so it is kept in full float precision;
	// a half would lose the fractional part of the angle after a short run time.
	float tsqrtGk = t * sqrt(G * k);
	float2 h = ComplexMultiply(h0, Euler(tsqrtGk)) + ComplexMultiply(h0_adj, Euler(-tsqrtGk));

	h.x = isnan(h.x) ? 0 : h.x;
	h.y = isnan(h.y) ? 0 : h.y;
	return h;
}

// Reverses the 32 bits of `bits` and scales the result by 2^-32, i.e. mirrors the binary
// digits of the input around the binary point (1 -> 0.5, 2 -> 0.25, 3 -> 0.75, ...).
float RadicalInverse_VdC(uint bits) {
	// Swap progressively larger groups: single bits, pairs, nibbles, bytes, half-words.
	bits = ((bits >> 1u) & 0x55555555u) | ((bits & 0x55555555u) << 1u);
	bits = ((bits >> 2u) & 0x33333333u) | ((bits & 0x33333333u) << 2u);
	bits = ((bits >> 4u) & 0x0F0F0F0Fu) | ((bits & 0x0F0F0F0Fu) << 4u);
	bits = ((bits >> 8u) & 0x00FF00FFu) | ((bits & 0x00FF00FFu) << 8u);
	bits = (bits >> 16u) | (bits << 16u);
	// 2.3283064365386963e-10 == 1 / 2^32
	return bits * 2.3283064365386963e-10f;
}

// Output of GenerateSpectrum: one complex spectrum value (real, imaginary) per texel.
// The element type is spelled out because RWTexture2D needs an explicit element type.
RWTexture2D<float2> SpectrumTex;
// Simulation time t used in the spectrum phase.
float _Time;
// Square root of the spectrum amplitude constant A.
float _SqrtAmplitude;
// Wind direction.
float2 _WindDirection;
// Wind speed V.
float _WindSpeed;
// Patch edge length L; wave vectors are computed as 2 * PI * (m, n) / L.
float _PatchLength;


// Kernel: single-pass spectrum generation. Builds a pair of Gaussian random numbers from
// the thread id, derives the wave vector of this texel and stores the spectrum at _Time.
[numthreads(4, 4, 1)]
void GenerateSpectrum(uint3 id : SV_DispatchThreadID) {
	// Two values in (0, 1] derived from the texel coordinates (+1 keeps the input non-zero).
	float2 rand = float2(RadicalInverse_VdC(id.x + 1), RadicalInverse_VdC(id.y + 1));

	// Box-Muller transform: (sin, cos) of a random angle scaled by a random radius.
	float radius = sqrt(-2 * log(rand.x));
	float2 xi;
	sincos(2 * PI * rand.y, xi.x, xi.y);
	xi *= radius;

	// Shift the texel index so that it is centred on zero, then convert to a wave vector.
	float2 gridIndex = (float2)id.xy - FFT_DIMENSION / 2;
	float2 waveVector = 2 * PI * gridIndex / _PatchLength;

	float2 spectrum =
		PhillipsSpectrum(waveVector, _SqrtAmplitude, _Time, _WindDirection, _WindSpeed, xi);
	SpectrumTex[id.xy] = spectrum;
}

// Output of GenerateTwinRandomGaussian: a pair of Gaussian random numbers per texel.
// The element type is spelled out because RWTexture2D needs an explicit element type.
RWTexture2D<float2> TwinRandomGaussianTexOutput;

// Kernel: fills TwinRandomGaussianTexOutput with one pair of Gaussian random numbers per
// texel, generated from the texel coordinates.
[numthreads(4,4,1)]
void GenerateTwinRandomGaussian(uint3 id : SV_DispatchThreadID) {
	// Two values in (0, 1] derived from the texel coordinates (+1 keeps the input non-zero).
	float2 rand = float2(RadicalInverse_VdC(id.x + 1), RadicalInverse_VdC(id.y + 1));

	// Box-Muller transform: (sin, cos) of a random angle scaled by a random radius.
	float radius = sqrt(-2 * log(rand.x));
	float2 gaussianPair;
	sincos(2 * PI * rand.y, gaussianPair.x, gaussianPair.y);
	gaussianPair *= radius;

	TwinRandomGaussianTexOutput[id.xy] = gaussianPair;
}

// Resources of GenerateSpectrumStepOne. The input holds a pair of random numbers per texel;
// the output packs h0 in xy and the conjugated h0(-k) in zw.
// Element types are spelled out because RWTexture2D needs an explicit element type.
Texture2D<float2> TwinRandomGaussianTexInput;
RWTexture2D<float4> SpectrumStepOneOutput;

// Kernel: time-independent half of the spectrum generation. Computes h0(k) and the
// conjugate of h0(-k) for this texel and stores them packed as float4(h0, conjugate).
[numthreads(4,4,1)]
void GenerateSpectrumStepOne(uint3 id : SV_DispatchThreadID) {
	// Shift the texel index so that it is centred on zero, then convert to a wave vector.
	float2 gridIndex = (float2)id.xy - FFT_DIMENSION / 2;
	float2 waveVector = 2 * PI * gridIndex / _PatchLength;
	float waveNumber = length(waveVector);

	// Random pair for this texel, read from the input texture.
	float2 gaussianPair = TwinRandomGaussianTexInput[id.xy];

	float2 h0 = H0(waveVector, waveNumber, _WindDirection, _WindSpeed, gaussianPair, _SqrtAmplitude);
	// Multiplying by (1, -1) takes the complex conjugate.
	float2 h0Conjugate =
		H0(-waveVector, waveNumber, _WindDirection, _WindSpeed, gaussianPair, _SqrtAmplitude) * float2(1, -1);

	SpectrumStepOneOutput[id.xy] = float4(h0, h0Conjugate);
}

// Resources of GenerateSpectrumStepTwo. The input is read as float4 (h0 in xy, conjugated
// h0(-k) in zw); the output is one complex spectrum value per texel.
// Element types are spelled out because RWTexture2D needs an explicit element type.
Texture2D<float4> SpectrumStepOneInput;
RWTexture2D<float2> SpectrumStepTwoOutput;

// Kernel: time-dependent half of the spectrum generation. Reads the packed (h0, conjugate)
// pair for this texel and combines it into
//   h(k, t) = h0 * e^(i*t*sqrt(G*k)) + h0_adj * e^(-i*t*sqrt(G*k))
// NaN components of the result are replaced by 0 before it is stored.
[numthreads(4, 4, 1)]
void GenerateSpectrumStepTwo(uint3 id : SV_DispatchThreadID) {
	float4 input = SpectrumStepOneInput[id.xy];
	float2 h0 = input.xy;
	float2 h0_adj = input.zw;

	float t = _Time;

	// Shift the texel index so that it is centred on zero, then convert to a wave vector.
	float2 mn = (float2)id.xy - FFT_DIMENSION / 2;
	float L = _PatchLength;
	float2 k_v = 2 * PI * mn / L;

	float k = length(k_v);

	// The phase grows without bound as t increases, so it is kept in full float precision;
	// a half would lose the fractional part of the angle after a short run time.
	float tsqrtGk = t * sqrt(G * k);
	float2 h = ComplexMultiply(h0, Euler(tsqrtGk)) + ComplexMultiply(h0_adj, Euler(-tsqrtGk));

	h.x = isnan(h.x) ? 0 : h.x;
	h.y = isnan(h.y) ? 0 : h.y;

	SpectrumStepTwoOutput[id.xy] = h;
}

有几点需要注意下:

  1. 原本是想利用Hammersley Sequence做GPU上的随机数生成,但是这样发现还是出现了一些重复的pattern,于是还是放在CPU端去做。
  2. 生成Phillips Spectrum可以抽出时间参数t,预计算一部分(GenerateSpectrumStepOne),减少每帧需要做的计算量。

参考资料

  1. TESSENDORF, J., 2001. Simulating ocean waters. In SIGGRAPH course notes (course 47), ACM SIGGRAPH
  2. https://zhuanlan.zhihu.com/p/64414956
  3. Fynn-Jorin Flügge, Realtime GPGPU FFT Ocean Water Simulation

你可能感兴趣的:(FFT与游戏开发(五))