在数学中,傅里叶变换通常处理的是连续的模拟信号:
二维连续函数 f(x,y) 的傅里叶正变换为:
$$F(u,v)=\int_{-\infty}^{\infty}\int_{-\infty}^{\infty} f(x,y)\,e^{-j2\pi(ux+vy)}\,dx\,dy$$
相应的傅里叶逆变换公式为:
$$f(x,y)=\int_{-\infty}^{\infty}\int_{-\infty}^{\infty} F(u,v)\,e^{j2\pi(ux+vy)}\,du\,dv$$
但是在计算机领域,计算机一般处理的是数字信号,只能进行有限次计算,因此将这种受限条件下的傅里叶变换称为离散傅里叶变换(Discrete Fourier Transform,DFT)。
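大小为 M×N 的二维离散函数 f(x,y) 的傅里叶正变换的公式如下:
$$F(u,v)=\sum_{x=0}^{M-1}\sum_{y=0}^{N-1} f(x,y)\,e^{-j2\pi\left(\frac{ux}{M}+\frac{vy}{N}\right)}$$
相应的离散傅里叶逆变换为:
$$f(x,y)=\frac{1}{MN}\sum_{u=0}^{M-1}\sum_{v=0}^{N-1} F(u,v)\,e^{j2\pi\left(\frac{ux}{M}+\frac{vy}{N}\right)}$$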
这里列出来书本上介绍的关于傅里叶变换的一些性质,仅供参考,不做详述。
参考上述傅里叶正变换给出的公式,这里给出其python实现如下:
def DFT2D(x, shift=True):
    '''
    Two-dimensional discrete Fourier transform of a matrix.

    Evaluates, for every frequency pair (w1, w2),

        X[w1, w2] = sum over (n1, n2) of
                    x[n1, n2] * exp(-2j*pi*(w1*n1/N1 + w2*n2/N2))

    The double sum is separable, so it is computed as the matrix product
    W1 @ x @ W2 with one DFT matrix per axis instead of rebuilding an
    N1 x N2 phase array for every output bin.

    x: Input matrix, 2-D array-like of shape (N1, N2)
    shift: If True, circularly roll the result by N1//2 rows and N2//2
        columns so the zero-frequency term moves from index (0, 0) to
        index (N1//2, N2//2).

    Returns a complex64 array of shape (N1, N2).
    '''
    x = np.asarray(x)
    N1, N2 = x.shape
    k1 = np.arange(N1)
    k2 = np.arange(N2)
    # exp(-2j*pi*k/N) is periodic in k with period N, so the integer
    # products are reduced modulo N first. This keeps every angle inside
    # [0, 2*pi) and the phases are evaluated in double precision.
    W1 = np.exp(-2j * np.pi * (np.outer(k1, k1) % N1) / N1)
    W2 = np.exp(-2j * np.pi * (np.outer(k2, k2) % N2) / N2)
    # Cast once at the end so callers keep receiving complex64.
    X = (W1 @ x @ W2).astype(np.complex64)
    if shift:
        X = np.roll(X, N1 // 2, axis=0)
        X = np.roll(X, N2 // 2, axis=1)
    return X
参考上述傅里叶逆变换给出的公式,这里给出其python实现如下:
def iDFT2D(X, shift=True):
    '''
    Inverse two-dimensional discrete Fourier transform (magnitude).

    Evaluates, for every sample position (n1, n2),

        x[n1, n2] = | sum over (k1, k2) of
                      X[k1, k2] * exp(+2j*pi*(n1*k1/N1 + n2*k2/N2)) |
                    / (N1 * N2)

    Only the magnitude of each reconstructed sample is returned, so the
    input of the forward transform is recovered exactly only when it was
    real and non-negative (e.g. image intensities).

    X: Complex matrix, 2-D array-like of shape (N1, N2)
    shift: If True, X is assumed to have its zero-frequency term at index
        (N1//2, N2//2); that roll is undone before transforming.

    Returns a float array of shape (N1, N2).
    '''
    X = np.asarray(X)
    N1, N2 = X.shape
    if shift:
        # The parentheses matter: -N1//2 parses as (-N1)//2, which for odd
        # N1 equals -(N1//2) - 1 and would leave the spectrum, and hence
        # the reconstruction, displaced by one sample.
        X = np.roll(X, -(N1 // 2), axis=0)
        X = np.roll(X, -(N2 // 2), axis=1)
    k1 = np.arange(N1)
    k2 = np.arange(N2)
    # Per-axis inverse DFT matrices in double precision; the integer
    # products are reduced modulo N so every angle stays inside [0, 2*pi).
    W1 = np.exp(2j * np.pi * (np.outer(k1, k1) % N1) / N1)
    W2 = np.exp(2j * np.pi * (np.outer(k2, k2) % N2) / N2)
    x = np.abs(W1 @ X @ W2)
    return 1/(N1*N2)*x
这里给出测试用例,如下:
import matplotlib.pyplot as plt
import numpy as np
import imageio
from _utils import *
# Demo: transform a sample picture, show its spectrum, then rebuild it.
# Keep every 4th pixel along both axes and scale the intensities by 1/255.
step = 4
image = imageio.imread('./sample/cameraman.png')[::step, ::step] / 255
# The unpacking requires a 2-D array, i.e. a single-channel picture.
N1, N2 = image.shape

IMAGE = DFT2D(image)
# Log-compressed magnitude; the same picture appears in both figures.
spectrum = np.log10(1 + abs(IMAGE))

# NOTE(review): panel is not defined here, presumably it comes from _utils.
panel(np.array([image, spectrum]), [2, 1],
      text_color='green',
      texts=['Input image', 'Spectrum'])

image_ = iDFT2D(IMAGE)
panel(np.array([spectrum, image_]), [2, 1],
      text_color='green',
      texts=['Spectrum', 'Reconstructed image'])
按定义直接计算二维DFT的代价很高:对于 N×N 的图像,逐点求和需要 O(N^4) 次运算(即使改写成矩阵乘法也需要 O(N^3)),图像较大时,运行贼慢。这里考虑用cuda对其加速,搜索了半天,发现有大佬曾经做过类似的实现,并给出了源码,这里直接参考其实现并对其进行简单的封装,调用后即可完成图像的傅里叶变换以及视频的傅里叶变换。
这里给出FFT.cu的核心代码片段:
#include "fft.h"
#include "cuda_runtime.h"
// Maps x to a byte through a tanh ramp whose midpoint (127) is reached at
// x = 0.375. The float result lies between 0 and 254 and is truncated by
// the implicit conversion to unsigned char.
// imgfill stores this value in the .z component of its output pixel.
__device__ unsigned char getr(float x) {
    const float ramp = tanh((x - 0.375f) * 6.0f);
    return (ramp + 1.0f) * 127.0f;
}
// Maps x to a byte through a tanh ramp whose midpoint (127) is reached at
// x = 0.625. The float result lies between 0 and 254 and is truncated by
// the implicit conversion to unsigned char.
// imgfill stores this value in the .y component of its output pixel.
__device__ unsigned char getg(float x) {
    const float ramp = tanh((x - 0.6250f) * 6.0f);
    return (ramp + 1.0f) * 127.0f;
}
// Maps x to a byte. The value is the sum of three terms, scaled by 127 and
// implicitly converted to unsigned char:
//   - half of exp(g), where g = -20*(x - 0.25)^2 - 2*exp(-144*(x + 0.05)^2),
//     i.e. a bump around x = 0.25 that is damped close to x = -0.05;
//   - the constant 1;
//   - a tanh ramp centred at x = 0.875.
// imgfill stores this value in the .x component of its output pixel.
__device__ unsigned char getb(float x) {
return (exp(-20.0f * (x - 0.25f) * (x - 0.25f) - 2.0f * exp(-(x + 0.05f) * (x + 0.05f) * 144.0f)) * 0.5f + 1.0f + tanh((x - 0.875f) * 6.0f)) * 127.0f;
}
// Colours one sample of a size x size complex spectrum per thread.
//
// The thread at (col, row) reads d_k[row * size + col], converts its squared
// magnitude to a log-compressed level, and writes the getb/getg/getr colours
// of that level to d_img. The destination is offset by size / 2 along both
// axes (with wrap-around), which exchanges the two halves of each axis.
//
// No bounds check is performed: every launched thread must satisfy
// col < size and row < size.
__global__ void imgfill(float2* d_k, uchar3* d_img,int size)
{
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;
    const int half = size / 2;

    // Destination coordinates after swapping the halves of each axis.
    int dstCol;
    if (col >= half) {
        dstCol = col - half;
    } else {
        dstCol = col + half;
    }
    int dstRow;
    if (row >= half) {
        dstRow = row - half;
    } else {
        dstRow = row + half;
    }

    // Squared magnitude of the sample, then log compression.
    const float2 sample = d_k[row * size + col];
    float level = sample.x * sample.x + sample.y * sample.y;
    level = log(level * (1.0f / 256.0f/size) + 0.8f) * 0.07f;

    uchar3 pixel;
    pixel.x = getb(level);
    pixel.y = getg(level);
    pixel.z = getr(level);
    d_img[dstRow * size + dstCol] = pixel;
}
// Per-axis helper for fill(): maps a coordinate of the size-wide padded plane
// to a source coordinate inside [0, extent) and stores the fade weight for
// that position in *weight.
//   pos <  extent                      -> source pos, weight 1
//   extent <= pos < size/2 + extent/2  -> last source sample, weight fading
//                                         with the distance pos - extent
//   pos >= size/2 + extent/2           -> first source sample, weight fading
//                                         with the distance size - pos
// The fade is exp(-d * d / 1024) for a distance d.
static __device__ int pad_axis(int pos, int size, int extent, float* weight)
{
    float d;
    if (pos >= size / 2 + extent / 2) {
        d = size - pos;
        *weight = exp(-d * d * (1.0f / 1024.0f));
        return 0;
    }
    if (pos >= extent) {
        d = pos - extent;
        *weight = exp(-d * d * (1.0f / 1024.0f));
        return extent - 1;
    }
    *weight = 1.0f;
    return pos;
}

// Builds the complex FFT input plane from a w x h picture, one thread per
// sample of the size x size plane.
//
// d_x    : output, size * size complex samples (imaginary part set to 0)
// d_8uc3 : input picture, w * h pixels; only the .x component is read
// size   : edge length of the square output plane
// w, h   : width and height of the input picture
//
// Samples inside the picture copy the pixel value unchanged. Samples in the
// padding repeat the nearest edge pixel (taking wrap-around into account) and
// are attenuated by the product of the per-axis fade weights.
__global__ void fill(float2* d_x, uchar3* d_8uc3,int size,int w,int h) {
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    // Threads outside the plane (a grid that overshoots size) do nothing.
    if (x >= size || y >= size) {
        return;
    }

    float cx, cy;
    const int imgx = pad_axis(x, size, w, &cx);
    const int imgy = pad_axis(y, size, h, &cy);

    const unsigned char r = d_8uc3[imgy * w + imgx].x;
    d_x[y * size + x].x = r * cx * cy;
    d_x[y * size + x].y = 0;
}
// Runs one frame through the pipeline: upload -> fill -> forward FFT ->
// imgfill -> download.
//
// d_8uc3, d_x, d_k, d_img : device buffers used as the uploaded frame, the
//                           FFT input, the FFT output and the coloured
//                           spectrum, in that order
// fftPlan : cuFFT plan executed with cufftExecC2C
//           (NOTE(review): presumably a 2-D size x size C2C plan - confirm)
// pframe  : host source, width * height * 3 bytes are uploaded
// pDst    : host destination, size * size * 3 bytes are downloaded
//
// Both kernels use 128-thread blocks on a (size / 128) x size grid, so with
// the integer division only the first (size / 128) * 128 columns get a
// thread. None of the CUDA or cuFFT return codes are checked.
void fft_tranformer(uchar3 * d_8uc3,float2 * d_x,float2 * d_k,
    uchar3 *d_img,cufftHandle *fftPlan,
    unsigned char * pframe,unsigned char * pDst,
    int width,int height,int size)
{
    const dim3 blocks(size / 128, size, 1);
    const dim3 threads(128, 1, 1);

    cudaMemcpy(d_8uc3, pframe, width * height * 3, cudaMemcpyHostToDevice);

    fill<<<blocks, threads>>>(d_x, d_8uc3, size, width, height);
    cufftExecC2C(*fftPlan, d_x, d_k, CUFFT_FORWARD);
    imgfill<<<blocks, threads>>>(d_k, d_img, size);

    cudaMemcpy(pDst, d_img, size * size * 3, cudaMemcpyDeviceToHost);
}
点我
在公众号内回复 DFT 即可获取完整代码
链接一
链接二