wiki链接:http://en.wikipedia.org/wiki/Haar_wavelet
可以使用SSE2指令实现HAAR小波变换,并达到实时处理的速度;关于HAAR小波的介绍可参考以上维基链接。
参考MATLAB中dwt2与idwt2的函数原型,基于OpenCV的框架进行了汇编优化实现
HAAR小波也可用于图像的压缩:将CH、CV、CD中绝对值小于某一阈值的分量置为0,这三个矩阵就会成为稀疏矩阵(Sparse Matrix);反变换后图像的质量取决于所选阈值的大小。
实际实现时,可选择使用浮点数(单精度或双精度)进行矩阵计算,使用整数计算能得到更快的速度但不能进行完整的压缩与解压缩。
附代码:
inline void dwt2_row(__out double* ca0, __out double* ch0, __out double* cv0, __out double* cd0, __in unsigned char* row0, __in unsigned char* row1, __in int col) { __asm { mov eax_ptr, ca0; mov ebx_ptr, ch0; mov ecx_ptr, cv0; mov edx_ptr, cd0; mov esi_ptr, row0; mov edi_ptr, row1; pxor xmm3, xmm3; movapd xmm7, g_halfd; sub col, 4; jl loop_2; loop_4: movd xmm1, [esi_ptr]; movd xmm5, [edi_ptr]; punpcklbw xmm1, xmm3; punpcklbw xmm5, xmm3; punpcklwd xmm1, xmm3; punpcklwd xmm5, xmm3; cvtdq2pd xmm0, xmm1; cvtdq2pd xmm4, xmm5; shufpd xmm1, xmm1, 1; shufpd xmm5, xmm5, 1; cvtdq2pd xmm1, xmm1; cvtdq2pd xmm5, xmm5; addpd xmm4, xmm0; addpd xmm5, xmm1; mulpd xmm4, xmm7; mulpd xmm5, xmm7; subpd xmm0, xmm4; subpd xmm1, xmm5; movapd xmm6, xmm4; movapd xmm2, xmm0; shufpd xmm4, xmm5, 0; shufpd xmm6, xmm5, 3; shufpd xmm0, xmm1, 0; shufpd xmm2, xmm1, 3; addpd xmm6, xmm4; addpd xmm2, xmm0; mulpd xmm6, xmm7; mulpd xmm2, xmm7; subpd xmm4, xmm6; subpd xmm0, xmm2; movupd [eax_ptr], xmm6; movupd [ebx_ptr], xmm4; movupd [ecx_ptr], xmm2; movupd [edx_ptr], xmm0; add esi_ptr, 4; add edi_ptr, 4; add eax_ptr, 0x10; add ebx_ptr, 0x10; add ecx_ptr, 0x10; add edx_ptr, 0x10; sub col, 4; jge loop_4; loop_2: cmp col, -2; jl loop_end; pinsrw xmm0, [esi_ptr], 0; pinsrw xmm4, [edi_ptr], 0; punpcklbw xmm0, xmm3; punpcklbw xmm4, xmm3; punpcklwd xmm0, xmm3; punpcklwd xmm4, xmm3; cvtdq2pd xmm0, xmm0; cvtdq2pd xmm4, xmm4; addpd xmm4, xmm0; mulpd xmm4, xmm7; subpd xmm0, xmm4; movapd xmm5, xmm4; shufpd xmm4, xmm0, 0; shufpd xmm5, xmm0, 3; addpd xmm5, xmm4; mulpd xmm5, xmm7; subpd xmm4, xmm5; movsd [eax_ptr], xmm5; shufpd xmm5, xmm5, 1; movsd [ebx_ptr], xmm4; shufpd xmm4, xmm4, 1; movsd [ecx_ptr], xmm5; movsd [edx_ptr], xmm4; loop_end: } } inline void idwt2_row(__out unsigned char* row0, __out unsigned char* row1, __in double* ca0, __in double* ch0, __in double* cv0, __in double* cd0, __in int col) { __asm { mov eax_ptr, ca0; mov ebx_ptr, ch0; mov ecx_ptr, cv0; mov edx_ptr, cd0; mov esi_ptr, row0; mov edi_ptr, 
row1; sub col, 4; jl loop_2; loop_4: movupd xmm0, [eax_ptr]; movupd xmm1, [ebx_ptr]; movupd xmm4, [ecx_ptr]; movupd xmm5, [edx_ptr]; addpd xmm1, xmm0; addpd xmm5, xmm4; addpd xmm0, xmm0; addpd xmm4, xmm4; subpd xmm0, xmm1; subpd xmm4, xmm5; movapd xmm2, xmm1; movapd xmm6, xmm5; shufpd xmm1, xmm0, 0; shufpd xmm2, xmm0, 3; shufpd xmm5, xmm4, 0; shufpd xmm6, xmm4, 3; addpd xmm5, xmm1; addpd xmm6, xmm2; addpd xmm1, xmm1; addpd xmm2, xmm2; subpd xmm1, xmm5; subpd xmm2, xmm6; cvttpd2dq xmm5, xmm5; cvttpd2dq xmm6, xmm6; cvttpd2dq xmm1, xmm1; cvttpd2dq xmm2, xmm2; shufpd xmm5, xmm6, 0; shufpd xmm1, xmm2, 0; packssdw xmm5, xmm1; packuswb xmm5, xmm5; pshufd xmm1, xmm5, 1; movd [esi_ptr], xmm5; movd [edi_ptr], xmm1; add esi_ptr, 4; add edi_ptr, 4; add eax_ptr, 0x10; add ebx_ptr, 0x10; add ecx_ptr, 0x10; add edx_ptr, 0x10; sub col, 4; jge loop_4; loop_2: cmp col, -2; jl loop_end; movsd xmm0, [eax_ptr]; movsd xmm1, [ebx_ptr]; movsd xmm4, [ecx_ptr]; movsd xmm5, [edx_ptr]; addpd xmm1, xmm0; addpd xmm5, xmm4; addpd xmm0, xmm0; addpd xmm4, xmm4; subpd xmm0, xmm1; subpd xmm4, xmm5; shufpd xmm1, xmm0, 0; shufpd xmm5, xmm4, 0; addpd xmm5, xmm1; addpd xmm1, xmm1; subpd xmm1, xmm5; cvttpd2dq xmm5, xmm5; cvttpd2dq xmm1, xmm1; packssdw xmm5, xmm1; packuswb xmm5, xmm5; movd eax_ptr, xmm5; mov [esi_ptr], ax; shr eax_ptr, 16; stosw; loop_end: } } inline void dwt2(__out cv::Mat& CA, __out cv::Mat& CH, __out cv::Mat& CV, __out cv::Mat& CD, __in cv::Mat const& I) { if(CA.type() != CV_64FC1 || CH.type() != CV_64FC1 || CV.type() != CV_64FC1 || CD.type() != CV_64FC1 || I.channels() != 1) return; double* ca = reinterpret_cast<double*>(CA.data); double* ch = reinterpret_cast<double*>(CH.data); double* cv = reinterpret_cast<double*>(CV.data); double* cd = reinterpret_cast<double*>(CD.data); unsigned char* row = reinterpret_cast<unsigned char*>(I.data); for(int i=0; i<I.rows; i+=2) { dwt2_row(ca, ch, cv, cd, row, row+I.cols, I.cols); ca += CA.cols; ch += CH.cols; cv += CV.cols; cd += CD.cols; row += 
I.cols*2; } } inline void idwt2(__out cv::Mat& I, __in cv::Mat const& CA, __in cv::Mat const& CH, __in cv::Mat const& CV, __in cv::Mat const& CD) { if(CA.type() != CV_64FC1 || CH.type() != CV_64FC1 || CV.type() != CV_64FC1 || CD.type() != CV_64FC1 || I.channels() != 1) return; double* ca = reinterpret_cast<double*>(CA.data); double* ch = reinterpret_cast<double*>(CH.data); double* cv = reinterpret_cast<double*>(CV.data); double* cd = reinterpret_cast<double*>(CD.data); unsigned char* row = reinterpret_cast<unsigned char*>(I.data); for(int i=0; i<I.rows; i+=2) { idwt2_row(row, row+I.cols, ca, ch, cv, cd, I.cols); ca += CA.cols; ch += CH.cols; cv += CV.cols; cd += CD.cols; row += I.cols*2; } }
版权归作者所有,转载请注明出处!