MP3解码的最后一步是“多相合成滤波”,多相合成滤波算法见ISO/IEC 11172-3 ANNEX_B Figure 3-A.2,经过5个步骤,将输入序列X[0..31]的32个采样值,变换为32个PCM样本输出:
C代码
// ①Shift 64 to 1024 FIFO for i = 64 to 1023 V[i] = V[i-64] // ②Calculate 64 values V[i] by matrixing for i = 0 to 63 for k = 0 to 31 V[i] += N[i][k] * X[k] // 其中 N[i][K]=cos((16+i)*(2*k+1)*PI/64) // ③Building a 512 values vector U for i = 0 to 7 for j = 0 to 31 { U[64*i+j] += V[128*i+j] U[64*i+32+j] += V[128*i+96+j] } // ④Multiply U vector by D window for j = 0 to 511 W[i] = U[i] * D[i] // 其中D[i] 见ISO/IEC 11172-3,Table 3-B.3. // ⑤Calculating 32 Samples for i = 0 to 31 { double Si = 0 for j = 0 to 15 Si += W[i + 32 * j] //Output PCM Sample: PCMi = (short)(Si * 32768) }
算法中第②步矩阵运算是DCT(32→64)运算,将输入序列的32个值X[k]变换为输出序列的64个值V[i],直接运算共64 * 32 = 2048次乘法。下面简要介绍快速算法。
用32点DCT-II代替DCT(32→64)
余弦函数具有周期性和对称性,所以由DCT(32→32)得到输出序列的32个值,可以直接得到 DCT(32→64) 的64个输出值,方算法如下:
void in32out64(double in[32], double out[64]) { int i; for (i = 0; i < 16; i++) out[i] = in[i+16]; out[16] = 0.0; for (i = 17; i < 48; i++) out[i] = -in[48-i]; for (i = 48; i < 64; i++) out[i] = -in[i-48]; }
根据标准中对N[i][k]的规定知DCT(32→32)是32点DCT-II。DCT-II的快速算法采用:
将N点DCT-II表示为两个N/2点DCT-II之和 N点DCT-II可以表示为N/2点DCT-II和N/2点DCT-IV之和,而DCT-IV输出序列中的一个值可以用DCT-II输出序列中两个连续的值之和表示。
将N/2点DCT再表示为两个N/4点DCT之和,一直分解下去
可以将上述两步推导过程一直递归地进行下去,直到N=2。限于博客长度限制,并且输入公式不方便,这儿不写出具体推导过程。
为了方便查表求余弦值,先初始化N[i][k]:
#define PI 3.141592654 // 原文http://lfp001.iteye.com/ void init_N() { int i,k; for(i = 0; i < 32; i++) for(k = 0; k < 32; k++) N[i][k] = cos(i * (2 * k + 1) * PI / 64); }
2点DCT
N'[i][k] = cos(i * (2 * k + 1) * PI / 4);
out[i] += in[k] * N'[i][k];
程序代码为:
void dct2(double in[2], double out[2]) { int i,k; for(i = 0; i < 2; i++) { out[i] = 0; for(k = 0; k < 2; k++) out[i] += in[k] * N[16 * i][k]; } }
2点DCT快速算法直接展开运算,这3项N[i][k]不用查表:
N[0][k]=1,N[16][0]=cos(PI/4), N[16][1]=cos(3PI/4)=-cos(PI/4)
out[0] = in[0] + in[1];
out[1] = in[0] * N[16][0] + in[1] * N[16][1] = (in[0] - in[1]) * 0.7071067811866;
其算法如下:
void fast_dct2(double in[2], double out[2]) { out[0] = in[0] + in[1]; out[1] = (in[0] - in[1]) * 0.7071067811866; }
4点DCT
N'[i][k] = cos(i * (2 * k + 1) * PI / 8);
out[i] += in[k] * N'[i][k];
void dct4(double in[4], double out[4]) { int i,k; for(i = 0; i < 4; i++) { out[i] = 0; for(k = 0; k < 4; k++) out[i] += in[k] * N[8 * i][k]; } }
4点DCT快速算法: 分解为两个2点DCT
void fast_dct4(double in[4], double out[4]) { int i; double even_in[2], even_out[2]; double odd_in[2], odd_out[2]; for(i = 0; i < 2; i++) { even_in[i] = in[i] + in[3 - i]; odd_in[i] = (in[i] - in[3 - i]) / (2 * N[8][i]); } fast_dct2(even_in, even_out); fast_dct2(odd_in, odd_out); out[0] = even_out[0]; out[1] = odd_out[0] + odd_out[1]; out[2] = even_out[1]; out[3] = odd_out[1]; }
8点DCT
N'[i][k] = cos(i * (2 * k + 1) * PI / 16);
out[i] += in[k] * N'[i][k];
void dct8(double in[8], double out[8]) { int i,k; for(i = 0; i < 8; i++) { out[i] = 0; for(k = 0; k < 8; k++) out[i] += in[k] * N[4 * i][k]; } }
8点DCT快速算法: 分解为两个4点DCT,4点DCT用快速算法
N[4][i]=cos((2*i+1)/16) i=0,1,2,3
void fast_dct8(double in[8], double out[8]) { int i; double even_in[4], even_out[4]; double odd_in[4], odd_out[4]; for(i = 0; i < 4; i++) { even_in[i] = in[i] + in[7 - i]; odd_in[i] = (in[i] - in[7 - i]) / (2 * N[4][i]); } fast_dct4(even_in, even_out); //直接产生out[0..7]的偶数项 fast_dct4(odd_in, odd_out); //间接产生out[0..7]的奇数项 for (i = 0; i < 3; i++) { out[2*i] = even_out[i]; out[2*i+1] = odd_out[i] + odd_out[i+1]; //! } out[6] = even_out[3]; out[7] = odd_out[3]; }
16点DCT
N'[i][k] = cos(i * (2 * k + 1) * PI / 32);
out[i] += in[k] * N'[i][k];
void dct16(double in[16], double out[16]) { int i,k; for(i = 0; i < 16; i++) { out[i] = 0; for(k = 0; k < 16; k++) out[i] += in[k] * N[2 * i][k]; } }
16点DCT快速算法: 分解为两个8点DCT,8点DCT用快速算法
N[2][i]=cos((2*i+1)/32) i=0,1,...7
void fast_dct16(double in[16], double out[16]) { int i; double even_in[8], even_out[8]; double odd_in[8], odd_out[8]; for(i = 0; i < 8; i++) { even_in[i] = in[i] + in[15 - i]; odd_in[i] = (in[i] - in[15 - i]) / (2 * N[2][i]); // 计算误差来源于此? } fast_dct8(even_in, even_out); // 直接产生out[0..15]的偶数项 fast_dct8(odd_in, odd_out); // 间接产生out[0..15]的奇数项 for (i = 0; i < 7; i++) { out[2*i] = even_out[i]; out[2*i+1] = odd_out[i] + odd_out[i+1]; //! } out[14] = even_out[7]; out[15] = odd_out[7]; }
32点DCT
N[i][k] = cos(i * (2 * k + 1) * PI / 64);
out[i] += in[k] * N[i][k];
void dct32(double in[32], double out[32]) { int i,k; for(i = 0; i < 32; i++) { out[i] = 0; for(k = 0; k < 32; k++) out[i] += in[k] * N[i][k]; } }
32点DCT快速算法: 分解为两个16点DCT,用16点DCT采用快速算法.共16+2*32=80次乘法
N[1][i]=cos((2*i+1)/64) i=0,1,...15
void fast_dct32(double in[32], double out[32]) { int i; double even_in[16], even_out[16]; double odd_in[16], odd_out[16]; for(i = 0; i < 16; i++) { even_in[i] = in[i] + in[31 - i]; odd_in[i] = (in[i] - in[31 - i]) / (2 * N[1][i]); } fast_dct16(even_in, even_out); //直接产生out[0..31]的偶数项 fast_dct16(odd_in, odd_out); //间接产生out[0..31]的奇数项 for (i = 0; i < 15; i++) { out[2*i] = even_out[i]; out[2*i+1] = odd_out[i] + odd_out[i+1]; //! } out[30] = even_out[15]; out[31] = odd_out[15]; }
各点DCT快速算法用到的N[i][j]:
DCT32: N[1][i]=cos((2*i+1)/64) i=0..15
DCT16: N[2][i]=cos((2*i+1)/32) i=0..7
DCT8: N[4][i]=cos((2*i+1)/16) i=0..3
DCT4: N[8][i]=cos((2*i+1)/8) i=0,1
DCT2: cos(PI/4) = 0.7071067811866
快速算法只需事先存储16+8+4+2+1=31个值到N[0..30],取代N[32][32]。
DCT-II(32→64)的快速算法的展开算法
解码一帧双声道的MP3,共要调用ISO/IEC 11172-3 ANNEX_B Figure 3-A.2第二步给出的矩阵运算2*2*18=72次,进行一次矩阵运算要进行浮点乘法是2048次,采用快速算法降低到80次,快速算法很完美。为什么还要将快速算法展开呢?由于矩阵运算调用频度极高,是影响解码速度的关键模块。分析以上快速算法函数可以看出两个特点:使用迭代和函数内大量的循环语句,应用迭代这种规律使展开成为可能,使用展开方法可以去掉中间各点DCT快速算法用到的循环语句,所以可以使矩阵运算的速度进一步提高。通过我对比实测用展开方法解码速度提升10%以上。下面给出的DCT-II(32->64)展开的快速算法是从我前几年写的MP3解码程序中直接COPY过来的,有比较详细的注解,对比上文的各点DCT的快速算法函数,很容易看懂。
void dct32to64(double in32[32], double out64[64]) { double in0,in1,in2,in3,in4,in5,in6,in7,in8,in9,in10,in11,in12,in13,in14,in15; double out0,out1,out2,out3,out4,out5,out6,out7,out8,out9,out10,out11,out12,out13,out14,out15; double d8_0,d8_1,d8_2,d8_3,d8_4,d8_5,d8_6,d8_7; double ein0, ein1, oin0, oin1; //>>>>>>>>>>>>>>>> // 用DCT16计算DCT32输出[0..31]的偶数下标元素 in0 = in32[0] + in32[31]; in1 = in32[1] + in32[30]; in2 = in32[2] + in32[29]; in3 = in32[3] + in32[28]; in4 = in32[4] + in32[27]; in5 = in32[5] + in32[26]; in6 = in32[6] + in32[25]; in7 = in32[7] + in32[24]; in8 = in32[8] + in32[23]; in9 = in32[9] + in32[22]; in10 = in32[10] + in32[21]; in11 = in32[11] + in32[20]; in12 = in32[12] + in32[19]; in13 = in32[13] + in32[18]; in14 = in32[14] + in32[17]; in15 = in32[15] + in32[16]; //DCT16 { //>>>>>>>> 用DCT8计算DCT16输出[0..15]的偶数下标元素 d8_0 = in0 + in15; d8_1 = in1 + in14; d8_2 = in2 + in13; d8_3 = in3 + in12; d8_4 = in4 + in11; d8_5 = in5 + in10; d8_6 = in6 + in9; d8_7 = in7 + in8; //DCT8. 加(减)法29,乘法12次 { //>>>>e 用DCT4计算DCT8的输出[0..7]的偶数下标元素 out1 = d8_0 + d8_7; out3 = d8_1 + d8_6; out5 = d8_2 + d8_5; out7 = d8_3 + d8_4; //>>e DCT2 ein0 = out1 + out7; ein1 = out3 + out5; out64[48] = -ein0 - ein1; out64[0] = (ein0 - ein1) * 0.70710678118654752; // 0.5/cos(PI/4) //>>o DCT2 oin0 = (out1 - out7) * 0.54119610014619698; // 0.5/cos( PI/8) oin1 = (out3 - out5) * 1.30656296487637653; // 0.5/cos(3PI/8) out2 = oin0 + oin1; out12 = (oin0 - oin1) * 0.70710678118654752; // cos(PI/4) out64[40] = out64[56] = -out2 - out12; out64[8] = out12; //<<<<e 完成计算DCT8的输出[0..7]的偶数下标元素 //>>>>o 用DCT4计算DCT8的输出[0..7]的奇数下标元素 //o DCT4 part1 out1 = (d8_0 - d8_7) * 0.50979557910415917; // 0.5/cos( PI/16) out3 = (d8_1 - d8_6) * 0.60134488693504528; // 0.5/cos(3PI/16) out5 = (d8_2 - d8_5) * 0.89997622313641570; // 0.5/cos(5PI/16) out7 = (d8_3 - d8_4) * 2.56291544774150618; // 0.5/cos(7PI/16) //o DCT4 part2 //e DCT2 part1 ein0 = out1 + out7; ein1 = out3 + out5; //o DCT2 part1 oin0 = (out1 - out7) * 0.54119610014619698; // 0.5/cos(PI/8) oin1 = (out3 - out5) * 1.30656296487637653; // 0.5/cos(3PI/8) //e DCT2 part2 out1 = ein0 + ein1; out5 = (ein0 - ein1) * 0.70710678118654752; // cos(PI/4) //o DCT2 part2 out3 = oin0 + oin1; out7 = (oin0 - oin1) * 0.70710678118654752; // cos(PI/4) out3 += out7; //o DCT4 part3 out64[44] = out64[52] = -out1 - out3; //out1+=out3 out64[36] = out64[60] = -out3 - out5; //out3+=out5 out64[4] = out5 + out7; //out5+=out7 out64[12] = out7; //<<<<o 完成计算DCT8的输出[0..7]的奇数下标元素 } //<<<<<<<< 完成计算DCT16输出[0..15]的偶数下标元素 //------------------------------------------------------------------------- //>>>>>>>> 用DCT8计算DCT16输出[0..15]的奇数下标元素 d8_0 = (in0 - in15) * 0.50241928618815571; // 0.5/cos( 1 * PI/32) d8_1 = (in1 - in14) * 0.52249861493968888; // 0.5/cos( 3 * PI/32) d8_2 = (in2 - in13) * 0.56694403481635770; // 0.5/cos( 5 * PI/32) d8_3 = (in3 - in12) * 0.64682178335999013; // 0.5/cos( 7 * PI/32) d8_4 = (in4 - in11) * 0.78815462345125022; // 0.5/cos( 9 * PI/32) d8_5 = (in5 - in10) * 1.06067768599034747; // 0.5/cos(11 * PI/32) d8_6 = (in6 - in9) * 1.72244709823833393; // 0.5/cos(13 * PI/32) d8_7 = (in7 - in8) * 5.10114861868916386; // 0.5/cos(15 * PI/32) //DCT8 { //>>>>e 用DCT4计算DCT8的输出[0..7]的偶数下标元素. out3 = d8_0 + d8_7; out7 = d8_1 + d8_6; out11 = d8_2 + d8_5; out15 = d8_3 + d8_4; //>>e DCT2 ein0 = out3 + out15; ein1 = out7 + out11; out1 = ein0 + ein1; out9 = (ein0 - ein1) * 0.70710678118654752; // 0.5/cos(PI/4) //>>o DCT2 oin0 = (out3 - out15) * 0.54119610014619698; // 0.5/cos( PI/8) oin1 = (out7 - out11) * 1.30656296487637653; // 0.5/cos(3PI/8) out5 = oin0 + oin1; out13 = (oin0 - oin1) * 0.70710678118654752; // cos(PI/4) out5 += out13; //<<<<e 完成计算DCT8的输出[0..7]的偶数下标元素 //>>>>o 用DCT4计算DCT8的输出[0..7]的奇数下标元素 //o DCT4 part1 out3 = (d8_0 - d8_7) * 0.50979557910415917; // 0.5/cos( PI/16) out7 = (d8_1 - d8_6) * 0.60134488693504528; // 0.5/cos(3PI/16) out11 = (d8_2 - d8_5) * 0.89997622313641570; // 0.5/cos(5PI/16) out15 = (d8_3 - d8_4) * 2.56291544774150618; // 0.5/cos(7PI/16) //o DCT4 part2 //e DCT2 part1 ein0 = out3 + out15; ein1 = out7 + out11; //o DCT2 part1 oin0 = (out3 - out15) * 0.54119610014619698; // 0.5/cos(PI/8) oin1 = (out7 - out11) * 1.30656296487637653; // 0.5/cos(3PI/8) //e DCT2 part2 out3 = ein0 + ein1; out11 = (ein0 - ein1) * 0.70710678118654752; // cos(PI/4) //o DCT2 part2 out7 = oin0 + oin1; out15 = (oin0 - oin1) * 0.70710678118654752; // cos(PI/4) out7 += out15; //o DCT4 part3 out3 += out7; out7 += out11; out11 += out15; //<<<<o 完成计算DCT8的输出[0..7]的奇数下标元素 } out64[46] = out64[50] = -out1 - out3; //out1 += out3 out64[42] = out64[54] = -out3 - out5; //out3 += out5 out64[38] = out64[58] = -out5 - out7; //out5 += out7 out64[34] = out64[62] = -out7 - out9; //out7 += out9 out64[2] = out9 + out11; //out9 += out11 out64[6] = out11 + out13; //out11 += out13 out64[10] = out13 + out15; //out13 += out15 //<<<<<<<< 完成计算DCT16输出[0..15]的奇数下标元素 } out64[14] = out15; //out64[14]=out32[30] //<<<<<<<<<<<<<<<< // 完成计算DCT32输出[0..31]的偶数下标元素 //============================================================================= //>>>>>>>>>>>>>>>> // 用DCT16计算计算DCT32输出[0..31]的奇数下标元素 in0 = (in32[0] - in32[31]) * 0.50060299823519630; // 0.5/cos( 1 * PI/64) in1 = (in32[1] - in32[30]) * 0.50547095989754366; // 0.5/cos( 3 * PI/64) in2 = (in32[2] - in32[29]) * 0.51544730992262455; // 0.5/cos( 5 * PI/64) in3 = (in32[3] - in32[28]) * 0.53104259108978417; // 0.5/cos( 7 * PI/64) in4 = (in32[4] - in32[27]) * 0.55310389603444453; // 0.5/cos( 9 * PI/64) in5 = (in32[5] - in32[26]) * 0.58293496820613387; // 0.5/cos(11 * PI/64) in6 = (in32[6] - in32[25]) * 0.62250412303566482; // 0.5/cos(13 * PI/64) in7 = (in32[7] - in32[24]) * 0.67480834145500575; // 0.5/cos(15 * PI/64) in8 = (in32[8] - in32[23]) * 0.74453627100229845; // 0.5/cos(17 * PI/64) in9 = (in32[9] - in32[22]) * 0.83934964541552704; // 0.5/cos(19 * PI/64) in10 = (in32[10] - in32[21]) * 0.97256823786196069; // 0.5/cos(21 * PI/64) in11 = (in32[11] - in32[20]) * 1.16943993343288495; // 0.5/cos(23 * PI/64) in12 = (in32[12] - in32[19]) * 1.48416461631416628; // 0.5/cos(25 * PI/64) in13 = (in32[13] - in32[18]) * 2.05778100995341155; // 0.5/cos(27 * PI/64) in14 = (in32[14] - in32[17]) * 3.40760841846871879; // 0.5/cos(29 * PI/64) in15 = (in32[15] - in32[16]) * 10.1900081235480568; // 0.5/cos(31 * PI/64) //DCT16 { //>>>>>>>> 用DCT8计算DCT16输出[0..15]的偶数下标元素: d8_0 = in0 + in15; d8_1 = in1 + in14; d8_2 = in2 + in13; d8_3 = in3 + in12; d8_4 = in4 + in11; d8_5 = in5 + in10; d8_6 = in6 + in9; d8_7 = in7 + in8; //DCT8 { //>>>>e 用DCT4计算DCT8的输出[0..7]的偶数下标元素 out1 = d8_0 + d8_7; out3 = d8_1 + d8_6; out5 = d8_2 + d8_5; out7 = d8_3 + d8_4; //>>e DCT2 ein0 = out1 + out7; ein1 = out3 + out5; out0 = ein0 + ein1; out8 = (ein0 - ein1) * 0.70710678118654752; // 0.5/cos(PI/4) //>>o DCT2 oin0 = (out1 - out7) * 0.54119610014619698; // 0.5/cos( PI/8) oin1 = (out3 - out5) * 1.30656296487637653; // 0.5/cos(3PI/8) out4 = oin0 + oin1; out12 = (oin0 - oin1) * 0.70710678118654752; // cos(PI/4) out4 += out12; //<<<<e 完成计算DCT8的输出[0..7]的偶数下标元素 //>>>>o 用DCT4计算DCT8的输出[0..7]的奇数下标元素 //o DCT4 part1 out1 = (d8_0 - d8_7) * 0.50979557910415917; // 0.5/cos( PI/16) out3 = (d8_1 - d8_6) * 0.60134488693504528; // 0.5/cos(3PI/16) out5 = (d8_2 - d8_5) * 0.89997622313641570; // 0.5/cos(5PI/16) out7 = (d8_3 - d8_4) * 2.56291544774150618; // 0.5/cos(7PI/16) //o DCT4 part2 //e DCT2 part1 ein0 = out1 + out7; ein1 = out3 + out5; //o DCT2 part1 oin0 = (out1 - out7) * 0.54119610014619698; // 0.5/cos(PI/8) oin1 = (out3 - out5) * 1.30656296487637653; // 0.5/cos(3PI/8) //e DCT2 part2 out2 = ein0 + ein1; out10 = (ein0 - ein1) * 0.70710678118654752; // cos(PI/4) //o DCT2 part2 out6 = oin0 + oin1; out14 = (oin0 - oin1) * 0.70710678118654752; out6 += out14; //o DCT4 part3 out2 += out6; out6 += out10; out10 += out14; //<<<<o 完成计算DCT8的输出[0..7]的奇数下标元素 } //<<<<<<<< 完成计算DCT16输出[0..15]的偶数下标元素 //------------------------------------------------------------------------- //>>>>>>>> 用DCT8计算DCT16输出[0..15]的奇数下标元素 d8_0 = (in0 - in15) * 0.50241928618815571; // 0.5/cos( 1 * PI/32) d8_1 = (in1 - in14) * 0.52249861493968888; // 0.5/cos( 3 * PI/32) d8_2 = (in2 - in13) * 0.56694403481635770; // 0.5/cos( 5 * PI/32) d8_3 = (in3 - in12) * 0.64682178335999013; // 0.5/cos( 7 * PI/32) d8_4 = (in4 - in11) * 0.78815462345125022; // 0.5/cos( 9 * PI/32) d8_5 = (in5 - in10) * 1.06067768599034747; // 0.5/cos(11 * PI/32) d8_6 = (in6 - in9) * 1.72244709823833393; // 0.5/cos(13 * PI/32) d8_7 = (in7 - in8) * 5.10114861868916386; // 0.5/cos(15 * PI/32) //DCT8 { //>>>>e 用DCT4计算DCT8的输出[0..7]的偶数下标元素. out1 = d8_0 + d8_7; out3 = d8_1 + d8_6; out5 = d8_2 + d8_5; out7 = d8_3 + d8_4; //>>e DCT2 ein0 = out1 + out7; ein1 = out3 + out5; in0 = ein0 + ein1; //out0->in0,out4->in4 in4 = (ein0 - ein1) * 0.70710678118654752; // 0.5/cos(PI/4) //>>o DCT2 oin0 = (out1 - out7) * 0.54119610014619698; // 0.5/cos( PI/8) oin1 = (out3 - out5) * 1.30656296487637653; // 0.5/cos(3PI/8) in2 = oin0 + oin1; //out2->in2,out6->in6 in6 = (oin0 - oin1) * 0.70710678118654752; // cos(PI/4) in2 += in6; //<<<<e 完成计算DCT8的输出[0..7]的偶数下标元素 //>>>>o 用DCT4计算DCT8的输出[0..7]的奇数下标元素 //o DCT4 part1 out1 = (d8_0 - d8_7) * 0.50979557910415917; // 0.5/cos( PI/16) out3 = (d8_1 - d8_6) * 0.60134488693504528; // 0.5/cos(3PI/16) out5 = (d8_2 - d8_5) * 0.89997622313641570; // 0.5/cos(5PI/16) out7 = (d8_3 - d8_4) * 2.56291544774150618; // 0.5/cos(7PI/16) //o DCT4 part2 //e DCT2 part1 ein0 = out1 + out7; ein1 = out3 + out5; //o DCT2 part1 oin0 = (out1 - out7) * 0.54119610014619698; // 0.5/cos(PI/8) oin1 = (out3 - out5) * 1.30656296487637653; // 0.5/cos(3PI/8) //e DCT2 part2 out1 = ein0 + ein1; out5 = (ein0 - ein1) * 0.70710678118654752; // cos(PI/4) //o DCT2 part2 out3 = oin0 + oin1; out15 = (oin0 - oin1) * 0.70710678118654752; out3 += out15; //o DCT4 part3 out1 += out3; out3 += out5; out5 += out15; //<<<<o 完成计算DCT8的输出[0..7]的奇数下标元素 } //out15=out7 out13 = in6 + out15; //out13=out6+ou7 out11 = out5 + in6; //out11=out5+ou6 out9 = in4 + out5; //out9 =out4+ou5 out7 = out3 + in4; //out7 =out3+ou4 out5 = in2 + out3; //out5 =out2+ou3 out3 = out1 + in2; //out3 =out1+ou2 out1 += in0; //out1 =out0+ou1 //<<<<<<<< 完成计算DCT16输出[0..15]的奇数下标元素 } //out32[i]=out[i]+out[i+1]; out32[31]=out[15] out64[47] = out64[49] = -out0 - out1; out64[45] = out64[51] = -out1 - out2; out64[43] = out64[53] = -out2 - out3; out64[41] = out64[55] = -out3 - out4; out64[39] = out64[57] = -out4 - out5; out64[37] = out64[59] = -out5 - out6; out64[35] = out64[61] = -out6 - out7; out64[33] = out64[63] = -out7 - out8; out64[1] = out8 + out9; out64[3] = out9 + out10; out64[5] = out10 + out11; out64[7] = out11 + out12; out64[9] = out12 + out13; out64[11] = out13 + out14; out64[13] = out14 + out15; out64[15] = out15; //<<<<<<<<<<<<<<<< out64[16] = 0; out64[17] = -out64[15]; out64[18] = -out64[14]; out64[19] = -out64[13]; out64[20] = -out64[12]; out64[21] = -out64[11]; out64[22] = -out64[10]; out64[23] = -out64[9]; out64[24] = -out64[8]; out64[25] = -out64[7]; out64[26] = -out64[6]; out64[27] = -out64[5]; out64[28] = -out64[4]; out64[29] = -out64[3]; out64[30] = -out64[2]; out64[31] = -out64[1]; out64[32] = -out64[0]; }
本文的DCT-II(32→32)快速算法的作者是Michael Hipp,开源程序mpg123使用的是该算法。贴出mpg123的DCT快速算法代码:
void dct64(real *out0,real *out1,real *samples) { real bufs[64]; { register int i,j; register real *b1,*b2,*bs,*costab; b1 = samples; bs = bufs; costab = pnts[0]+16; b2 = b1 + 32; for(i=15;i>=0;i--) *bs++ = (*b1++ + *--b2); for(i=15;i>=0;i--) *bs++ = REAL_MUL((*--b2 - *b1++), *--costab); b1 = bufs; costab = pnts[1]+8; b2 = b1 + 16; { for(i=7;i>=0;i--) *bs++ = (*b1++ + *--b2); for(i=7;i>=0;i--) *bs++ = REAL_MUL((*--b2 - *b1++), *--costab); b2 += 32; costab += 8; for(i=7;i>=0;i--) *bs++ = (*b1++ + *--b2); for(i=7;i>=0;i--) *bs++ = REAL_MUL((*b1++ - *--b2), *--costab); b2 += 32; } bs = bufs; costab = pnts[2]; b2 = b1 + 8; for(j=2;j;j--) { for(i=3;i>=0;i--) *bs++ = (*b1++ + *--b2); for(i=3;i>=0;i--) *bs++ = REAL_MUL((*--b2 - *b1++), costab[i]); b2 += 16; for(i=3;i>=0;i--) *bs++ = (*b1++ + *--b2); for(i=3;i>=0;i--) *bs++ = REAL_MUL((*b1++ - *--b2), costab[i]); b2 += 16; } b1 = bufs; costab = pnts[3]; b2 = b1 + 4; for(j=4;j;j--) { *bs++ = (*b1++ + *--b2); *bs++ = (*b1++ + *--b2); *bs++ = REAL_MUL((*--b2 - *b1++), costab[1]); *bs++ = REAL_MUL((*--b2 - *b1++), costab[0]); b2 += 8; *bs++ = (*b1++ + *--b2); *bs++ = (*b1++ + *--b2); *bs++ = REAL_MUL((*b1++ - *--b2), costab[1]); *bs++ = REAL_MUL((*b1++ - *--b2), costab[0]); b2 += 8; } bs = bufs; costab = pnts[4]; for(j=8;j;j--) { real v0,v1; v0=*b1++; v1 = *b1++; *bs++ = (v0 + v1); *bs++ = REAL_MUL((v0 - v1), (*costab)); v0=*b1++; v1 = *b1++; *bs++ = (v0 + v1); *bs++ = REAL_MUL((v1 - v0), (*costab)); } } { register real *b1; register int i; for(b1=bufs,i=8;i;i--,b1+=4) b1[2] += b1[3]; for(b1=bufs,i=4;i;i--,b1+=8) { b1[4] += b1[6]; b1[6] += b1[5]; b1[5] += b1[7]; } for(b1=bufs,i=2;i;i--,b1+=16) { b1[8] += b1[12]; b1[12] += b1[10]; b1[10] += b1[14]; b1[14] += b1[9]; b1[9] += b1[13]; b1[13] += b1[11]; b1[11] += b1[15]; } } out0[0x10*16] = bufs[0]; out0[0x10*15] = bufs[16+0] + bufs[16+8]; out0[0x10*14] = bufs[8]; out0[0x10*13] = bufs[16+8] + bufs[16+4]; out0[0x10*12] = bufs[4]; out0[0x10*11] = bufs[16+4] + bufs[16+12]; out0[0x10*10] = bufs[12]; out0[0x10* 9] = bufs[16+12] + bufs[16+2]; out0[0x10* 8] = bufs[2]; out0[0x10* 7] = bufs[16+2] + bufs[16+10]; out0[0x10* 6] = bufs[10]; out0[0x10* 5] = bufs[16+10] + bufs[16+6]; out0[0x10* 4] = bufs[6]; out0[0x10* 3] = bufs[16+6] + bufs[16+14]; out0[0x10* 2] = bufs[14]; out0[0x10* 1] = bufs[16+14] + bufs[16+1]; out0[0x10* 0] = bufs[1]; out1[0x10* 0] = bufs[1]; out1[0x10* 1] = bufs[16+1] + bufs[16+9]; out1[0x10* 2] = bufs[9]; out1[0x10* 3] = bufs[16+9] + bufs[16+5]; out1[0x10* 4] = bufs[5]; out1[0x10* 5] = bufs[16+5] + bufs[16+13]; out1[0x10* 6] = bufs[13]; out1[0x10* 7] = bufs[16+13] + bufs[16+3]; out1[0x10* 8] = bufs[3]; out1[0x10* 9] = bufs[16+3] + bufs[16+11]; out1[0x10*10] = bufs[11]; out1[0x10*11] = bufs[16+11] + bufs[16+7]; out1[0x10*12] = bufs[7]; out1[0x10*13] = bufs[16+7] + bufs[16+15]; out1[0x10*14] = bufs[15]; out1[0x10*15] = bufs[16+15]; }
本文的“MP3解码之DCT(32→64)快速算法的展开”的JAVA代码是jmp123开源项目的一部分。
【jmp123下载地址】http://jmp123.sourceforge.net/