我们都知道,当播放速度改变的时候,会带来一个副作用,那就是音调也会随着播放的速度而改
变。播放速度加快,声音将升调,变得尖锐;播放速度减慢,声音将降调,变得低沉,当然这是题
外话。改变音频播放速度的方法有很多,我只针对ST(SoundTouch)变速算法的核心,也就是线性
插值算法的实现。对于ST的插值算法前面已经提到。
k = (y - y0)/(y1-y0)
->y=(1-k) * y0+k * y1
k是插值点在相邻两个采样y0、y1之间的相对位置(取值在0到1之间)。每输出一个采样,k就增加
一个步长,这个步长就是现在声音播放速度和原始声音播放速度的比,也就是加快/减慢的比率。
从这个公式上看,k由比率决定,是已知的,y1为当前已知的一个采样值,y值还和前一个采样y0
有关,如何理解呢?假如你把一连串的采样分成若干段,每次拿一段出来处理,在某段的结尾和紧
接的一段的开头之间做插值,就要保存前一段结尾的采样点。这下子好理解了吧。因此针对16位
双声道的音频输入,我们需要先定义两个变量(左右声道)来保存前一段结尾的采样值。
/* Last left/right sample of the previously processed chunk; InterpolationStereo
 * reads them to interpolate across the chunk boundary and overwrites them on
 * every call. */
short sPrevSampleL = 0;
short sPrevSampleR = 0;
然后算法实现如下:
第一个参数表示比率,可以理解为插值的间距,越小帧插得越多,播放速度也就越慢,越大帧插得
少或者直接跳过,播放速度也就越快。
/*
 * Resample one chunk of interleaved 16-bit stereo PCM by linear interpolation.
 *
 * fRate    - distance, in source frames, between consecutive output frames:
 *            below 1 more frames are produced than consumed, above 1 fewer.
 * dest     - receives interleaved L/R output frames. Its capacity is not
 *            known here; the caller must provide room for about
 *            nSamples / fRate + 2 frames.
 * src      - nSamples interleaved L/R input frames.
 * nSamples - number of stereo frames (not shorts) in src.
 *
 * Returns the number of stereo frames written to dest. Returns 0 without
 * touching dest or the saved samples when nSamples is 0 or fRate is not a
 * positive number.
 *
 * The interpolation position restarts at 0 on every call, so the first output
 * frame of a chunk equals the last frame of the previous chunk, which is kept
 * in sPrevSampleL / sPrevSampleR.
 */
uint InterpolationStereo(float fRate,short *dest,short *src,uint nSamples)
{
    uint srcPos = 0, i = 0, used = 0;
    float fSlopeCount = 0.0f;   /* fractional position inside the current interval */

    if (nSamples == 0) return 0;
    /* A zero, negative or NaN step never moves fSlopeCount past 1.0, which
     * would make the loop below run forever and write past the end of dest. */
    if (!(fRate > 0.0f)) return 0;

    /* Interval between the saved last frame of the previous chunk and src[0]. */
    while (fSlopeCount <= 1.0f)
    {
        dest[2 * i] = (short)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
        dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
        i++;
        fSlopeCount += fRate;
    }
    fSlopeCount -= 1.0f;

    /* Intervals between consecutive frames inside src. */
    if (nSamples > 1)
    {
        for (;;)
        {
            /* Skip whole source frames until the position is fractional again. */
            while (fSlopeCount > 1.0f)
            {
                fSlopeCount -= 1.0f;
                used++;
                if (used >= nSamples - 1) goto end;   /* no right-hand frame left */
            }
            srcPos = 2 * used;
            dest[2 * i] = (short)((1.0f - fSlopeCount) * src[srcPos]
                                  + fSlopeCount * src[srcPos + 2]);
            dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * src[srcPos + 1]
                                      + fSlopeCount * src[srcPos + 3]);
            i++;
            fSlopeCount += fRate;
        }
    }
end:
    /* Remember the final frame for the next call. */
    sPrevSampleL = src[2 * nSamples - 2];
    sPrevSampleR = src[2 * nSamples - 1];
    return i;
}
完整的例程:
//对16位双声道pcm编码的wav文件插值。
//作者:核桃
//修改main函数中float rate变量的值可以改变播放速度
//取值范围最好在0.5~2之间
#include <stdio.h>
#include <stdlib.h>
/* Chunk size. Used both as the element count of inbuffer (shorts) and as the
 * byte count passed to fread in main, so only half of inbuffer is filled per
 * read. */
#define BUFF_SIZE 4096
/* outbuffer holds SCALE times as many shorts as inbuffer. */
#define SCALE 3
/* NOTE(review): #ifndef only detects a macro named uint, not an existing
 * typedef, so this typedef is emitted unless uint has been #defined. */
#ifndef uint
typedef unsigned int uint;
#endif
/* First member of WavHeader: the outer container part of the file header.
 * Only package_len is ever modified by this program. */
typedef struct {
    char riff_char[4];  /* container tag, presumably "RIFF"; never checked here */
    int  package_len;   /* main stores output data bytes + sizeof(WavHeader) - 8 */
    char wave[4];       /* form tag, presumably "WAVE"; never checked here */
} WavRiff;
/* Second member of WavHeader: the format description. This program copies it
 * from input to output unchanged and never inspects any field.
 * Field meanings below are inferred from the names - TODO confirm. */
typedef struct {
    char  fmt[4];           /* sub-chunk tag, presumably "fmt " */
    int   format_len;       /* presumably the size of the format data that follows */
    short fixed;            /* presumably the audio format code */
    short channel_number;   /* presumably the channel count */
    int   sample_rate;      /* presumably frames per second */
    int   byte_rate;        /* presumably bytes per second */
    short byte_per_sample;  /* presumably bytes per frame (block align) */
    short bits_per_sample;  /* presumably bits per single-channel sample */
} WavFormat;
/* Third member of WavHeader: the header of the sample data section. */
typedef struct {
    char data_field[4];  /* sub-chunk tag, presumably "data"; never checked here */
    uint data_len;       /* main overwrites this with the number of PCM bytes written */
} WavData;
/* Complete file header as handled by main: it is read from the input and
 * written to the output as one blob of sizeof(WavHeader) bytes.
 * NOTE(review): this assumes the three parts sit back to back in the file with
 * no padding and no extra chunks in between - confirm for the inputs used. */
typedef struct {
    WavRiff   riff;    /* container part */
    WavFormat format;  /* format description, copied through unchanged */
    WavData   data;    /* data section header */
} WavHeader;
/* Input chunk read from the source file by main. */
short inbuffer[BUFF_SIZE];
/* Output chunk filled by InterpolationStereo; SCALE times the input capacity. */
short outbuffer[SCALE * BUFF_SIZE];
/* Last left/right sample of the previous chunk, carried across calls to
 * InterpolationStereo. */
short sPrevSampleL = 0;
short sPrevSampleR = 0;
/*
 * Resample one chunk of interleaved 16-bit stereo PCM by linear interpolation.
 *
 * fRate    - distance, in source frames, between consecutive output frames:
 *            below 1 more frames are produced than consumed, above 1 fewer.
 * dest     - receives interleaved L/R output frames. Its capacity is not
 *            known here; the caller must provide room for about
 *            nSamples / fRate + 2 frames.
 * src      - nSamples interleaved L/R input frames.
 * nSamples - number of stereo frames (not shorts) in src.
 *
 * Returns the number of stereo frames written to dest. Returns 0 without
 * touching dest or the saved samples when nSamples is 0 or fRate is not a
 * positive number.
 *
 * The interpolation position restarts at 0 on every call, so the first output
 * frame of a chunk equals the last frame of the previous chunk, which is kept
 * in sPrevSampleL / sPrevSampleR.
 */
uint InterpolationStereo(float fRate,short *dest,short *src,uint nSamples);
uint InterpolationStereo(float fRate,short *dest,short *src,uint nSamples)
{
    uint srcPos = 0, i = 0, used = 0;
    float fSlopeCount = 0.0f;   /* fractional position inside the current interval */

    if (nSamples == 0) return 0;
    /* A zero, negative or NaN step never moves fSlopeCount past 1.0, which
     * would make the loop below run forever and write past the end of dest. */
    if (!(fRate > 0.0f)) return 0;

    /* Interval between the saved last frame of the previous chunk and src[0]. */
    while (fSlopeCount <= 1.0f)
    {
        dest[2 * i] = (short)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
        dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
        i++;
        fSlopeCount += fRate;
    }
    fSlopeCount -= 1.0f;

    /* Intervals between consecutive frames inside src. */
    if (nSamples > 1)
    {
        for (;;)
        {
            /* Skip whole source frames until the position is fractional again. */
            while (fSlopeCount > 1.0f)
            {
                fSlopeCount -= 1.0f;
                used++;
                if (used >= nSamples - 1) goto end;   /* no right-hand frame left */
            }
            srcPos = 2 * used;
            dest[2 * i] = (short)((1.0f - fSlopeCount) * src[srcPos]
                                  + fSlopeCount * src[srcPos + 2]);
            dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * src[srcPos + 1]
                                      + fSlopeCount * src[srcPos + 3]);
            i++;
            fSlopeCount += fRate;
        }
    }
end:
    /* Remember the final frame for the next call. */
    sPrevSampleL = src[2 * nSamples - 2];
    sPrevSampleR = src[2 * nSamples - 1];
    return i;
}
int main(int arg, char **argc)
{
FILE *pIn = NULL,*pOut = NULL;
WavHeader *wheader = NULL;
uint isamples = 0,numBytes = 0,totalBytes = 0;
float rate = 0.8f;
if (arg < 3)
{
printf("usage: %s input.wav output.wav/n",argc[0]);
return -1;
}
wheader = (WavHeader *)malloc(sizeof(WavHeader));
pIn = fopen(argc[1], "rb");
pOut = fopen(argc[2], "wb");
numBytes = fread(wheader,1,sizeof(WavHeader),pIn);
fwrite(wheader,1,sizeof(WavHeader),pOut);
while(!feof(pIn))
{
numBytes = fread(inbuffer,1,BUFF_SIZE,pIn);
numBytes = numBytes >> 2;
isamples = InterpolationStereo(rate,outbuffer,inbuffer,numBytes);
isamples = isamples << 2;
fwrite(outbuffer,1,isamples,pOut);
totalBytes += isamples;
}
fseek(pOut, 0, SEEK_SET);
wheader->data.data_len = totalBytes;
wheader->riff.package_len = totalBytes + sizeof(WavHeader)-4*sizeof(char)-
sizeof(int);
fwrite(wheader,1,sizeof(WavHeader),pOut);
free(wheader);
fclose(pIn);
fclose(pOut);
return 0;
}
PS:根据奈奎斯特(Nyquist)采样定理,采样频率一定要等于或者大于信号中最高频率的2倍。我们
可以粗略地理解为,最快播放速度不宜超过2倍,最慢播放速度不宜低于原来信号的1/2也就是
0.5倍,否则声音失真会非常严重。
我们都知道,当播放速度改变的时候,会带来一个副作用,那就是音调也会随着播放的速度而改
变。播放速度加快,声音将升调,变得尖锐;播放速度减慢,声音将降调,变得低沉,当然这是题
外话。改变音频播放速度的方法有很多,我只针对ST(SoundTouch)变速算法的核心,也就是线性
插值算法的实现。对于ST的插值算法前面已经提到。
k = (y - y0)/(y1-y0)
->y=(1-k) * y0+k * y1
k是插值点在相邻两个采样y0、y1之间的相对位置(取值在0到1之间)。每输出一个采样,k就增加
一个步长,这个步长就是现在声音播放速度和原始声音播放速度的比,也就是加快/减慢的比率。
从这个公式上看,k由比率决定,是已知的,y1为当前已知的一个采样值,y值还和前一个采样y0
有关,如何理解呢?假如你把一连串的采样分成若干段,每次拿一段出来处理,在某段的结尾和紧
接的一段的开头之间做插值,就要保存前一段结尾的采样点。这下子好理解了吧。因此针对16位
双声道的音频输入,我们需要先定义两个变量(左右声道)来保存前一段结尾的采样值。
/* Last left/right sample of the previously processed chunk; InterpolationStereo
 * reads them to interpolate across the chunk boundary and overwrites them on
 * every call. */
short sPrevSampleL = 0;
short sPrevSampleR = 0;
然后算法实现如下:
第一个参数表示比率,可以理解为插值的间距,越小帧插得越多,播放速度也就越慢,越大帧插得
少或者直接跳过,播放速度也就越快。
/*
 * Resample one chunk of interleaved 16-bit stereo PCM by linear interpolation.
 *
 * fRate    - distance, in source frames, between consecutive output frames:
 *            below 1 more frames are produced than consumed, above 1 fewer.
 * dest     - receives interleaved L/R output frames. Its capacity is not
 *            known here; the caller must provide room for about
 *            nSamples / fRate + 2 frames.
 * src      - nSamples interleaved L/R input frames.
 * nSamples - number of stereo frames (not shorts) in src.
 *
 * Returns the number of stereo frames written to dest. Returns 0 without
 * touching dest or the saved samples when nSamples is 0 or fRate is not a
 * positive number.
 *
 * The interpolation position restarts at 0 on every call, so the first output
 * frame of a chunk equals the last frame of the previous chunk, which is kept
 * in sPrevSampleL / sPrevSampleR.
 */
uint InterpolationStereo(float fRate,short *dest,short *src,uint nSamples)
{
    uint srcPos = 0, i = 0, used = 0;
    float fSlopeCount = 0.0f;   /* fractional position inside the current interval */

    if (nSamples == 0) return 0;
    /* A zero, negative or NaN step never moves fSlopeCount past 1.0, which
     * would make the loop below run forever and write past the end of dest. */
    if (!(fRate > 0.0f)) return 0;

    /* Interval between the saved last frame of the previous chunk and src[0]. */
    while (fSlopeCount <= 1.0f)
    {
        dest[2 * i] = (short)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
        dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
        i++;
        fSlopeCount += fRate;
    }
    fSlopeCount -= 1.0f;

    /* Intervals between consecutive frames inside src. */
    if (nSamples > 1)
    {
        for (;;)
        {
            /* Skip whole source frames until the position is fractional again. */
            while (fSlopeCount > 1.0f)
            {
                fSlopeCount -= 1.0f;
                used++;
                if (used >= nSamples - 1) goto end;   /* no right-hand frame left */
            }
            srcPos = 2 * used;
            dest[2 * i] = (short)((1.0f - fSlopeCount) * src[srcPos]
                                  + fSlopeCount * src[srcPos + 2]);
            dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * src[srcPos + 1]
                                      + fSlopeCount * src[srcPos + 3]);
            i++;
            fSlopeCount += fRate;
        }
    }
end:
    /* Remember the final frame for the next call. */
    sPrevSampleL = src[2 * nSamples - 2];
    sPrevSampleR = src[2 * nSamples - 1];
    return i;
}
完整的例程:
//对16位双声道pcm编码的wav文件插值。
//作者:核桃
//修改main函数中float rate变量的值可以改变播放速度
//取值范围最好在0.5~2之间
#include <stdio.h>
#include <stdlib.h>
/* Chunk size. Used both as the element count of inbuffer (shorts) and as the
 * byte count passed to fread in main, so only half of inbuffer is filled per
 * read. */
#define BUFF_SIZE 4096
/* outbuffer holds SCALE times as many shorts as inbuffer. */
#define SCALE 3
/* NOTE(review): #ifndef only detects a macro named uint, not an existing
 * typedef, so this typedef is emitted unless uint has been #defined. */
#ifndef uint
typedef unsigned int uint;
#endif
/* First member of WavHeader: the outer container part of the file header.
 * Only package_len is ever modified by this program. */
typedef struct {
    char riff_char[4];  /* container tag, presumably "RIFF"; never checked here */
    int  package_len;   /* main stores output data bytes + sizeof(WavHeader) - 8 */
    char wave[4];       /* form tag, presumably "WAVE"; never checked here */
} WavRiff;
/* Second member of WavHeader: the format description. This program copies it
 * from input to output unchanged and never inspects any field.
 * Field meanings below are inferred from the names - TODO confirm. */
typedef struct {
    char  fmt[4];           /* sub-chunk tag, presumably "fmt " */
    int   format_len;       /* presumably the size of the format data that follows */
    short fixed;            /* presumably the audio format code */
    short channel_number;   /* presumably the channel count */
    int   sample_rate;      /* presumably frames per second */
    int   byte_rate;        /* presumably bytes per second */
    short byte_per_sample;  /* presumably bytes per frame (block align) */
    short bits_per_sample;  /* presumably bits per single-channel sample */
} WavFormat;
/* Third member of WavHeader: the header of the sample data section. */
typedef struct {
    char data_field[4];  /* sub-chunk tag, presumably "data"; never checked here */
    uint data_len;       /* main overwrites this with the number of PCM bytes written */
} WavData;
/* Complete file header as handled by main: it is read from the input and
 * written to the output as one blob of sizeof(WavHeader) bytes.
 * NOTE(review): this assumes the three parts sit back to back in the file with
 * no padding and no extra chunks in between - confirm for the inputs used. */
typedef struct {
    WavRiff   riff;    /* container part */
    WavFormat format;  /* format description, copied through unchanged */
    WavData   data;    /* data section header */
} WavHeader;
/* Input chunk read from the source file by main. */
short inbuffer[BUFF_SIZE];
/* Output chunk filled by InterpolationStereo; SCALE times the input capacity. */
short outbuffer[SCALE * BUFF_SIZE];
/* Last left/right sample of the previous chunk, carried across calls to
 * InterpolationStereo. */
short sPrevSampleL = 0;
short sPrevSampleR = 0;
/*
 * Resample one chunk of interleaved 16-bit stereo PCM by linear interpolation.
 *
 * fRate    - distance, in source frames, between consecutive output frames:
 *            below 1 more frames are produced than consumed, above 1 fewer.
 * dest     - receives interleaved L/R output frames. Its capacity is not
 *            known here; the caller must provide room for about
 *            nSamples / fRate + 2 frames.
 * src      - nSamples interleaved L/R input frames.
 * nSamples - number of stereo frames (not shorts) in src.
 *
 * Returns the number of stereo frames written to dest. Returns 0 without
 * touching dest or the saved samples when nSamples is 0 or fRate is not a
 * positive number.
 *
 * The interpolation position restarts at 0 on every call, so the first output
 * frame of a chunk equals the last frame of the previous chunk, which is kept
 * in sPrevSampleL / sPrevSampleR.
 */
uint InterpolationStereo(float fRate,short *dest,short *src,uint nSamples);
uint InterpolationStereo(float fRate,short *dest,short *src,uint nSamples)
{
    uint srcPos = 0, i = 0, used = 0;
    float fSlopeCount = 0.0f;   /* fractional position inside the current interval */

    if (nSamples == 0) return 0;
    /* A zero, negative or NaN step never moves fSlopeCount past 1.0, which
     * would make the loop below run forever and write past the end of dest. */
    if (!(fRate > 0.0f)) return 0;

    /* Interval between the saved last frame of the previous chunk and src[0]. */
    while (fSlopeCount <= 1.0f)
    {
        dest[2 * i] = (short)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
        dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
        i++;
        fSlopeCount += fRate;
    }
    fSlopeCount -= 1.0f;

    /* Intervals between consecutive frames inside src. */
    if (nSamples > 1)
    {
        for (;;)
        {
            /* Skip whole source frames until the position is fractional again. */
            while (fSlopeCount > 1.0f)
            {
                fSlopeCount -= 1.0f;
                used++;
                if (used >= nSamples - 1) goto end;   /* no right-hand frame left */
            }
            srcPos = 2 * used;
            dest[2 * i] = (short)((1.0f - fSlopeCount) * src[srcPos]
                                  + fSlopeCount * src[srcPos + 2]);
            dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * src[srcPos + 1]
                                      + fSlopeCount * src[srcPos + 3]);
            i++;
            fSlopeCount += fRate;
        }
    }
end:
    /* Remember the final frame for the next call. */
    sPrevSampleL = src[2 * nSamples - 2];
    sPrevSampleR = src[2 * nSamples - 1];
    return i;
}
int main(int arg, char **argc)
{
FILE *pIn = NULL,*pOut = NULL;
WavHeader *wheader = NULL;
uint isamples = 0,numBytes = 0,totalBytes = 0;
float rate = 0.8f;
if (arg < 3)
{
printf("usage: %s input.wav output.wav/n",argc[0]);
return -1;
}
wheader = (WavHeader *)malloc(sizeof(WavHeader));
pIn = fopen(argc[1], "rb");
pOut = fopen(argc[2], "wb");
numBytes = fread(wheader,1,sizeof(WavHeader),pIn);
fwrite(wheader,1,sizeof(WavHeader),pOut);
while(!feof(pIn))
{
numBytes = fread(inbuffer,1,BUFF_SIZE,pIn);
numBytes = numBytes >> 2;
isamples = InterpolationStereo(rate,outbuffer,inbuffer,numBytes);
isamples = isamples << 2;
fwrite(outbuffer,1,isamples,pOut);
totalBytes += isamples;
}
fseek(pOut, 0, SEEK_SET);
wheader->data.data_len = totalBytes;
wheader->riff.package_len = totalBytes + sizeof(WavHeader)-4*sizeof(char)-
sizeof(int);
fwrite(wheader,1,sizeof(WavHeader),pOut);
free(wheader);
fclose(pIn);
fclose(pOut);
return 0;
}
PS:根据奈奎斯特(Nyquist)采样定理,采样频率一定要等于或者大于信号中最高频率的2倍。我们
可以粗略地理解为,最快播放速度不宜超过2倍,最慢播放速度不宜低于原来信号的1/2也就是
0.5倍,否则声音失真会非常严重。