SoundTouch Audio Processing Library: Source Code Analysis and Algorithm Extraction (9)

  We all know that changing the playback speed has a side effect: the pitch changes along with it. Speed the playback up and the sound shifts to a higher, shriller pitch; slow it down and the pitch drops and becomes deeper. That is an aside, though. There are many ways to change audio playback speed; here I will only look at the core of SoundTouch's rate-change code, namely its linear interpolation algorithm. The interpolation formula was already given in an earlier part:
  k = (y - y0) / (y1 - y0)
=> y = (1 - k) * y0 + k * y1
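As a quick check with made-up numbers (my own illustration, not from the original text): take k = 0.25, y0 = 100 and y1 = 200; then y = 0.75 * 100 + 0.25 * 200 = 125, i.e. a sample one quarter of the way from y0 towards y1.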
Here k is the ratio of the new playback speed to the original playback speed, i.e. the speed-up/slow-down factor. In this formula k, the ratio, is known, and y1 is the current known sample value; y also depends on the earlier sample y0. How should that be understood? Suppose you split a stream of samples into segments and process one segment at a time: to interpolate between the last sample of one segment and the first sample of the next, you have to keep that last sample around. With that in mind, for 16-bit stereo input we first define two variables (one per channel) to hold the previous values:
short sPrevSampleL = 0,sPrevSampleR = 0;
The algorithm is then implemented as follows. The first parameter is the rate; it can be thought of as the interpolation step: the smaller it is, the more frames are interpolated in and the slower the playback, while the larger it is, the fewer frames are inserted (or input frames are skipped outright) and the faster the playback.
uint InterpolationStereo(float fRate, short *dest, short *src, uint nSamples)
{
    uint srcPos = 0, i = 0, used = 0;
    float fSlopeCount = 0.0f;

    if (nSamples == 0) return 0;

    /* First interpolate between the saved last frame of the previous block
       (sPrevSampleL/R) and the first frame of this block. */
    while (fSlopeCount <= 1.0f)
    {
        dest[2 * i]     = (short)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
        dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
        i++;
        fSlopeCount += fRate;
    }
    fSlopeCount -= 1.0f;

    /* Then walk through the rest of the block, interpolating between each
       pair of adjacent input frames; 'used' indexes the input frame that is
       the left endpoint of the current interpolation interval. */
    if (nSamples > 1)
    {
        while (1)
        {
            while (fSlopeCount > 1.0f)
            {
                fSlopeCount -= 1.0f;
                used++;
                if (used >= nSamples - 1) goto end;
            }
            srcPos = 2 * used;
            dest[2 * i] = (short)((1.0f - fSlopeCount) * src[srcPos]
                + fSlopeCount * src[srcPos + 2]);
            dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * src[srcPos + 1]
                + fSlopeCount * src[srcPos + 3]);
            i++;
            fSlopeCount += fRate;
        }
    }
end:
    /* Remember the last input frame so the next call can interpolate
       across the block boundary. */
    sPrevSampleL = src[2 * nSamples - 2];
    sPrevSampleR = src[2 * nSamples - 1];
    return i;   /* number of output frames written */
}
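Since each output frame advances the read position through the input by fRate, one call produces roughly nSamples / fRate output frames. As a minimal sketch (my own helper, not part of SoundTouch or of the example below), the destination buffer could be sized with a conservative bound like this:

/* Rough upper bound on the number of stereo output frames that
   InterpolationStereo may emit for nSamples input frames at rate fRate.
   Illustrative helper only; the extra slack covers the boundary frames
   carried over from the previous call. */
uint MaxOutputFrames(float fRate, uint nSamples)
{
    return (uint)((float)nSamples / fRate) + 4;
}

In the complete program below, each read of BUFF_SIZE bytes yields 1024 input frames, so with rate kept at or above 0.5 the SCALE*BUFF_SIZE output buffer is more than large enough.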

The complete example program:

// Interpolates a WAV file containing 16-bit stereo PCM audio.
// Author: 核桃
// Change the value of the float rate variable in main to change the playback speed.
// Values between 0.5 and 2 work best.

#include <stdio.h>
#include <stdlib.h>

#define BUFF_SIZE 4096
#define SCALE 3

#ifndef uint
typedef unsigned int uint;
#endif

typedef struct
{
    char riff_char[4];
    int  package_len;
    char wave[4];
} WavRiff;

typedef struct
{
    char  fmt[4];
    int   format_len;
    short fixed;
    short channel_number;
    int   sample_rate;
    int   byte_rate;
    short byte_per_sample;
    short bits_per_sample;
} WavFormat;

typedef struct
{
    char  data_field[4];
    uint  data_len;
} WavData;

typedef struct
{
    WavRiff   riff;
    WavFormat format;
    WavData   data;
} WavHeader;

short inbuffer[BUFF_SIZE];
short outbuffer[SCALE*BUFF_SIZE];
short sPrevSampleL = 0,sPrevSampleR = 0;

uint InterpolationStereo(float fRate,short *dest,short *src,uint nSamples);

uint InterpolationStereo(float fRate,short *dest,short *src,uint nSamples)
{
    uint srcPos = 0, i = 0, used = 0;
    float fSlopeCount = 0.0f;
 
    if (nSamples == 0) return 0;
    while (fSlopeCount <= 1.0f)
    {
        dest[2 * i] = (short)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
        dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
        i++;
        fSlopeCount += fRate;
    }
    fSlopeCount -= 1.0f; 
    if (nSamples > 1)
    {
        while (1)
        {
            while (fSlopeCount > 1.0f)
            {
                fSlopeCount -= 1.0f;
                used ++;
                if (used >= nSamples - 1) goto end;
            }
            srcPos = 2 * used; 
            dest[2 * i] = (short)((1.0f - fSlopeCount) * src[srcPos]
                + fSlopeCount * src[srcPos + 2]);
            dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * src[srcPos + 1]
                + fSlopeCount * src[srcPos + 3]); 
            i++;
            fSlopeCount += fRate;
        }
    }
end:
    sPrevSampleL = src[2 * nSamples - 2];
    sPrevSampleR = src[2 * nSamples - 1]; 
    return i;
}

int main(int argc, char **argv)
{
    FILE *pIn = NULL, *pOut = NULL;
    WavHeader *wheader = NULL;
    uint isamples = 0, numBytes = 0, totalBytes = 0;
    float rate = 0.8f;

    if (argc < 3)
    {
        printf("usage: %s input.wav output.wav\n", argv[0]);
        return -1;
    }
    wheader = (WavHeader *)malloc(sizeof(WavHeader));
    pIn = fopen(argv[1], "rb");
    pOut = fopen(argv[2], "wb");
    numBytes = fread(wheader, 1, sizeof(WavHeader), pIn);
    fwrite(wheader, 1, sizeof(WavHeader), pOut);

    /* Read the input in BUFF_SIZE-byte chunks, interpolate, and write out. */
    while ((numBytes = fread(inbuffer, 1, BUFF_SIZE, pIn)) > 0)
    {
        numBytes = numBytes >> 2;                 /* bytes -> stereo frames */
        isamples = InterpolationStereo(rate, outbuffer, inbuffer, numBytes);
        isamples = isamples << 2;                 /* stereo frames -> bytes */
        fwrite(outbuffer, 1, isamples, pOut);
        totalBytes += isamples;
    }

    /* Rewrite the header with the actual length of the output data. */
    fseek(pOut, 0, SEEK_SET);
    wheader->data.data_len = totalBytes;
    wheader->riff.package_len = totalBytes + sizeof(WavHeader) - 4 * sizeof(char) - sizeof(int);
    fwrite(wheader, 1, sizeof(WavHeader), pOut);
    free(wheader);
    fclose(pIn);
    fclose(pOut);
    return 0;
}
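Assuming the source is saved as, say, interpolate.c (the file name is my own choice), it can be built and tried along these lines:

gcc -o interpolate interpolate.c
./interpolate input.wav output.wav

With rate = 0.8 the output plays more slowly (and at a lower pitch) than the input; raising rate above 1.0 has the opposite effect.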

  PS: By the Nyquist sampling theorem, the sampling frequency must be at least twice the highest frequency present in the signal. For our purposes that can be read as: the fastest playback should not exceed 2x and the slowest should not drop below 1/2 (0.5x) of the original speed, otherwise the audible distortion becomes severe.
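As a small sketch of that advice (my own addition, not part of the original program), the rate could simply be clamped before it is passed to InterpolationStereo:

/* Clamp the playback rate to the 0.5x - 2.0x range recommended above.
   Illustrative helper, not from the original code. */
float ClampRate(float fRate)
{
    if (fRate < 0.5f) return 0.5f;
    if (fRate > 2.0f) return 2.0f;
    return fRate;
}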
