ST的插值算法

  我们都知道,当播放速度改变的时候,会带来一个副作用,那就是音调也会随着播放的速度而改 
变。播放速度加快,声音将升调,变得尖锐,播放速度减慢,声音将降调,变得低沉,当然这是题 
外之话。改变音频播放速度的方法有很多,我只针对ST变速算法的核心,也就是线性插值算法的实 
现。对于ST的插值算法前面已经提到。 
  k = (y - y0)/(y1-y0) 
->y=(1-k) * y0+k * y1 
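k 是插值系数,即待求的输出点在相邻两个采样 y0、y1 之间的相对位置(0≤k≤1)。每输出一个点,k 就累加一次播放速度比率(现在声音播放速度和原始声音播放速度的比,也就是加快/减慢的比率,即下面代码中的 fRate);k 超过 1 时减去 1,并前进到下一对采样。从这个公式上看,k 已知,y1 为当前已知的一个采样值,而 y 值还和前一个采样 y0 有关,如何理解呢?假如你把一连串的采样分成若干段,每次拿一段出来处理,要在某段的结尾和紧接的下一段开头之间做插值,就要保存前一段结尾的采样点。这下子好理解了吧。因此针对16位双声道的音频输入,我们需要先定义两个变量(左右声道)来保存前一段最后一个采样的值。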
/* Last stereo frame (left, right) of the previously processed block; the
 * interpolation routine reads these to interpolate across the block boundary
 * and overwrites them at the end of every call. */
short sPrevSampleL = 0,sPrevSampleR = 0; 
然后算法实现如下: 
第一个参数表示比率,可以理解为插值的间距,越小帧插得越多,播放速度也就越慢,越大帧插得 
少或者直接跳过,播放速度也就越快。 
/*
 * Resamples one block of interleaved 16-bit stereo PCM by linear
 * interpolation, changing the playback speed (and pitch) by fRate.
 *
 *   fRate    - distance, in input frames, between consecutive output frames.
 *              Below 1 more frames are produced than were read (slower
 *              playback); above 1 fewer are produced (faster playback).
 *   dest     - receives interleaved L/R output frames. No capacity is passed
 *              in, so the caller must reserve roughly nSamples / fRate + 2
 *              frames.
 *   src      - nSamples interleaved L/R input frames (2 * nSamples shorts).
 *   nSamples - number of stereo frames available in src.
 *
 * Returns the number of stereo frames written to dest, or 0 when there is no
 * input, a pointer is null, or fRate is not a positive number.
 *
 * The function is stateful (and therefore not reentrant): it remembers the
 * last input frame in sPrevSampleL / sPrevSampleR and the fractional read
 * position in a static local, so that consecutive blocks join seamlessly.
 */
uint InterpolationStereo(float fRate,short *dest,short *src,uint nSamples)
{
    /* Fractional position between the current pair of input frames. It must
     * survive across calls: restarting from 0 for every block shortens the
     * step at each block boundary and produces a timing glitch there. */
    static float fSlopeCount = 0.0f;
    uint srcPos = 0, i = 0, used = 0;

    if (nSamples == 0) return 0;
    if (!dest || !src) return 0;
    /* A zero, negative or NaN rate would never advance the position, so the
     * first loop below would run forever and overrun dest. */
    if (!(fRate > 0.0f)) return 0;

    /* Interpolate between the frame saved from the previous call and the
     * first frame of this block. */
    while (fSlopeCount <= 1.0f)
    {
        dest[2 * i] = (short)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
        dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
        i++;
        fSlopeCount += fRate;
    }
    /* The position is now past src[0]; make it relative to src[0]. */
    fSlopeCount -= 1.0f;

    if (nSamples > 1)
    {
        while (1)
        {
            /* Step over whole input frames until the position lies between
             * src frame `used` and src frame `used + 1`. */
            while (fSlopeCount > 1.0f)
            {
                fSlopeCount -= 1.0f;
                used ++;
                /* Frame used + 1 would be outside this block: stop here and
                 * let the next call continue from the saved frame. */
                if (used >= nSamples - 1) goto end;
            }
            srcPos = 2 * used;
            dest[2 * i] = (short)((1.0f - fSlopeCount) * src[srcPos]
                + fSlopeCount * src[srcPos + 2]);
            dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * src[srcPos + 1]
                + fSlopeCount * src[srcPos + 3]);
            i++;
            fSlopeCount += fRate;
        }
    }
end:
    /* Keep the last input frame for the next call. */
    sPrevSampleL = src[2 * nSamples - 2];
    sPrevSampleR = src[2 * nSamples - 1];
    return i;
}

完整的例程: 
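// Linear-interpolation resampling of a WAV file holding 16-bit stereo PCM.
// Author: 核桃
// Change the value of `float rate` in main() to alter the playback speed.
// Values between 0.5 and 2 work best.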
#include <stdio.h> 
#include <stdlib.h> 
/* Number of bytes read from the input file per pass; also the element count
 * of inbuffer. */
#define BUFF_SIZE 4096 
/* outbuffer holds SCALE times as many elements as inbuffer, which leaves
 * headroom for the extra frames created when slowing playback down. */
#define SCALE 3 
/* Provide the `uint` shorthand. Note that #ifndef only detects a macro named
 * uint, not an existing typedef of that name. */
#ifndef uint 
typedef unsigned int uint; 
#endif 
/* RIFF container descriptor: the first 12 bytes of a canonical WAV file.
 * The struct is read from and written to disk verbatim, so its in-memory
 * layout has to match the file layout (this assumes a 4-byte int). */
typedef struct
{
    char riff_char[4];   /* chunk tag; "RIFF" in a WAV file */
    int  package_len;    /* RIFF chunk size: number of bytes after this field */
    char wave[4];        /* form type; "WAVE" in a WAV file */
} WavRiff;
/* "fmt " chunk of a canonical PCM WAV file (24 bytes including the chunk
 * tag and length). Read from and written to disk verbatim, so the in-memory
 * layout has to match the file layout (assumes 4-byte int, 2-byte short). */
typedef struct
{
    char  fmt[4];            /* chunk tag; "fmt " in a WAV file */
    int   format_len;        /* size of the chunk payload that follows */
    short fixed;             /* audio format tag; 1 means uncompressed PCM */
    short channel_number;    /* channel count; 2 for stereo */
    int   sample_rate;       /* frames per second */
    int   byte_rate;         /* bytes of audio per second */
    short byte_per_sample;   /* bytes per frame across all channels (block align) */
    short bits_per_sample;   /* bits per single-channel sample; 16 here */
} WavFormat;
typedef struct 

    char  data_field[4]; 
    uint  data_len; 
} WavData; 
typedef struct 

    WavRiff   riff; 
    WavFormat format; 
    WavData   data; 
} WavHeader; 
/* Staging buffer for samples read from the input file. */
short inbuffer[BUFF_SIZE];

/* Staging buffer for interpolated samples; SCALE times larger than the input
 * buffer because slowing playback down produces more frames than were read. */
short outbuffer[SCALE * BUFF_SIZE];

/* Last stereo frame of the previously processed block, carried over so that
 * interpolation can continue across block boundaries. */
short sPrevSampleL = 0;
short sPrevSampleR = 0;

/* Linear-interpolation resampler for interleaved 16-bit stereo PCM. */
uint InterpolationStereo(float fRate, short *dest, short *src, uint nSamples);
/*
 * Resamples one block of interleaved 16-bit stereo PCM by linear
 * interpolation, changing the playback speed (and pitch) by fRate.
 *
 *   fRate    - distance, in input frames, between consecutive output frames.
 *              Below 1 more frames are produced than were read (slower
 *              playback); above 1 fewer are produced (faster playback).
 *   dest     - receives interleaved L/R output frames. No capacity is passed
 *              in, so the caller must reserve roughly nSamples / fRate + 2
 *              frames.
 *   src      - nSamples interleaved L/R input frames (2 * nSamples shorts).
 *   nSamples - number of stereo frames available in src.
 *
 * Returns the number of stereo frames written to dest, or 0 when there is no
 * input, a pointer is null, or fRate is not a positive number.
 *
 * The function is stateful (and therefore not reentrant): it remembers the
 * last input frame in sPrevSampleL / sPrevSampleR and the fractional read
 * position in a static local, so that consecutive blocks join seamlessly.
 */
uint InterpolationStereo(float fRate,short *dest,short *src,uint nSamples)
{
    /* Fractional position between the current pair of input frames. It must
     * survive across calls: restarting from 0 for every block shortens the
     * step at each block boundary and produces a timing glitch there. */
    static float fSlopeCount = 0.0f;
    uint srcPos = 0, i = 0, used = 0;

    if (nSamples == 0) return 0;
    if (!dest || !src) return 0;
    /* A zero, negative or NaN rate would never advance the position, so the
     * first loop below would run forever and overrun dest. */
    if (!(fRate > 0.0f)) return 0;

    /* Interpolate between the frame saved from the previous call and the
     * first frame of this block. */
    while (fSlopeCount <= 1.0f)
    {
        dest[2 * i] = (short)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
        dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
        i++;
        fSlopeCount += fRate;
    }
    /* The position is now past src[0]; make it relative to src[0]. */
    fSlopeCount -= 1.0f;

    if (nSamples > 1)
    {
        while (1)
        {
            /* Step over whole input frames until the position lies between
             * src frame `used` and src frame `used + 1`. */
            while (fSlopeCount > 1.0f)
            {
                fSlopeCount -= 1.0f;
                used ++;
                /* Frame used + 1 would be outside this block: stop here and
                 * let the next call continue from the saved frame. */
                if (used >= nSamples - 1) goto end;
            }
            srcPos = 2 * used;
            dest[2 * i] = (short)((1.0f - fSlopeCount) * src[srcPos]
                + fSlopeCount * src[srcPos + 2]);
            dest[2 * i + 1] = (short)((1.0f - fSlopeCount) * src[srcPos + 1]
                + fSlopeCount * src[srcPos + 3]);
            i++;
            fSlopeCount += fRate;
        }
    }
end:
    /* Keep the last input frame for the next call. */
    sPrevSampleL = src[2 * nSamples - 2];
    sPrevSampleR = src[2 * nSamples - 1];
    return i;
}

int main(int arg, char **argc) 

 FILE *pIn = NULL,*pOut = NULL; 
 WavHeader *wheader = NULL; 
 uint isamples = 0,numBytes = 0,totalBytes = 0; 
 float rate = 0.8f; 
 if (arg < 3) 
 { 
  printf("usage: %s input.wav output.wav/n",argc[0]); 
  return -1; 
 } 
 wheader = (WavHeader *)malloc(sizeof(WavHeader)); 
 pIn = fopen(argc[1], "rb"); 
 pOut = fopen(argc[2], "wb"); 
 numBytes = fread(wheader,1,sizeof(WavHeader),pIn); 
 fwrite(wheader,1,sizeof(WavHeader),pOut); 
  
 while(!feof(pIn)) 
 { 
  numBytes = fread(inbuffer,1,BUFF_SIZE,pIn); 
  numBytes = numBytes >> 2; 
  isamples = InterpolationStereo(rate,outbuffer,inbuffer,numBytes); 
  isamples = isamples << 2; 
  fwrite(outbuffer,1,isamples,pOut); 
  totalBytes += isamples; 
 } 
 fseek(pOut, 0, SEEK_SET); 
 wheader->data.data_len = totalBytes; 
 wheader->riff.package_len = totalBytes + sizeof(WavHeader)-4*sizeof(char)- 
   
  sizeof(int); 
 fwrite(wheader,1,sizeof(WavHeader),pOut); 
 free(wheader); 
 fclose(pIn); 
 fclose(pOut); 
 return 0; 

  PS:根据奈奎斯特(Nyquist)采样定理,采样频率必须大于或等于信号中最高频率的2倍。我们可以理解为,最快播放速度不宜超过原速的2倍,最慢播放速度不宜低于原速的1/2(也就是0.5倍),否则声音失真会非常严重。
 

你可能感兴趣的:(ST的插值算法)