关于mfcc系数提取

文件 "mfcc.cpp"

 

#include  " mfcc.h "
#include 
< math.h >
#include 
< stdlib.h >

mfcc::mfcc()
{
        melbank();
        calcu_hamming();
        cal_stren_win();

        calcu_dct();
        qi
=1;
        
int i,j;
        
for(i=0;i<5;i++)
                
for(j=1;j<=256;j++)
                
{
                        buf[i][j]
=0;
                }

        
for(i=0;i<=256;i++)
        
{
                queue[i]
=0;     workingi[i]=0;
        }

        Pos
=0;
}


/*%归一化倒谱提升窗口
w=1+6*sin(pi*[1:12]./12);
w=w/max(w);
*/


void  mfcc::proc()
{
        
int i,j;
        
double sum;
        
double proc_buf1[25];
//      for(i=256;i>=1;i--)working[i]=working[i]-0.9375*working[i-1];

        
for(i=256;i>=1;i--)working[i]=working[i]-working[i-1];
        
//    s=y'.*hamming(256);
        for(i=1;i<=256;i++)working[i-1]=working[i]*hamming[i];
        
//同时错一格。
//    t=abs(fft(s));
        fft();
        
for(i=256;i>=1;i--)
        
{working[i]=working[i-1]*working[i-1]+workingi[i-1]*workingi[i-1];
                workingi[i
-1]=0;
        }

        
for(i=1;i<=24;i++)
        
{
                sum
=0;
                
for(j=1;j<=129;j++)
                
{
                        sum
=sum+x[i][j]*working[j];
                }

                
if(sum!=0)
                proc_buf1[i]
=log(sum);
                
else proc_buf1[i]=-3000;
        }

        
for(i=1;i<=12;i++)
        
{
                sum
=0;

                
for(j=1;j<=24;j++)
                
{
                        sum
=sum+dctcoef[i][j]*proc_buf1[j];
                }

                buf[Pos][i]
=sum*stren_win[i];
        }

        Pos
=(Pos+1)%5;
        
for(i=1;i<=12;i++){
                result[i]
=buf[(Pos+2)%5][i];
//    dtm(i,:)=-2*m(i-2,:)-m(i-1,:)+m(i+1,:)+2*m(i+2,:);
                result[i+12]=-2*buf[(Pos)%5][i]
               
-buf[(Pos+1)%5][i]+buf[(Pos+3)%5][i]+2*buf[(Pos+4)%5][i];
                result[i
+12]=result[i+12]/3;
        }

}


void  mfcc::cal_stren_win()
{
        
int i;
        
double b=0.0;
        
for(i=1;i<=12;i++){
                stren_win[i]
=1+6*sin(pi*(double)i/(double)12);
                
if(b<stren_win[i])b=stren_win[i];
        }

        
for(i=1;i<=12;i++){
                stren_win[i]
=stren_win[i]/b;
        }

}


void  mfcc::calcu_hamming()
{
        
int i;
        
for(i=1;i<=256;i++){
        hamming[i]
=0.54-0.46*cos(2*pi*(i-1)/(256-1));
        }

}


void  mfcc::read( short   * data)
{
        
int i;
        
for(i=0;i<INC_MFCC;i++){
                queue[qi]
=(double)data[i];
                qi
=qi%257+1;
        }

        Level
=0.0;
        
for(i=0;i<257;i++){
                working[i]
=queue[(qi+i+256)%257+1];
                Level
=Level+fabs(working[i]);
        }

}


void  mfcc:: set ()
{
        
int i;
        
for(i=0;i<257;i++) working[i]=i;
//      working[1]=1;
}


void  mfcc::calcu_dct()
{
/*      %DCT系数,12*24
for k=1:12
    n=0:23;
    dctcoef(k,:)=cos((2*n+1)*k*pi/(2*24));
end
*/

        
int k,n;
        
for(k=1;k<=12;k++)
        
for(n=0;n<=23;n++)
        
{
                dctcoef[k][n
+1]=cos((double)(2*n+1)*k*pi/(double)(2*24));
        }

}


void  mfcc::melbank()
{
        
double f0,fn2,lr;
        
int b1,b2,b3,b4,k2,k3,k4,mn,mx;
        
double bl[5];
        
double pf[LEN_PF_MFCC],fp[LEN_PF_MFCC],pm[LEN_PF_MFCC],v[LEN_PF_MFCC];
        
int r[LEN_PF_MFCC],c[LEN_PF_MFCC];
        
int i,j;
        
for(i=0;i<SIZE_X_X_MFCC;i++){
                
for(j=0;j<SIZE_X_Y_MFCC;j++){
                        x[i][j]
=0.0;
                }

        }

        f0
=(double)700/(double)FS_MFCC;
//      fn2=floor(n/2);

        fn2
=floor((double)N_MFCC/2);

        lr
=log((f0+FH_MFCC)/(f0+FL_MFCC))/(P_MFCC+1.0);
//// convert to fft bin numbers with 0 for DC term
//      bl=N*((f0+FL)*exp([0 1 p p+1]*lr)-f0);
        bl[1]=N_MFCC*((f0+FL_MFCC)*exp(0*lr)-f0);
        bl[
2]=N_MFCC*((f0+FL_MFCC)*exp(1*lr)-f0);
        bl[
3]=N_MFCC*((f0+FL_MFCC)*exp(P_MFCC*lr)-f0);
        bl[
4]=N_MFCC*((f0+FL_MFCC)*exp((P_MFCC+1)*lr)-f0);

        b2
=(int)ceil(bl[2]);
        b3
=(int)floor(bl[3]);
/*if any(w=='y')
  pf=log((f0+(b2:b3)/n)/(f0+fl))/lr;
  fp=floor(pf);
  r=[ones(1,b2) fp fp+1 p*ones(1,fn2-b3)];
  c=[1:b3+1 b2+1:fn2+1];
  v=2*[0.5 ones(1,b2-1) 1-pf+fp pf-fp ones(1,fn2-b3-1) 0.5];
  mn=1;
  mx=fn2+1;
else
*/

  b1
=(int)floor(bl[1])+1;
  b4
=(int)__min(fn2,ceil(bl[4]))-1;
        k2
=b2-b1+1;
        k3
=b3-b1+1;
        k4
=b4-b1+1;
  mn
=b1+1;
  mx
=b4+1;
//  pf=log((f0+(b1:b4)/n)/(f0+fl))/lr;
  for(i=1,j=b1;j<=b4;i++,j++){
        pf[i]
=log(((double)f0+(double)i/(double)N_MFCC)/(f0+FL_MFCC))/lr;
//  fp=floor(pf);
        fp[i]=floor(pf[i]);
//  pm=pf-fp;
        pm[i]=pf[i]-fp[i];
  }

//  k2=b2-b1+1;
//  k3=b3-b1+1;
//  k4=b4-b1+1;
//  r=[fp(k2:k4) 1+fp(1:k3)];
//  c=[k2:k4 1:k3];
//  v=2*[1-pm(k2:k4) pm(1:k3)];

  
for(i=1,j=k2;j<=k4;i++,j++){
        r[i]
=(int)fp[j];
        c[i]
=j;
        v[i]
=2*(1-pm[j]);
  }

  
for(j=1;j<=k3;j++,i++){
        r[i]
=1+(int)fp[j];
        c[i]
=j;
        v[i]
=2*pm[j];
  }


//  mn=b1+1;
//  mx=b4+1;
//if any(w=='n')
//  v=1-cos(v*pi/2);
//elseif any(w=='m')
//  v=1-0.92/1.08*cos(v*pi/2);
  for(j=1;j<i;j++){
        v[j]
=1-0.92/1.08*cos(v[j]*pi/2);
        x[r[j]][c[j]
+mn-1]=v[j];
  }

//end
//if nargout > 1
//  x=sparse(r,c,v);
//else

//  x=sparse(r,c+mn-1,v,p,1+fn2);
//end
        double buf;
        
for(i=1;i<=24;i++)
        
for(j=1;j<=129;j++)
                
if(x[i][j]>buf)buf=x[i][j];

        
for(i=1;i<=24;i++)
        
for(j=1;j<=129;j++)
                x[i][j]
=x[i][j]/buf;

}


void  mfcc::fft()
{
        
int i,j,k,n,nv2,nm1,l,le,le1,ip,sign=-1;
        
double tr,ti,ur,ui,wr,wi;
        
double *xr;
        
double *xi;
        xr
=working;
        xi
=workingi;
        n
=1<<8;
        nv2
=n>>1;
        nm1
=n-1;
        j
=0;
        
for(i=0;i<nm1;i++)
        
{
                
if(i<j){
                        tr
=xr[j];
                        ti
=xi[j];
                        xr[j]
=xr[i];
                        xi[j]
=xi[i];
                        xr[i]
=tr;
                        xi[i]
=ti;
                }

                k
=nv2;
                
while(k<=j){
                        j
-=k;
                        k
=k>>1;
                }

                j
+=k;
        }

        
for(l=1;l<=8;l++)
        
{
                le
=1<<l;
                le1
=le/2;
                ur
=1.;
                ui
=0.;
                wr
=cos(pi/le1);
                wi
=-sin(pi/le1);
                
for(j=0;j<le1;j++)
                
{

                        
for(i=j;i<n;i+=le)
                        
{
                                ip
=i+le1;
                                tr
=xr[ip]*ur-xi[ip]*ui;
                                ti
=xr[ip]*ui+xi[ip]*ur;
                                xr[ip]
=xr[i]-tr;
                                xi[ip]
=xi[i]-ti;
                                xr[i]
=xr[i]+tr;
                                xi[i]
=xi[i]+ti;
                        }

                        tr
=ur*wr-ui*wi;
                        ti
=ur*wi+ui*wr;
                        ur
=tr;
                        ui
=ti;

                }

        }

        
return;
}


int  mfcc::report_begin()
{
        
if(Level>THRESHOUD_BEGIN*257)return(1);
        
else return(0);
}


int  mfcc::report_end()
{
        
if(Level<THRESHOUD_END*257)return(1);
        
else return(0);
}

                                              

文件"mfcc.h"

 

#ifndef MFCC_H_XPP
#define  MFCC_H_XPP
#define  X_M_MFCC
#define  X_N_MFCC
#define  FL_MFCC 0  // low end of the
// lowest filter as a fraction of fs (default = 0)
#define  FH_MFCC 0.5  // high end of
// highest filter as a fraction of fs (default = 0.5)                                                                    
#define  FS_MFCC 8000  // sample rate in Hz
#define  N_MFCC 256  // length of fft
#define  P_MFCC 24   // number of filters in filterbank
#define  LEN_PF_MFCC 256
#define  pi 3.141592653589
#define  SIZE_X_X_MFCC 32
#define  SIZE_X_Y_MFCC 140
#define  SIZE_DCT_X_MFCC 13
#define  SIZE_DCT_Y_MFCC 25
#define  INC_MFCC 80
#define  THRESHOUD_BEGIN 350
#define  THRESHOUD_END 250

class  mfcc {
private:

        
void melbank();
        
double x[SIZE_X_X_MFCC][SIZE_X_Y_MFCC];
        
double dctcoef[SIZE_DCT_X_MFCC][SIZE_DCT_Y_MFCC];
        
void calcu_dct();
        
double hamming[257];
        
void calcu_hamming();
        
void cal_stren_win();
        
double stren_win[13];
        
int Pos;
        
double buf[5][25];
        
double queue[300];
        
double working[257];
        
double workingi[257];
        
void fft();
        
int qi;
        
double Level;
public:
        
double result[25];
        
void proc();
        
void set();
        
void read(short *);
        
int report_begin();
        
int report_end();
        mfcc();
}
;

#endif                                              

样例程序:

 

#include  " mfcc.h "
void  main( int  argc,  char   * argv[])
{
        mfcc test;
        
while(1){
//      将长度80的语音缓冲区刷新的操作。这里简略。
        test.read(SpeechBuffer);//SpeechBuffer是从第0个有效的一段80个语音样点。                                         
                
//语音样点是短整型16位PCM信号,
        test.proc();
/*      接下来,对象test提供3个结果如下:
        test.result;test.result是从第一个有效的,并非从第0个有效,
                        //是24个参数,前12个是主参数,后12个是查分参数。
        test.report_begin()语音起始标志
        test.report_end()语音结束标志。
*/

        }

}

来源:
以上程序是我在学(精华区x-3-1-6)课程的
时候做大作业时自己写的一个类。
当时我是根据Matlab中MFCC的源码中翻译成C++的。
在现在的Matlab中没有这个函数,
得要到
http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
上把相应的语音库当下来,
然后,
自己编一个Matlab程序清单如下:
文件"mfcc.m"
function ccc=mfcc(x)
%归一化mel滤波器组系数
bank=melbankm(24,256,8000,0,0.5,'m');
bank=full(bank);
bank=bank/max(bank(:));

%DCT系数,12*24
for k=1:12
    n=0:23;
    dctcoef(k,:)=cos((2*n+1)*k*pi/(2*24));
end

%归一化倒谱提升窗口
w=1+6*sin(pi*[1:12]./12);
w=w/max(w);

%预加重滤波器
xx=double(x);
xx=filter([1 -1],1,xx);

%语音信号分帧
%xx=enframe(xx,256,80);
xppl=length(xx);
j=1;
for i=65:80:xppl-256,
    xx1(j,:)=xx(i:i+256-1)';
    j=j+1;
end
xx=xx1;

%计算每帧的MFCC参数
for i=1:size(xx,1)
    y=xx(i,:);
    s=y'.*hamming(256);
    t=abs(fft(s));
    t=t.^2;
    t=t+2*realmin;
    c1=dctcoef*log(bank*t(1:129));
    c2=c1.*w';
    m(i,:)=c2';
end

%差分参数
dtm=zeros(size(m));
for i=3:size(m,1)-2
    dtm(i,:)=-2*m(i-2,:)-m(i-1,:)+m(i+1,:)+2*m(i+2,:);
end
dtm=dtm/3;

%合并mfcc参数和一阶差分mfcc参数
ccc=[m dtm];
%去除首尾两帧,因为这两帧的一阶差分参数为0
ccc=ccc(3:size(m,1)-2,:);
return

你可能感兴趣的:(关于mfcc系数提取)