Speex 回声消除

Speex 回声消除

转载自: http://blog.csdn.net/dxpqxb/article/details/7928591

为什么需要声学回声消除呢?在一般的VOIP软件或视频会议系统中,假设我们只有A和B两个人在通话,首先,A的声音传给B,B然后用喇叭放出来,而这时B的MIC呢则会采集到喇叭放出来的声音,然后传回给A,如果这个传输的过程中时延足够大,A就会听到一个和自己刚才说过的话一样的声音,这就是回声,声学回声消除器的作用就是在B端对B采集到的声音进行处理,把采集到声音包含的A的声音去掉再传给A,这样,A就不会听到自己说过的话了。
声学回声消除的原理我就不说了,这在网上有很多文档,网上缺少的是实现,所以,我在这把一个开源的声学回声消除器介绍一下,希望对有些人有用,如果有人知道怎么把这个消除器用到基于实时流的VOIP软件中,希望能一起分享一下。
这个声学回声消除器是一个著名的音频编解码器speex中的一部分,1.1.9版本后的回声消除器才起作用,以前版本的都不行,我用的也是这个版本,测试表明,用同一个模拟文件,它的效果比INTEL IPP库4.1版中的声学回声消除器还要好。
先说编译。首先,从www.speex.org上下载speex1.1.9的源代码,解压,打开speex\win32\libspeex中的libspeex.dsw,这个工作区里有两个工程,一个是 libspeex,另一个是libspeex_dynamic。然后,将libspeex中的mdf.c文件添加到工程libspeex中,编译即可。
以下是我根据文档封装的一个类,里面有一个测试程序: //file name: speexEC.h
#ifndef SPEEX_EC_H
#define SPEEX_EC_H
#include <stdio.h>
#include <stdlib.h>
#include "speex/speex_echo.h"
#include "speex/speex_preprocess.h"
class CSpeexEC
{
public:
CSpeexEC();
~CSpeexEC();
void Init(int frame_size=160, int filter_length=1280, int sampling_rate=8000);
void DoAEC(short *mic, short *ref, short *out);

protected:
void Reset();

private:
bool           m_bHasInit;
SpeexEchoState*     m_pState;
       SpeexPreprocessState* m_pPreprocessorState;
int           m_nFrameSize;
int           m_nFilterLen;
int           m_nSampleRate;
float*           m_pfNoise;
};

#endif

//file name: speexEC.cpp
#include "SpeexEC.h"

// Builds an un-initialised canceller: no Speex state, no scratch buffer,
// and the default frame size / filter length / sampling rate recorded.
CSpeexEC::CSpeexEC()
    : m_bHasInit(false),
      m_pState(NULL),
      m_pPreprocessorState(NULL),
      m_nFrameSize(160),
      m_nFilterLen(160 * 8),
      m_nSampleRate(8000),
      m_pfNoise(NULL)
{
}

// Releases everything that is still held by delegating to Reset().
CSpeexEC::~CSpeexEC()
{
    this->Reset();
}

// Tears down any previous state, stores the requested settings and creates
// the echo-canceller state, the preprocessor state and the scratch buffer.
// A single non-positive argument makes ALL three settings use the defaults.
void CSpeexEC::Init(int frame_size, int filter_length, int sampling_rate)
{
    Reset();

    const bool allPositive = (frame_size > 0) && (filter_length > 0) && (sampling_rate > 0);

    m_nFrameSize  = allPositive ? frame_size    : 160;
    m_nFilterLen  = allPositive ? filter_length : 160 * 8;
    m_nSampleRate = allPositive ? sampling_rate : 8000;

    m_pState             = speex_echo_state_init(m_nFrameSize, m_nFilterLen);
    m_pPreprocessorState = speex_preprocess_state_init(m_nFrameSize, m_nSampleRate);

    // Scratch buffer handed to both Speex calls in DoAEC(): frame size + 1 floats.
    m_pfNoise = new float[m_nFrameSize + 1];

    m_bHasInit = true;
}

void CSpeexEC::Reset()
{
if (m_pState != NULL)
{
   speex_echo_state_destroy(m_pState);
   m_pState = NULL;
}
if (m_pPreprocessorState != NULL)
{
   speex_preprocess_state_destroy(m_pPreprocessorState);
   m_pPreprocessorState = NULL;
}
if (m_pfNoise != NULL)
{
   delete []m_pfNoise;
   m_pfNoise = NULL;
}
m_bHasInit = false;
}

void CSpeexEC:DoAEC(short* mic, short* ref, short* out)
{
if (!m_bHasInit)
   return;

speex_echo_cancel(m_pState, mic, ref, out, m_pfNoise);
       speex_preprocess(m_pPreprocessorState, (__int16 *)out, m_pfNoise);
    
}

可以看出,这个回声消除器类很简单,只要初始化一下就可以调用了。但是,要注意的是,传给回声消除器的两个声音信号,必须同步得非常的好,就是说,在B端,接收到A说的话以后,要把这些话音数据传给回声消除器做参考,然后再传给声卡,声卡再放出来,这有一段延时,这时,B再采集,然后传给回声消除器,与那个参考数据比较,从采集到的数据中把频域和参考数据相同的部分消除掉。如果传给消除器的两个信号同步得不好,即两个信号找不到频域相同的部分,就没有办法进行消除了。
测试程序:

#define NN 160
void main()
{
FILE* ref_fd, *mic_fd, *out_fd;
short ref[NN], mic[NN], out[NN];
ref_fd = fopen ("ref.pcm", "rb"); //打开参考文件,即要消除的声音
mic_fd = fopen ("mic.pcm",   "rb");//打开mic采集到的声音文件,包含回声在里面
out_fd = fopen ("echo.pcm", "wb");//消除了回声以后的文件

CSpeexEC ec;
ec.Init();

while (fread(mic, 1, NN*2, mic_fd))
     {
           fread(ref, 1, NN*2, ref_fd);  
           ec.DoAEC(mic, ref, out);
           fwrite(out, 1, NN*2, out_fd);
     }
 
     fclose(ref_fd);
     fclose(mic_fd);
     fclose(out_fd);
}

  以上的程序是用文件来模拟回声和MIC,但在实时流中是大不一样的,在一般的VOIP软件中,接收对方的声音并传到声卡中播放是在一个线程中进行的,而采集本地的声音并传送到对方又是在另一个线程中进行的,而声学回声消除器在对采集到的声音进行回声消除的同时,还需要播放线程中的数据作为参考,而要同步这两个线程中的数据是非常困难的,因为稍稍有些不同步,声学回声消除器中的自适应滤波器就会发散,不但消除不了回声,还会破坏原始采集到的声音,使被破坏的声音难以分辨。我做过好多尝试,始终无法用软件来实现对这两个线程中的数据进行同步,导致实现失败,希望有经验的网友们一起分享一下这方面的经验。



示例代码:


Sample code

This section shows sample code for encoding and decoding speech using the Speex API. The commands can be used to encode and decode a file by calling:
% sampleenc in_file.sw | sampledec out_file.sw
where both files are raw (no header) files encoded at 16 bits per sample (in the machine natural endianness).

sampleenc.c

sampleenc takes a raw 16 bits/sample file, encodes it and outputs a Speex stream to stdout. Note that the packing used is NOT compatible with that of speexenc/speexdec.


#include <speex/speex.h>
#include <stdio.h>

#define FRAME_SIZE 160
/*
 * sampleenc: reads raw 16-bit samples from the file named by argv[1],
 * encodes them one FRAME_SIZE frame at a time with the narrowband Speex
 * encoder (quality 8) and writes, per frame, an int byte count followed by
 * that many encoded bytes to stdout.
 *
 * Returns 0 on success, 1 on a usage, open or write error.
 * A trailing partial frame in the input is dropped.
 */
int main(int argc, char **argv)
{
    FILE *fin;
    short in[FRAME_SIZE];
    float input[FRAME_SIZE];
    char cbits[200];
    int nbBytes;
    void *state;
    SpeexBits bits;
    int i, tmp;
    int rc = 0;

    if (argc < 2)
    {
        fprintf(stderr, "usage: sampleenc in_file.sw\n");
        return 1;
    }

    /* Raw PCM is binary data; text mode would corrupt it on some platforms. */
    fin = fopen(argv[1], "rb");
    if (fin == NULL)
    {
        perror(argv[1]);
        return 1;
    }

    state = speex_encoder_init(&speex_nb_mode);

    tmp = 8;
    speex_encoder_ctl(state, SPEEX_SET_QUALITY, &tmp);

    speex_bits_init(&bits);

    /* Stop on the first short read: covers both EOF and read errors
       (testing feof() alone would spin forever on an error). */
    while (fread(in, sizeof(short), FRAME_SIZE, fin) == FRAME_SIZE)
    {
        /* The encoder takes float samples. */
        for (i = 0; i < FRAME_SIZE; i++)
            input[i] = in[i];

        speex_bits_reset(&bits);
        speex_encode(state, input, &bits);
        nbBytes = speex_bits_write(&bits, cbits, 200);

        /* Frame header (byte count) followed by the payload. */
        if (fwrite(&nbBytes, sizeof(int), 1, stdout) != 1 ||
            fwrite(cbits, 1, (size_t)nbBytes, stdout) != (size_t)nbBytes)
        {
            fprintf(stderr, "sampleenc: write error\n");
            rc = 1;
            break;
        }
    }

    speex_encoder_destroy(state);
    speex_bits_destroy(&bits);
    fclose(fin);
    return rc;
}

sampledec.c

sampledec reads a Speex stream from stdin, decodes it and outputs it to a raw 16 bits/sample file. Note that the packing used is NOT compatible with that of speexenc/speexdec.


#include <speex/speex.h>
#include <stdio.h>

#define FRAME_SIZE 160
/*
 * sampledec: reads from stdin a sequence of frames, each an int byte count
 * followed by that many Speex bytes, decodes every frame with the
 * narrowband decoder (enhancement on) and writes FRAME_SIZE raw 16-bit
 * samples per frame to the file named by argv[1].
 *
 * Returns 0 on success, 1 on a usage, open, format or write error.
 */
int main(int argc, char **argv)
{
    FILE *fout;
    short out[FRAME_SIZE];
    float output[FRAME_SIZE];
    char cbits[200];
    int nbBytes;
    void *state;
    SpeexBits bits;
    int i, tmp;
    int rc = 0;

    if (argc < 2)
    {
        fprintf(stderr, "usage: sampledec out_file.sw\n");
        return 1;
    }

    /* Raw PCM is binary data; text mode would corrupt it on some platforms. */
    fout = fopen(argv[1], "wb");
    if (fout == NULL)
    {
        perror(argv[1]);
        return 1;
    }

    state = speex_decoder_init(&speex_nb_mode);

    tmp = 1;
    speex_decoder_ctl(state, SPEEX_SET_ENH, &tmp);

    speex_bits_init(&bits);

    /* A failed header read means end of stream (or a read error). */
    while (fread(&nbBytes, sizeof(int), 1, stdin) == 1)
    {
        fprintf (stderr, "nbBytes: %d\n", nbBytes);

        /* The count comes from the input stream: never let it exceed cbits. */
        if (nbBytes < 0 || nbBytes > (int)sizeof(cbits))
        {
            fprintf(stderr, "sampledec: invalid frame size %d\n", nbBytes);
            rc = 1;
            break;
        }

        if (fread(cbits, 1, (size_t)nbBytes, stdin) != (size_t)nbBytes)
        {
            fprintf(stderr, "sampledec: truncated frame\n");
            rc = 1;
            break;
        }

        speex_bits_read_from(&bits, cbits, nbBytes);
        speex_decode(state, &bits, output);

        /* Clamp before narrowing: converting an out-of-range float to
           short is undefined behaviour. */
        for (i = 0; i < FRAME_SIZE; i++)
        {
            float sample = output[i];
            if (sample > 32767.0f)
                sample = 32767.0f;
            else if (sample < -32768.0f)
                sample = -32768.0f;
            out[i] = (short)sample;
        }

        if (fwrite(out, sizeof(short), FRAME_SIZE, fout) != FRAME_SIZE)
        {
            fprintf(stderr, "sampledec: write error\n");
            rc = 1;
            break;
        }
    }

    speex_decoder_destroy(state);
    speex_bits_destroy(&bits);
    if (fclose(fout) != 0)
        rc = 1;
    return rc;
}




 


 




开源 H323 协议中封装的使用参考代码:




#include <ptlib.h>

#ifdef __GNUC__
#pragma implementation "speexcodec.h"
#endif

#include "speexcodec.h"

#include "h323caps.h"
#include "h245.h"
#include "rtp.h"

extern "C" {
#include "speex/libspeex/speex.h"
};


#define new PNEW

// T.35 identification used by the Xiph flavour of the non-standard capability.
#define XIPH_COUNTRY_CODE             0xB5   // (181) Country code for United States
#define XIPH_T35EXTENSION             0
#define XIPH_MANUFACTURER_CODE   0x0026 // Allocated by Delta Inc

// T.35 identification used by the Equivalence flavour of the capability.
// The country-code value had been lost from this listing (the macro expanded
// to nothing, which breaks the constructor argument list); 9 is the T.35
// country code for Australia.
#define EQUIVALENCE_COUNTRY_CODE             9 // Country code for Australia
#define EQUIVALENCE_T35EXTENSION             0
#define EQUIVALENCE_MANUFACTURER_CODE   61 // Allocated by Australian Communications Authority, Oct 2000

// Number of PCM samples handled per codec frame.
#define SAMPLES_PER_FRAME               160

// Common prefix of every capability name below.
#define SPEEX_BASE_NAME "Speex"

// Capability names for the five narrowband modes (2..6).
#define SPEEX_NARROW2_H323_NAME       SPEEX_BASE_NAME "Narrow-5.95k{sw}"
#define SPEEX_NARROW3_H323_NAME       SPEEX_BASE_NAME "Narrow-8k{sw}"
#define SPEEX_NARROW4_H323_NAME       SPEEX_BASE_NAME "Narrow-11k{sw}"
#define SPEEX_NARROW5_H323_NAME       SPEEX_BASE_NAME "Narrow-15k{sw}"
#define SPEEX_NARROW6_H323_NAME       SPEEX_BASE_NAME "Narrow-18.2k{sw}"

// Register each capability class under its name. The class names had a
// stray space inside the identifier, which is not valid C++.
H323_REGISTER_CAPABILITY(SpeexNarrow2AudioCapability, SPEEX_NARROW2_H323_NAME);
H323_REGISTER_CAPABILITY(SpeexNarrow3AudioCapability, SPEEX_NARROW3_H323_NAME);
H323_REGISTER_CAPABILITY(SpeexNarrow4AudioCapability, SPEEX_NARROW4_H323_NAME);
H323_REGISTER_CAPABILITY(SpeexNarrow5AudioCapability, SPEEX_NARROW5_H323_NAME);
H323_REGISTER_CAPABILITY(SpeexNarrow6AudioCapability, SPEEX_NARROW6_H323_NAME);

// Capability names for the Xiph-identified variants of modes 2..6.
#define XIPH_SPEEX_NARROW2_H323_NAME       SPEEX_BASE_NAME "Narrow-5.95k(Xiph){sw}"
#define XIPH_SPEEX_NARROW3_H323_NAME       SPEEX_BASE_NAME "Narrow-8k(Xiph){sw}"
#define XIPH_SPEEX_NARROW4_H323_NAME       SPEEX_BASE_NAME "Narrow-11k(Xiph){sw}"
#define XIPH_SPEEX_NARROW5_H323_NAME       SPEEX_BASE_NAME "Narrow-15k(Xiph){sw}"
#define XIPH_SPEEX_NARROW6_H323_NAME       SPEEX_BASE_NAME "Narrow-18.2k(Xiph){sw}"

// Register each Xiph capability class under its name. The class names had a
// stray space inside the identifier, which is not valid C++.
H323_REGISTER_CAPABILITY(XiphSpeexNarrow2AudioCapability, XIPH_SPEEX_NARROW2_H323_NAME);
H323_REGISTER_CAPABILITY(XiphSpeexNarrow3AudioCapability, XIPH_SPEEX_NARROW3_H323_NAME);
H323_REGISTER_CAPABILITY(XiphSpeexNarrow4AudioCapability, XIPH_SPEEX_NARROW4_H323_NAME);
H323_REGISTER_CAPABILITY(XiphSpeexNarrow5AudioCapability, XIPH_SPEEX_NARROW5_H323_NAME);
H323_REGISTER_CAPABILITY(XiphSpeexNarrow6AudioCapability, XIPH_SPEEX_NARROW6_H323_NAME);

/////////////////////////////////////////////////////////////////////////

static int Speex_Bits_Per_Second(int mode) {
       void *tmp_coder_state;
       int bitrate;
       tmp_coder_state = speex_encoder_init(&speex_nb_mode);
       speex_encoder_ctl(tmp_coder_state, SPEEX_SET_QUALITY, &mode);
       speex_encoder_ctl(tmp_coder_state, SPEEX_GET_BITRATE, &bitrate);
       speex_encoder_destroy(tmp_coder_state);
       return bitrate;
}

static int Speex_Bytes_Per_Frame(int mode) {
       int bits_per_frame = Speex_Bits_Per_Second(mode) / 50; // (20ms frame size)
       return ((bits_per_frame+7)/8); // round up
}

OpalMediaFormat const OpalSpeexNarrow_5k95(OPAL_SPEEX_NARROW_5k95,
                                                                                     OpalMediaFormat::DefaultAudioSessionID,
                                                                                     RTP_DataFrame::DynamicBase,
                                                                                     TRUE,   // Needs jitter
                                                                                     Speex_Bits_Per_Second(2),
                                                                                     Speex_Bytes_Per_Frame(2),
                                                                                     SAMPLES_PER_FRAME, // 20 milliseconds
                                                                                     OpalMediaFormat::AudioTimeUnits);

OpalMediaFormat const OpalSpeexNarrow_8k(OPAL_SPEEX_NARROW_8k,
                                                                                 OpalMediaFormat::DefaultAudioSessionID,
                                                                                 RTP_DataFrame::DynamicBase,
                                                                                 TRUE,   // Needs jitter
                                                                                 Speex_Bits_Per_Second(3),
                                                                                 Speex_Bytes_Per_Frame(3),
                                                                                 SAMPLES_PER_FRAME, // 20 milliseconds
                                                                                 OpalMediaFormat::AudioTimeUnits);

OpalMediaFormat const OpalSpeexNarrow_11k(OPAL_SPEEX_NARROW_11k,
                                                                                   OpalMediaFormat::DefaultAudioSessionID,
                                                                                   RTP_DataFrame::DynamicBase,
                                                                                   TRUE,   // Needs jitter
                                                                                   Speex_Bits_Per_Second(4),
                                                                                   Speex_Bytes_Per_Frame(4),
                                                                                   SAMPLES_PER_FRAME, // 20 milliseconds
                                                                                   OpalMediaFormat::AudioTimeUnits);

OpalMediaFormat const OpalSpeexNarrow_15k(OPAL_SPEEX_NARROW_15k,
                                                                                   OpalMediaFormat::DefaultAudioSessionID,
                                                                                   RTP_DataFrame::DynamicBase,
                                                                                   TRUE,   // Needs jitter
                                                                                   Speex_Bits_Per_Second(5),
                                                                                   Speex_Bytes_Per_Frame(5),
                                                                                   SAMPLES_PER_FRAME, // 20 milliseconds
                                                                                   OpalMediaFormat::AudioTimeUnits);

OpalMediaFormat const OpalSpeexNarrow_18k2(OPAL_SPEEX_NARROW_18k2,
                                                                                     OpalMediaFormat::DefaultAudioSessionID,
                                                                                     RTP_DataFrame::DynamicBase,
                                                                                     TRUE,   // Needs jitter
                                                                                     Speex_Bits_Per_Second(6),
                                                                                     Speex_Bytes_Per_Frame(6),
                                                                                     SAMPLES_PER_FRAME, // 20 milliseconds
                                                                                     OpalMediaFormat::AudioTimeUnits);


/////////////////////////////////////////////////////////////////////////

SpeexNonStandardAudioCap ability::SpeexNonStandardAudioCap ability(int mode)
   : H323NonStandardAudioCapa bility(1, 1,
                                                                     EQUIVALENCE_COUNTRY_CODE,
                                                                     EQUIVALENCE_T35EXTENSION,
                                                                     EQUIVALENCE_MANUFACTURER_CODE,
                                                                     NULL, 0, 0, P_MAX_INDEX)
{
   PStringStream s;
   s << "Speex bs" << speex_nb_mode.bitstream_version << " Narrow" << mode;
   PINDEX len = s.GetLength();
   memcpy(nonStandardData.GetPointer(len), (const char *)s, len);
}


/////////////////////////////////////////////////////////////////////////

SpeexNarrow2AudioCapabil ity::SpeexNarrow2AudioCapabil ity()
   : SpeexNonStandardAudioCap ability(2)
{
}


PObject * SpeexNarrow2AudioCapabil ity::Clone() const
{
   return new SpeexNarrow2AudioCapabil ity(*this);
}


PString SpeexNarrow2AudioCapabil ity::GetFormatName() const
{
   return SPEEX_NARROW2_H323_NAME;
}


H323Codec * SpeexNarrow2AudioCapabil ity::CreateCodec(H323Codec::Direction direction) const
{
   return new SpeexCodec(OpalSpeexNarrow_5k95, 2, direction);
}


/////////////////////////////////////////////////////////////////////////

SpeexNarrow3AudioCapabil ity::SpeexNarrow3AudioCapabil ity()
   : SpeexNonStandardAudioCap ability(3)
{
}


PObject * SpeexNarrow3AudioCapabil ity::Clone() const
{
   return new SpeexNarrow3AudioCapabil ity(*this);
}


PString SpeexNarrow3AudioCapabil ity::GetFormatName() const
{
   return SPEEX_NARROW3_H323_NAME;
}


H323Codec * SpeexNarrow3AudioCapabil ity::CreateCodec(H323Codec::Direction direction) const
{
   return new SpeexCodec(OpalSpeexNarrow_8k, 3, direction);
}


/////////////////////////////////////////////////////////////////////////

SpeexNarrow4AudioCapabil ity::SpeexNarrow4AudioCapabil ity()
   : SpeexNonStandardAudioCap ability(4)
{
}


PObject * SpeexNarrow4AudioCapabil ity::Clone() const
{
   return new SpeexNarrow4AudioCapabil ity(*this);
}


PString SpeexNarrow4AudioCapabil ity::GetFormatName() const
{
   return SPEEX_NARROW4_H323_NAME;
}


H323Codec * SpeexNarrow4AudioCapabil ity::CreateCodec(H323Codec::Direction direction) const
{
   return new SpeexCodec(OpalSpeexNarrow_11k, 4, direction);
}


/////////////////////////////////////////////////////////////////////////

SpeexNarrow5AudioCapabil ity::SpeexNarrow5AudioCapabil ity()
   : SpeexNonStandardAudioCap ability(5)
{
}


PObject * SpeexNarrow5AudioCapabil ity::Clone() const
{
   return new SpeexNarrow5AudioCapabil ity(*this);
}


PString SpeexNarrow5AudioCapabil ity::GetFormatName() const
{
   return SPEEX_NARROW5_H323_NAME;
}


H323Codec * SpeexNarrow5AudioCapabil ity::CreateCodec(H323Codec::Direction direction) const
{
   return new SpeexCodec(OpalSpeexNarrow_15k, 5, direction);
}


/////////////////////////////////////////////////////////////////////////

SpeexNarrow6AudioCapabil ity::SpeexNarrow6AudioCapabil ity()
   : SpeexNonStandardAudioCap ability(6)
{
}


PObject * SpeexNarrow6AudioCapabil ity::Clone() const
{
   return new SpeexNarrow6AudioCapabil ity(*this);
}


PString SpeexNarrow6AudioCapabil ity::GetFormatName() const
{
   return SPEEX_NARROW6_H323_NAME;
}


H323Codec * SpeexNarrow6AudioCapabil ity::CreateCodec(H323Codec::Direction direction) const
{
   return new SpeexCodec(OpalSpeexNarrow_18k2, 6, direction);
}


/////////////////////////////////////////////////////////////////////////

XiphSpeexNonStandardAudi oCapability::XiphSpeexNonStandardAudi oCapability(int mode)
   : H323NonStandardAudioCapa bility(1, 1,
                                                                     XIPH_COUNTRY_CODE,
                                                                     XIPH_T35EXTENSION,
                                                                     XIPH_MANUFACTURER_CODE,
                                                                     NULL, 0, 0, P_MAX_INDEX)
{
   // FIXME: To be replaced by an ASN defined block of data
   PStringStream s;
   s << "Speex bs" << speex_nb_mode.bitstream_version << " Narrow" << mode;
   PINDEX len = s.GetLength();
   memcpy(nonStandardData.GetPointer(len), (const char *)s, len);
}


/////////////////////////////////////////////////////////////////////////

XiphSpeexNarrow2AudioCap ability::XiphSpeexNarrow2AudioCap ability()
   : XiphSpeexNonStandardAudi oCapability(2)
{
}


PObject * XiphSpeexNarrow2AudioCap ability::Clone() const
{
   return new XiphSpeexNarrow2AudioCap ability(*this);
}


PString XiphSpeexNarrow2AudioCap ability::GetFormatName() const
{
   return XIPH_SPEEX_NARROW2_H323_NAME;
}


H323Codec * XiphSpeexNarrow2AudioCap ability::CreateCodec(H323Codec::Direction direction) const
{
   return new SpeexCodec(OpalSpeexNarrow_5k95, 2, direction);
}


/////////////////////////////////////////////////////////////////////////

XiphSpeexNarrow3AudioCap ability::XiphSpeexNarrow3AudioCap ability()
   : XiphSpeexNonStandardAudi oCapability(3)
{
}


PObject * XiphSpeexNarrow3AudioCap ability::Clone() const
{
   return new XiphSpeexNarrow3AudioCap ability(*this);
}


PString XiphSpeexNarrow3AudioCap ability::GetFormatName() const
{
   return XIPH_SPEEX_NARROW3_H323_NAME;
}


H323Codec * XiphSpeexNarrow3AudioCap ability::CreateCodec(H323Codec::Direction direction) const
{
   return new SpeexCodec(OpalSpeexNarrow_8k, 3, direction);
}


/////////////////////////////////////////////////////////////////////////

XiphSpeexNarrow4AudioCap ability::XiphSpeexNarrow4AudioCap ability()
   : XiphSpeexNonStandardAudi oCapability(4)
{
}


PObject * XiphSpeexNarrow4AudioCap ability::Clone() const
{
   return new XiphSpeexNarrow4AudioCap ability(*this);
}


PString XiphSpeexNarrow4AudioCap ability::GetFormatName() const
{
   return XIPH_SPEEX_NARROW4_H323_NAME;
}


H323Codec * XiphSpeexNarrow4AudioCap ability::CreateCodec(H323Codec::Direction direction) const
{
   return new SpeexCodec(OpalSpeexNarrow_11k, 4, direction);
}


/////////////////////////////////////////////////////////////////////////

XiphSpeexNarrow5AudioCap ability::XiphSpeexNarrow5AudioCap ability()
   : XiphSpeexNonStandardAudi oCapability(5)
{
}


PObject * XiphSpeexNarrow5AudioCap ability::Clone() const
{
   return new XiphSpeexNarrow5AudioCap ability(*this);
}


PString XiphSpeexNarrow5AudioCap ability::GetFormatName() const
{
   return XIPH_SPEEX_NARROW5_H323_NAME;
}


H323Codec * XiphSpeexNarrow5AudioCap ability::CreateCodec(H323Codec::Direction direction) const
{
   return new SpeexCodec(OpalSpeexNarrow_15k, 5, direction);
}


/////////////////////////////////////////////////////////////////////////

XiphSpeexNarrow6AudioCap ability::XiphSpeexNarrow6AudioCap ability()
   : XiphSpeexNonStandardAudi oCapability(6)
{
}


PObject * XiphSpeexNarrow6AudioCap ability::Clone() const
{
   return new XiphSpeexNarrow6AudioCap ability(*this);
}


PString XiphSpeexNarrow6AudioCap ability::GetFormatName() const
{
   return XIPH_SPEEX_NARROW6_H323_NAME;
}


H323Codec * XiphSpeexNarrow6AudioCap ability::CreateCodec(H323Codec::Direction direction) const
{
   return new SpeexCodec(OpalSpeexNarrow_18k2, 6, direction);
}


/////////////////////////////////////////////////////////////////////////////

// Upper and lower bounds used to clamp decoded float samples before they
// are narrowed to short. Note the lower bound is -32767, not -32768.
const float MaxSampleValue     = 32767.0;
const float MinSampleValue     = -32767.0;

// Creates the bit-packing buffer and either an encoder or a decoder state,
// depending on the codec direction. For an encoder the frame size is
// queried and the quality is set to `mode`; a decoder takes no settings here.
SpeexCodec::SpeexCodec(const char * name, int mode, Direction dir)
   : H323FramedAudioCodec(name, dir)
{
   PTRACE(3, "Codec\tSpeex mode " << mode << " " << (dir == Encoder ? "en" : "de")
                 << "coder created");

   bits = new SpeexBits;
   speex_bits_init(bits);

   if (direction != Encoder) {
       coder_state = speex_decoder_init(&speex_nb_mode);
       return;
   }

   coder_state = speex_encoder_init(&speex_nb_mode);
   speex_encoder_ctl(coder_state, SPEEX_GET_FRAME_SIZE, &encoder_frame_size);
   speex_encoder_ctl(coder_state, SPEEX_SET_QUALITY, &mode);
}

// Releases the bit buffer, then the coder state with the destroy call that
// matches the direction it was created for.
SpeexCodec::~SpeexCodec()
{
   speex_bits_destroy(bits);
   delete bits;

   if (direction != Encoder) {
       speex_decoder_destroy(coder_state);
   } else {
       speex_encoder_destroy(coder_state);
   }
}


// Encodes the SAMPLES_PER_FRAME samples held in sampleBuffer into `buffer`
// and reports the number of bytes written through `length`.
// Always returns TRUE.
BOOL SpeexCodec::EncodeFrame(BYTE * buffer, unsigned & length)
{
   // The encoder takes floats, so widen each PCM sample first.
   float pcmAsFloat[SAMPLES_PER_FRAME];
   PINDEX n = 0;
   while (n < SAMPLES_PER_FRAME) {
       pcmAsFloat[n] = sampleBuffer[n];
       n++;
   }

   // Start from an empty bit buffer, encode, then flush the bits to `buffer`.
   speex_bits_reset(bits);
   speex_encode(coder_state, pcmAsFloat, bits);

   char * packed = (char *)buffer;
   length = speex_bits_write(bits, packed, encoder_frame_size);

   return TRUE;
}


// Decodes `length` bytes of Speex data from `buffer` into sampleBuffer as
// SAMPLES_PER_FRAME 16-bit samples, clamping each decoded value to
// [MinSampleValue, MaxSampleValue] first. The third parameter is unused.
// Always returns TRUE.
BOOL SpeexCodec::DecodeFrame(const BYTE * buffer, unsigned length, unsigned &)
{
   float decoded[SAMPLES_PER_FRAME];

   // Load the packed bytes and decode them to floats.
   speex_bits_read_from(bits, (char *)buffer, length);
   speex_decode(coder_state, bits, decoded);

   // Clamp and narrow each float sample to PCM.
   for (PINDEX n = 0; n < SAMPLES_PER_FRAME; n++) {
       float value = decoded[n];
       value = (value < MinSampleValue) ? MinSampleValue
             : ((value > MaxSampleValue) ? MaxSampleValue : value);
       sampleBuffer[n] = (short)value;
   }

   return TRUE;
}



VC++ 中使用 API的 char 单字节压缩代码示例:

Encoding and decoding problem in speex 1.0.4
Subject:        Encoding and decoding problem in speex 1.0.4
List-id:        speex-dev.xiph.org

Hi,
                       I am using the speex 1.0.4 library from Windows.
                       I have posted my problem before but didn't get a solution. I am doing an
                       VOIP project
                       in which i am recording sound and streaming it to the peer. I wanted to
                       encode and decode
                       wav files that brought me to this site.
                       I am recording sound in the following format:-
                       m_WaveFormatEx.wFormatTag                   = WAVE_FORMAT_PCM;
                       m_WaveFormatEx.nChannels                     = 1;
                       m_WaveFormatEx.wBitsPerSample           = 8;
                       m_WaveFormatEx.cbSize                           = 0;
                       m_WaveFormatEx.nSamplesPerSec           = 8000;
                       m_WaveFormatEx.nBlockAlign                 = 1;
                       m_WaveFormatEx.nAvgBytesPerSec         = 8000;
                       The recording is as follows :-
                       When the buffer(size = 2000 bytes) gets filled with sound data a
                       function with the body shown
                       below is called.
                       LPWAVEHDR lpHdr = (LPWAVEHDR) lParam;
                       if(lpHdr->dwBytesRecorded==0 || lpHdr==NULL)
                       return ERROR_SUCCESS;
                       ::waveInUnprepareHeader(m_hRecord, lpHdr, sizeof(WAVEHDR));
                       Here lpHdr->lpData contains the audio data in a character array.
                       Now here I want to use Speex codec for encoding the data so the encoding
                       function is
                       called (I am thankful to Tay YueWeng for the function).
                       // Encodes the audio in `buffer` with a freshly created narrowband
                       // Speex encoder and returns a new[]-allocated buffer of RECBUFFER/2
                       // chars holding the result; the number of bytes written is stored
                       // in `encodeSize`. The returned buffer is not freed in this function.
                       // NOTE(review): mBits is not declared here; presumably a SpeexBits
                       // defined elsewhere -- confirm.
                       // NOTE(review): the loop reads two bytes per sample, while the
                       // surrounding text says recording uses 8 bits per sample -- confirm
                       // the input format.
                       char *encode(char *buffer, int &encodeSize)
                       {
                       char     *encodedBuffer = new char[RECBUFFER/2];                     
                       short     speexShort;
                       float   speexFloat[RECBUFFER/2];
                       void     *mEncode             = speex_encoder_init(&speex_nb_mode);
                     
                       speex_bits_init(&mBits);
                       // Convert the audio to a short then to a float buffer
                       int       halfBufferSize = RECBUFFER/2;
                       for (int i = 0; i < halfBufferSize; i++)
                       {
                       memcpy(&speexShort, &buffer[i*2], sizeof(short));
                       speexFloat[i]         = speexShort;
                       }
                       // Encode the sound data using the float buffer
                       // NOTE(review): speex_encode is called exactly once, so presumably
                       // only the first frame of speexFloat is consumed; that would match
                       // the constant 38-byte output reported in the surrounding text.
                       speex_bits_reset(&mBits);
                       speex_encode(mEncode, speexFloat, &mBits);
                       encodeSize                       = speex_bits_write(&mBits, encodedBuffer,
                       RECBUFFER/2);
                     
                       speex_encoder_destroy(mEncode);
                     
                       speex_bits_destroy(&mBits);
                       // Return the encoded buffer
                       return encodedBuffer;
                       }
                       Here i noticed that though my captured audio data is 2000 bytes the
                       compressed form is
                       always 38 bytes. In the speexFloat array above i get values in the range
                       -32767 to +32767.
                       Is it correct. Also after calling the 'speex_encode' function the first
                       160 values in the
                       input float array i.e. speexFloat is changed (why does it happen?Is
                       anything abnormal).
                       Further after calling the above function for testing I decode the
                       returned encoded data
                       immediately by calling the decoding function shown bellow :-
                       char *decode (char *buffer, int encodeSize)
                       {
                       char *decodedBuffer     = new char[RECBUFFER];
                       short speexShort;
                       float speexFloat[RECBUFFER/2];
                       // Decode the sound data into a float buffer
                       void   *mDecode               = speex_decoder_init(&speex_nb_mode);
                     
                       speex_bits_init(&mBits);
                       int       halfBufferSize = RECBUFFER/2;
                       speex_bits_reset(&mBits);
                       speex_bits_read_from(&mBits, buffer, encodeSize);
                       speex_decode(mDecode, &mBits, speexFloat);
                       // Convert from float to short to char
                       for (int i = 0; i < halfBufferSize; i++)
                       {
                       speexShort = speexFloat[i];
                       memcpy(&decodedBuffer[i*2], &speexShort, sizeof(short));
                       }
                     
                       speex_encoder_destroy(mDecode);
                     
                       speex_bits_destroy(&mBits);
                       // Return the buffer
                       return decodedBuffer;
                       }
                       After decoding using the above function only the first 160 values in the
                       decodedBuffer array is
                       changed. i.e i encoded an 2000 byte audio data to get a 38 byte encoded
                       audio data. On decoding
                       the 38 byte audio data i get an decompressed 160 byte data. I don't
                       understand whats going
                       wrong. I checked all the messages posted in this newsgroup and did'nt
                       find an answer so i am
                       posting this code hoping that it gets solved soon.   Thanks in advance.

你可能感兴趣的:(Speex 回声消除)