Speex是一套开源的音频编解码库,最新版本还包含了回音消除和防抖动等功能,如果我们想开发语音聊天或视频会议这样的系统,Speex将是一个不错的选择。到 http://www.speex.org 可以下载Speex的源码(编译后的dll为libspeex.dll),最新版本为1.2。不过源码是用C语言开发的,直接在.NET中使用会有诸多不便,为此,我用C#将其封装,使得编解码的调用相当简单。
由于Speex原始导出的API不是很方便C#调用,所以,在用C#封装之前,先要用C++对Speex的原始API进行简化,新建一个名为Speex的VC项目,然后引用libspeex.dll的相关库文件,添加cpp文件后,复制下列源码到文件中:
#include
"
speex\speex.h
"
#include
<
windows.h
>
#include
<
stdio.h
>
#include
<
stdlib.h
>
#include
"
speex/speex_echo.h
"
#include
"
speex/speex_preprocess.h
"
#include
"
Speex.h
"
// Number of samples per frame processed by every export in this wrapper.
#define FRAME_SIZE 160

// Encoder working set shared by the encoder_* exports (one encoder per process).
float     encoder_input[FRAME_SIZE]; // PCM samples widened to float before speex_encode
void     *encoder_state;             // handle returned by speex_encoder_init
SpeexBits encoder_bits;              // bit buffer the encoder writes into
// DLL entry point. No setup or teardown is performed for any notification;
// the function always reports success.
BOOL APIENTRY DllMain(HANDLE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
    // The notification arguments are intentionally unused.
    (void)hModule;
    (void)ul_reason_for_call;
    (void)lpReserved;

    return TRUE;
}
extern
"
C
"
__declspec(dllexport)
void
encoder_init(
int
quality)
{
encoder_state
=
speex_encoder_init(
&
speex_nb_mode);
speex_encoder_ctl(encoder_state, SPEEX_SET_QUALITY,
&
quality);
speex_bits_init(
&
encoder_bits);
}
extern
"
C
"
__declspec(dllexport)
void
encoder_dispose()
{
speex_encoder_destroy(encoder_state);
speex_bits_destroy(
&
encoder_bits);
}
extern
"
C
"
__declspec(dllexport)
int
encoder_encode(
const
short
*
data,
char
*
output)
{
for
(
int
i
=
0
; i
<
FRAME_SIZE; i
++
)
encoder_input[i]
=
data[i];
speex_bits_reset(
&
encoder_bits);
speex_encode(encoder_state, encoder_input,
&
encoder_bits);
return
speex_bits_write(
&
encoder_bits, output,
200
);
}
// Decoder working set shared by the decoder_* exports (one decoder per process).
float     decoder_output[FRAME_SIZE]; // float samples produced by speex_decode
void     *decoder_state;              // handle returned by speex_decoder_init
SpeexBits decoder_bits;               // bit buffer the decoder reads from
extern
"
C
"
__declspec(dllexport)
void
decoder_init()
{
decoder_state
=
speex_decoder_init(
&
speex_nb_mode);
int
tmp
=
1
;
speex_decoder_ctl(decoder_state, SPEEX_SET_ENH,
&
tmp);
speex_bits_init(
&
decoder_bits);
}
extern
"
C
"
__declspec(dllexport)
void
decoder_dispose()
{
speex_decoder_destroy(decoder_state);
speex_bits_destroy(
&
decoder_bits);
}
extern
"
C
"
__declspec(dllexport)
void
decoder_decode(
int
nbBytes,
char
*
data,
short
*
output)
{
speex_bits_read_from(
&
decoder_bits, data, nbBytes);
speex_decode(decoder_state,
&
decoder_bits, decoder_output);
for
(
int
i
=
0
; i
<
FRAME_SIZE; i
++
)
{
output[i]
=
decoder_output[i];
}
}
/* ************************************************** Echo cancellation ************************************* */

// State shared by the SpeexEcho* exports.
bool                  m_bSpeexEchoHasInit;   // set by SpeexEchoInit, cleared by SpeexEchoReset
SpeexEchoState       *m_SpeexEchoState;      // echo canceller handle
SpeexPreprocessState *m_pPreprocessorState;  // preprocessor handle
int                   m_nFilterLen;          // filter length passed to speex_echo_state_init
int                   m_nSampleRate;         // sampling rate passed to speex_preprocess_state_init
float                *m_pfNoise;             // buffer of FRAME_SIZE + 1 floats allocated in SpeexEchoInit
extern
"
C
"
__declspec(dllexport)
void
SpeexEchoCapture(
short
*
input_frame,
short
*
output_frame)
{
speex_echo_capture(m_SpeexEchoState, input_frame, output_frame);
}
extern
"
C
"
__declspec(dllexport)
void
SpeexEchoPlayback(
short
*
echo_frame)
{
speex_echo_playback(m_SpeexEchoState, echo_frame);
}
extern
"
C
"
__declspec(dllexport)
void
SpeexEchoReset()
{
if
(m_SpeexEchoState
!=
NULL)
{
speex_echo_state_destroy(m_SpeexEchoState);
m_SpeexEchoState
=
NULL;
}
if
(m_pPreprocessorState
!=
NULL)
{
speex_preprocess_state_destroy(m_pPreprocessorState);
m_pPreprocessorState
=
NULL;
}
if
(m_pfNoise
!=
NULL)
{
delete []m_pfNoise;
m_pfNoise
=
NULL;
}
m_bSpeexEchoHasInit
=
false
;
}
extern
"
C
"
__declspec(dllexport)
void
SpeexEchoInit(
int
filter_length,
int
sampling_rate ,
bool
associatePreprocesser)
{
SpeexEchoReset();
if
(filter_length
<=
0
||
sampling_rate
<=
0
)
{
m_nFilterLen
=
160
*
8
;
m_nSampleRate
=
8000
;
}
else
{
m_nFilterLen
=
filter_length;
m_nSampleRate
=
sampling_rate;
}
m_SpeexEchoState
=
speex_echo_state_init(FRAME_SIZE, m_nFilterLen);
m_pPreprocessorState
=
speex_preprocess_state_init(FRAME_SIZE, m_nSampleRate);
if
(associatePreprocesser)
{
speex_preprocess_ctl(m_pPreprocessorState, SPEEX_PREPROCESS_SET_ECHO_STATE,m_SpeexEchoState);
}
m_pfNoise
=
new
float
[FRAME_SIZE
+
1
];
m_bSpeexEchoHasInit
=
true
;
}
extern
"
C
" __declspec(dllexport)
void SpeexEchoDoAEC(
short* mic,
short*
ref,
short*
out)
{
if (!m_bSpeexEchoHasInit)
{
return;
}
speex_echo_cancellation(m_SpeexEchoState,(
const __int16 *) mic,(
const __int16 *)
ref,(__int16 *)
out);
}
编译便生成Speex.dll。
如果对VC不熟悉也没关系,文末会直接给出libspeex.dll和Speex.dll的下载,直接使用就OK了。
现在,C#可以调用Speex.dll导出的简单函数了,最终封装的源码如下:
/// <summary>
/// C# wrapper around the simplified exports of Speex.dll.
/// zhuweisky 2010.05.13
/// </summary>
/// <remarks>
/// Audio is processed in frames of 160 16-bit little-endian samples (320 bytes).
/// The native wrapper keeps one encoder and one decoder in global variables, so every
/// native call made by this class is serialized through a single static lock.
/// </remarks>
public class Speex : IAudioCodec
{
    /// <summary>Samples per frame; must match FRAME_SIZE in the native wrapper.</summary>
    private const int FrameSize = 160;

    /// <summary>Bytes per PCM frame (16-bit samples).</summary>
    private const int BytesPerFrame = FrameSize * 2;

    /// <summary>Size of the scratch buffer handed to encoder_encode; the native side writes at most 200 bytes.</summary>
    private const int EncodeBufferSize = 200;

    /// <summary>Guards the process-wide native encoder/decoder state.</summary>
    private static readonly object nativeLock = new object();

    #region IsDisposed
    private volatile bool isDisposed = false;

    /// <summary>
    /// True once <see cref="Dispose"/> has been called.
    /// </summary>
    public bool IsDisposed
    {
        get { return isDisposed; }
    }
    #endregion

    #region Ctor
    /// <summary>
    /// Initializes the native encoder and decoder.
    /// </summary>
    /// <param name="quality">Encoding quality, 0 to 10.</param>
    /// <exception cref="ArgumentOutOfRangeException">quality is outside 0..10.</exception>
    public Speex(int quality)
    {
        if (quality < 0 || quality > 10)
        {
            throw new ArgumentOutOfRangeException("quality", "quality value must be between 0 and 10.");
        }

        lock (nativeLock)
        {
            Speex.encoder_init(quality);
            Speex.decoder_init();
        }
    }
    #endregion

    #region Dispose
    /// <summary>
    /// Releases the native encoder and decoder. Calling it more than once is harmless.
    /// </summary>
    public void Dispose()
    {
        // Taking the lock waits for any Encode/Decode call that is still running,
        // which replaces the former fixed 100 ms sleep.
        lock (nativeLock)
        {
            if (this.isDisposed)
            {
                return;
            }

            this.isDisposed = true;
            Speex.decoder_dispose();
            Speex.encoder_dispose();
        }
    }
    #endregion

    #region Encode
    /// <summary>
    /// Encodes captured PCM audio.
    /// </summary>
    /// <param name="data">16-bit little-endian PCM; the length must be a multiple of 320 bytes.</param>
    /// <returns>
    /// For each input frame, a 4-byte little-endian length followed by that many encoded bytes;
    /// null if the instance has been disposed.
    /// </returns>
    /// <exception cref="ArgumentNullException">data is null.</exception>
    /// <exception cref="ArgumentException">data does not contain a whole number of frames.</exception>
    public byte[] Encode(byte[] data)
    {
        if (this.isDisposed)
        {
            return null;
        }
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }
        if (data.Length % BytesPerFrame != 0)
        {
            throw new ArgumentException("Invalid Data Length.");
        }

        int frameCount = data.Length / BytesPerFrame;
        short[] input = new short[FrameSize];
        byte[] buffer = new byte[EncodeBufferSize];

        // A growing stream avoids re-allocating and copying the whole result for every frame.
        using (System.IO.MemoryStream output = new System.IO.MemoryStream())
        {
            lock (nativeLock)
            {
                // Re-check under the lock: Dispose may have run since the first check.
                if (this.isDisposed)
                {
                    return null;
                }

                for (int frame = 0; frame < frameCount; frame++)
                {
                    int offset = frame * BytesPerFrame;
                    for (int j = 0; j < FrameSize; j++)
                    {
                        // Reassemble the little-endian sample; values above 0x7FFF
                        // intentionally wrap to negative shorts.
                        input[j] = unchecked((short)(data[offset + j * 2] | (data[offset + j * 2 + 1] << 8)));
                    }

                    int nbBytes = Speex.encoder_encode(input, buffer);
                    if (nbBytes < 0 || nbBytes > buffer.Length)
                    {
                        throw new InvalidOperationException("Unexpected encoded frame size.");
                    }

                    // 4-byte little-endian length prefix, then the encoded frame.
                    output.WriteByte((byte)(nbBytes & 0xFF));
                    output.WriteByte((byte)((nbBytes >> 8) & 0xFF));
                    output.WriteByte((byte)((nbBytes >> 16) & 0xFF));
                    output.WriteByte((byte)((nbBytes >> 24) & 0xFF));
                    output.Write(buffer, 0, nbBytes);
                }
            }

            return output.ToArray();
        }
    }
    #endregion

    #region Decode
    /// <summary>
    /// Decodes data produced by <see cref="Encode"/> back into PCM audio.
    /// </summary>
    /// <param name="data">Sequence of frames, each a 4-byte little-endian length followed by the encoded bytes.</param>
    /// <returns>16-bit little-endian PCM, 320 bytes per decoded frame; null if the instance has been disposed.</returns>
    /// <exception cref="ArgumentNullException">data is null.</exception>
    /// <exception cref="ArgumentException">A length prefix is truncated, negative, or runs past the end of data.</exception>
    public byte[] Decode(byte[] data)
    {
        if (this.isDisposed)
        {
            return null;
        }
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }

        short[] buffer = new short[FrameSize];

        // A growing stream avoids re-allocating and copying the whole result for every frame.
        using (System.IO.MemoryStream output = new System.IO.MemoryStream())
        {
            lock (nativeLock)
            {
                // Re-check under the lock: Dispose may have run since the first check.
                if (this.isDisposed)
                {
                    return null;
                }

                int index = 0;
                while (index < data.Length)
                {
                    if (data.Length - index < sizeof(int))
                    {
                        throw new ArgumentException("Invalid Data Length.");
                    }

                    int nbBytes = data[index]
                        | (data[index + 1] << 8)
                        | (data[index + 2] << 16)
                        | (data[index + 3] << 24);
                    index += sizeof(int);

                    if (nbBytes < 0 || nbBytes > data.Length - index)
                    {
                        throw new ArgumentException("Invalid Data Length.");
                    }

                    byte[] input = new byte[nbBytes];
                    Array.Copy(data, index, input, 0, nbBytes);
                    index += nbBytes;

                    Speex.decoder_decode(nbBytes, input, buffer);

                    for (int i = 0; i < FrameSize; i++)
                    {
                        // Mask and shift keep the two's-complement bytes of negative samples;
                        // integer division by 0x100 rounds toward zero and produced a wrong
                        // high byte for them.
                        short sample = buffer[i];
                        output.WriteByte((byte)(sample & 0xFF));
                        output.WriteByte((byte)((sample >> 8) & 0xFF));
                    }
                }
            }

            return output.ToArray();
        }
    }
    #endregion

    #region Pinvoke
    // The native functions are exported as plain extern "C" (cdecl), so the calling
    // convention is stated explicitly instead of relying on the P/Invoke default.

    [DllImport("Speex.dll", EntryPoint = "encoder_init", CallingConvention = CallingConvention.Cdecl)]
    internal extern static void encoder_init(int quality);

    [DllImport("Speex.dll", EntryPoint = "encoder_dispose", CallingConvention = CallingConvention.Cdecl)]
    internal extern static void encoder_dispose();

    [DllImport("Speex.dll", EntryPoint = "encoder_encode", CallingConvention = CallingConvention.Cdecl)]
    internal extern static int encoder_encode(short[] data, byte[] output);

    [DllImport("Speex.dll", EntryPoint = "decoder_init", CallingConvention = CallingConvention.Cdecl)]
    internal extern static void decoder_init();

    [DllImport("Speex.dll", EntryPoint = "decoder_dispose", CallingConvention = CallingConvention.Cdecl)]
    internal extern static void decoder_dispose();

    [DllImport("Speex.dll", EntryPoint = "decoder_decode", CallingConvention = CallingConvention.Cdecl)]
    internal extern static void decoder_decode(int nbBytes, byte[] data, short[] output);
    #endregion
}
对外只有四个成员:构造函数(完成初始化)、Encode、Decode、Dispose。方法参数的含义也非常明显。
一般音频对话的整个流程是这样的:采集 -> 编码 -> 网络传输 -> 解码 -> 播放。
而该封装的Speex类解决了这个过程中的音频编码和解码的问题。你可以复制该源码到你的项目,并将从http://www.speex.org下载的源码编译得到的libspeex.dll,以及上文编译生成的Speex.dll一起放到运行目录下,就可以正常地使用SPEEX的编解码功能了。
关于Speex更高级的功能,我正在研究中,有兴趣的朋友可以email给我一起探讨。
Speex dll 可以到 官网下载 页面下载。
注:我们的研究成果已经全部集成到了 OMCS 中,其支持回音消除(AEC)、静音检测(VAD)、噪音抑制(DENOISE)、自动增益(AGC)等网络语音技术,有兴趣的可以了解一下。