/*
* Bit-packing data structure representing (part of) a bit-stream.
* (speex_bits.h--49)
*/
/** Bit-packing state: a raw buffer plus the cursors and flags used to walk it. */
typedef struct SpeexBits {
   char *chars;      /**< "raw" data */
   int   nbBits;     /**< Total number of bits stored in the stream */
   int   charPtr;    /**< Position of the byte "cursor" */
   int   bitPtr;     /**< Position of the bit "cursor" within the current char */
   int   owner;      /**< Does the struct "own" the "raw" buffer (member "chars") */
   int   overflow;   /**< Set to one if we try to read past the valid data */
   int   buf_size;   /**< Allocated size for buffer */
   int   reserved1;  /**< Reserved for future use */
   void *reserved2;  /**< Reserved for future use */
} SpeexBits;
/*
 * (bits.c--48)
 *
 * Initialise a SpeexBits so that it owns a buffer obtained from speex_alloc.
 *
 * bits: structure to set up; its chars member always receives the result of
 *       speex_alloc(MAX_CHARS_PER_FRAME).
 *
 * When the allocation fails the function returns immediately: buf_size and
 * owner are left untouched and speex_bits_reset() is not called.
 */
EXPORT void speex_bits_init(SpeexBits *bits)
{
   bits->chars = (char*)speex_alloc(MAX_CHARS_PER_FRAME);
   if (!bits->chars)
      return;

   bits->buf_size = MAX_CHARS_PER_FRAME;

   /* The buffer was allocated here, so the struct is marked as its owner. */
   bits->owner = 1;

   speex_bits_reset(bits);
}
/* Usage: create the encoder state from the exported "narrowband" mode descriptor speex_nb_mode. */
enc_state = speex_encoder_init(&speex_nb_mode);
typedef
struct SpeexNBMode { (modes.h--
117)
int frameSize;
/*
*< Size of frames used for encoding
*/
int subframeSize;
/*
*< Size of sub-frames used for encoding
*/
int lpcSize;
/*
*< Order of LPC filter
*/
int pitchStart;
/*
*< Smallest pitch value allowed
*/
int pitchEnd;
/*
*< Largest pitch value allowed
*/
spx_word16_t gamma1;
/*
*< Perceptual filter parameter #1
*/
spx_word16_t gamma2;
/*
*< Perceptual filter parameter #2
*/
spx_word16_t lpc_floor;
/*
*< Noise floor for LPC analysis
*/
const SpeexSubmode *submodes[NB_SUBMODES];
/*
*< Sub-mode data for the mode
*/
int defaultSubmode;
/*
*< Default sub-mode to use when encoding
*/
int quality_map[
11];
/*
*< Mode corresponding to each quality setting
*/
} SpeexNBMode;
/*
Default mode for narrowband (modes.c ---320)
*/
/*
 * Positional initializer: the values follow the member order of SpeexNBMode
 * (frameSize ... quality_map).
 */
static const SpeexNBMode nb_mode = {
   160,    /* frameSize */
   40,     /* subframeSize */
   10,     /* lpcSize */
   17,     /* pitchStart */
   144,    /* pitchEnd */
#ifdef FIXED_POINT
   29491, 19661, /* gamma1, gamma2 */
#else
   0.9, 0.6, /* gamma1, gamma2 */
#endif
   QCONST16(.0002,15), /* lpc_floor */
   /* submodes */
   {NULL, &nb_submode1, &nb_submode2, &nb_submode3, &nb_submode4, &nb_submode5, &nb_submode6, &nb_submode7,
   &nb_submode8, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
   5,      /* defaultSubmode */
   {1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7} /* quality_map, indexed by quality setting */
};
/*
Default mode for narrowband (modes.c ---340)
*/
EXPORT
const SpeexMode speex_nb_mode = {
&nb_mode,
nb_mode_query,
"
narrowband
",
0,
4,
&nb_encoder_init,
&nb_encoder_destroy, (nb_celp.c)
&nb_encode,
&nb_decoder_init,
&nb_decoder_destroy,
&nb_decode,
&nb_encoder_ctl,
&nb_decoder_ctl,
};
/*
* Struct defining a Speex mode (speex.h--248)
*/
/** Descriptor of a Speex mode: opaque mode data plus the callbacks that implement it. */
typedef struct SpeexMode {
   /** Pointer to the low-level mode data */
   const void *mode;

   /** Pointer to the mode query function */
   mode_query_func query;

   /** The name of the mode (you should not rely on this to identify the mode) */
   const char *modeName;

   /** ID of the mode */
   int modeID;

   /** Version number of the bitstream (incremented every time we break
       bitstream compatibility) */
   int bitstream_version;

   /** Pointer to encoder initialization function */
   encoder_init_func enc_init;

   /** Pointer to encoder destruction function */
   encoder_destroy_func enc_destroy;

   /** Pointer to frame encoding function */
   encode_func enc;

   /** Pointer to decoder initialization function */
   decoder_init_func dec_init;

   /** Pointer to decoder destruction function */
   decoder_destroy_func dec_destroy;

   /** Pointer to frame decoding function */
   decode_func dec;

   /** ioctl-like requests for encoder */
   encoder_ctl_func enc_ctl;

   /** ioctl-like requests for decoder */
   decoder_ctl_func dec_ctl;
} SpeexMode;
/*
* Structure representing the full state of the narrowband encoder (nb_celp.h--49)
*/
/** Complete narrowband encoder state: configuration, work buffers, filter memories and rate-control settings. */
typedef struct EncState {
   const SpeexMode *mode;        /**< Mode corresponding to the state */
   int    first;                 /**< Is this the first frame? */
   int    frameSize;             /**< Size of frames */
   int    subframeSize;          /**< Size of sub-frames */
   int    nbSubframes;           /**< Number of sub-frames */
   int    windowSize;            /**< Analysis (LPC) window length */
   int    lpcSize;               /**< LPC order */
   int    min_pitch;             /**< Minimum pitch value allowed */
   int    max_pitch;             /**< Maximum pitch value allowed */

   spx_word32_t cumul_gain;      /**< Product of previously used pitch gains (Q10) */
   int    bounded_pitch;         /**< Next frame should not rely on previous frames for pitch */
   int    ol_pitch;              /**< Open-loop pitch */
   int    ol_voiced;             /**< Open-loop voiced/non-voiced decision */
   int   *pitch;

#ifdef VORBIS_PSYCHO
   VorbisPsy *psy;
   float *psy_window;
   float *curve;
   float *old_curve;
#endif

   spx_word16_t  gamma1;         /**< Perceptual filter: A(z/gamma1) */
   spx_word16_t  gamma2;         /**< Perceptual filter: A(z/gamma2) */
   spx_word16_t  lpc_floor;      /**< Noise floor multiplier for A[0] in LPC analysis */
   char  *stack;                 /**< Pseudo-stack allocation for temporary memory */
   spx_word16_t *winBuf;         /**< Input buffer (original signal) */
   spx_word16_t *excBuf;         /**< Excitation buffer */
   spx_word16_t *exc;            /**< Start of excitation frame */
   spx_word16_t *swBuf;          /**< Weighted signal buffer */
   spx_word16_t *sw;             /**< Start of weighted signal frame */
   const spx_word16_t *window;   /**< Temporary (Hanning) window */
   const spx_word16_t *lagWindow; /**< Window applied to auto-correlation */
   spx_lsp_t *old_lsp;           /**< LSPs for previous frame */
   spx_lsp_t *old_qlsp;          /**< Quantized LSPs for previous frame */
   spx_mem_t *mem_sp;            /**< Filter memory for signal synthesis */
   spx_mem_t *mem_sw;            /**< Filter memory for perceptually-weighted signal */
   spx_mem_t *mem_sw_whole;      /**< Filter memory for perceptually-weighted signal (whole frame) */
   spx_mem_t *mem_exc;           /**< Filter memory for excitation (whole frame) */
   spx_mem_t *mem_exc2;          /**< Filter memory for excitation (whole frame) */
   spx_mem_t mem_hp[2];          /**< High-pass filter memory */
   spx_word32_t *pi_gain;        /**< Gain of LPC filter at theta=pi (fe/2) */
   spx_word16_t *innov_rms_save; /**< If non-NULL, innovation RMS is copied here */

#ifndef DISABLE_VBR
   VBRState *vbr;                /**< State of the VBR data */
   float  vbr_quality;           /**< Quality setting for VBR encoding */
   float  relative_quality;      /**< Relative quality that will be needed by VBR */
   spx_int32_t vbr_enabled;      /**< 1 for enabling VBR, 0 otherwise */
   spx_int32_t vbr_max;          /**< Max bit-rate allowed in VBR mode */
   int    vad_enabled;           /**< 1 for enabling VAD, 0 otherwise */
   int    dtx_enabled;           /**< 1 for enabling DTX, 0 otherwise */
   int    dtx_count;             /**< Number of consecutive DTX frames */
   spx_int32_t abr_enabled;      /**< ABR setting (in bps), 0 if off */
   float  abr_drift;
   float  abr_drift2;
   float  abr_count;
#endif /* #ifndef DISABLE_VBR */

   int    complexity;            /**< Complexity setting (0-10 from least complex to most complex) */
   spx_int32_t sampling_rate;
   int    plc_tuning;
   int    encode_submode;
   const SpeexSubmode * const *submodes; /**< Sub-mode data */
   int    submodeID;             /**< Activated sub-mode */
   int    submodeSelect;         /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
   int    isWideband;            /**< Is this used as part of the embedded wideband codec */
   int    highpass_enabled;      /**< Is the input filter enabled */
} EncState;
/*
 * Usage: set the encoder quality through the generic ctl entry point.
 * The assignment that follows (presumably quoted from the SPEEX_SET_QUALITY
 * handler) looks up quality_map[quality] in the mode data and stores the
 * result in both submodeSelect and submodeID.
 * NOTE(review): the cast here is to SpeexSBMode, while enc_state was created
 * from speex_nb_mode earlier in these notes - confirm which ctl handler is
 * actually reached on this path.
 */
speex_encoder_ctl(enc_state,SPEEX_SET_QUALITY,&quality);(sb_celp.c--
1196)
st->submodeSelect = st->submodeID = ((
const SpeexSBMode*)(st->mode->mode))->quality_map[quality];
quality_map[quality] 默认值:{
1,
8,
2,
3,
3,
4,
4,
5,
5,
6,
7}
st->submodeSelect:
/*
*< Mode chosen by the user (may differ from submodeID if VAD is on)
*/
st->submodeID: /**< Activated sub-mode */
/*
* Struct defining the encoding/decoding mode for SB-CELP (wideband)
*/
/** Parameter set for an SB-CELP (wideband) mode, layered on an embedded narrowband mode. */
typedef struct SpeexSBMode {
   const SpeexMode *nb_mode;     /**< Embedded narrowband mode */
   int     frameSize;            /**< Size of frames used for encoding */
   int     subframeSize;         /**< Size of sub-frames used for encoding */
   int     lpcSize;              /**< Order of LPC filter */
   spx_word16_t gamma1;          /**< Perceptual filter parameter #1 */
   spx_word16_t gamma2;          /**< Perceptual filter parameter #2 */
   spx_word16_t lpc_floor;       /**< Noise floor for LPC analysis */
   spx_word16_t folding_gain;

   const SpeexSubmode *submodes[SB_SUBMODES]; /**< Sub-mode data for the mode */
   int     defaultSubmode;       /**< Default sub-mode to use when encoding */
   int     low_quality_map[11];  /**< Mode corresponding to each quality setting */
   int     quality_map[11];      /**< Mode corresponding to each quality setting */
#ifndef DISABLE_VBR
   const float (*vbr_thresh)[11];
#endif
   int     nb_modes;
} SpeexSBMode;
quality与bit-rate对照表:
mode | quality | bit-rate | mflops | quality/description
1    | 0       | 2,150    | 6      | Vocoder (mostly for comfort noise)
2    | 2       | 5,950    | 9      | Very noticeable artifacts/noise, good intelligibility
3    | 3-4     | 8,000    | 10     | Artifacts/noise sometimes noticeable
4    | 5-6     | 11,000   | 14     | Artifacts usually noticeable only with headphones
5    | 7-8     | 15,000   | 11     | Need good headphones to tell the difference
6    | 9       | 18,200   | 17.5   | Hard to tell the difference even with good headphones
7    | 10      | 24,600   | 14.5   | Completely transparent for voice, good quality music
8    | 1       | 3,950    | 10.5   |
/* Usage: encode the 16-bit samples in input_frame with enc_state, packing the result into bits. */
speex_encode_int(enc_state, input_frame, &bits);
/*
 * Encode one frame supplied as 16-bit integer samples.
 *
 * state: encoder state; it is read as starting with a pointer to its SpeexMode.
 * in:    input samples; the first N are read, where N is the value returned by
 *        the SPEEX_GET_FRAME_SIZE ctl.
 * bits:  bit-stream passed through to the mode's encoder.
 *
 * Returns whatever the mode's enc callback returns.
 *
 * NOTE(review): float_in has MAX_IN_SAMPLES entries and N is not checked
 * against that bound here - confirm no mode reports a larger frame size.
 */
EXPORT int speex_encode_int(void *state, spx_int16_t *in, SpeexBits *bits)
{
   int i;
   spx_int32_t N;
   float float_in[MAX_IN_SAMPLES];

   speex_encoder_ctl(state, SPEEX_GET_FRAME_SIZE, &N);

   /* Widen every sample to float, the input type handed to the enc callback. */
   for (i = 0; i < N; i++)
      float_in[i] = in[i];

   /* For speex_nb_mode the enc slot is nb_encode (nb_celp.c--252). */
   return (*((SpeexMode**)state))->enc(state, float_in, bits);
}
/* Usage: copy the packed stream into byte_ptr, limited to MAX_NB_BYTES; nbBytes receives the returned count. */
nbBytes = speex_bits_write(&bits, byte_ptr, MAX_NB_BYTES);
/*
 * (bits.c--188)
 *
 * Copy the contents of a bit-stream into a caller-supplied buffer.
 *
 * bits:       stream to read; its bitPtr, charPtr and nbBits are restored to
 *             their entry values after the terminator has been inserted.
 * chars:      destination buffer.
 * max_nbytes: capacity of the destination, in bytes.
 *
 * Returns the number of bytes written: the smaller of the capacity and the
 * number of chars needed to hold nbBits bits, times BYTES_PER_CHAR.
 */
EXPORT int speex_bits_write(SpeexBits *bits, char *chars, int max_nbytes)
{
   int i;
   int max_nchars = max_nbytes/BYTES_PER_CHAR;
   int charPtr, bitPtr, nbBits;

   /* Insert terminator, but save the data so we can put it back after */
   bitPtr = bits->bitPtr;
   charPtr = bits->charPtr;
   nbBits = bits->nbBits;
   speex_bits_insert_terminator(bits);
   bits->bitPtr = bitPtr;
   bits->charPtr = charPtr;
   bits->nbBits = nbBits;

   /* Never copy more chars than the stream occupies (nbBits rounded up to whole chars). */
   if (max_nchars > ((bits->nbBits+BITS_PER_CHAR-1)>>LOG2_BITS_PER_CHAR))
      max_nchars = ((bits->nbBits+BITS_PER_CHAR-1)>>LOG2_BITS_PER_CHAR);

   for (i = 0; i < max_nchars; i++)
      chars[i] = HTOLS(bits->chars[i]);

   return max_nchars*BYTES_PER_CHAR;
}