http://blog.sina.com.cn/s/blog_513f4e8401011hf9.html
pjsip的语音检测
在pjsip中包含了语音静音检测的实现。
方法是对每帧的采样点进行累加，再将得到的结果与门限进行比较。
语音检测分为固定模式和动态模式。固定模式是将累加值与给定的门限直接比较；动态模式会根据近期的平均电平动态调整门限（见下文贴出的 pjmedia_silence_det_apply 代码），其具体细节还不确定。
语音检测的调用关系:
g711_encode()->pjmedia_silence_det_detect()
g711_encode:
is_silence = pjmedia_silence_det_detect(priv->vad,
(const pj_int16_t*) input->buf,
(input->size >> 1), NULL);
if (is_silence &&
(PJMEDIA_CODEC_MAX_SILENCE_PERIOD == -1 ||
silence_period < PJMEDIA_CODEC_MAX_SILENCE_PERIOD*8000/1000))
{
output->type = PJMEDIA_FRAME_TYPE_NONE;
output->buf = NULL;
output->size = 0;
output->timestamp = input->timestamp;
return PJ_SUCCESS;
} else {
priv->last_tx = input->timestamp;
}
}
........
这里把代码贴一下:
/**
 * Feed the signal level of one frame to the silence detector and report
 * whether the detector considers the stream silent.
 *
 * All detector state in this function is read from and written to the
 * file-level object gp_pjmedia_silence_det.
 *
 * NOTE(review): the original returned sd->state while updating only
 * gp_pjmedia_silence_det, so the result could describe a different object
 * than the one just updated. The return now reads the same object that was
 * updated. Presumably callers pass sd == gp_pjmedia_silence_det, in which
 * case nothing changes for them -- confirm against the callers.
 *
 * @param sd     Detector handle. Kept for interface compatibility; it is
 *               not dereferenced here.
 * @param level  Signal level of the current frame.
 *
 * @return PJ_FALSE when the mode is VAD_MODE_NONE; in VAD_MODE_FIXED,
 *         non-zero when level is below the threshold; otherwise non-zero
 *         when the state after this update is STATE_SILENCE.
 */
PJ_DEF(pj_bool_t) pjmedia_silence_det_apply( pjmedia_silence_det *sd,
                                             pj_uint32_t level)
{
    int avg_recent_level;

    (void)sd;

    /* No detection mode: never report silence. */
    if (gp_pjmedia_silence_det->mode == VAD_MODE_NONE)
        return PJ_FALSE;

    /* Fixed mode: plain comparison against the configured threshold. */
    if (gp_pjmedia_silence_det->mode == VAD_MODE_FIXED)
        return (level < gp_pjmedia_silence_det->threshold);

    /* Dynamic mode: track the average of the levels accumulated since the
     * last reset; it is used below to re-adjust the threshold. */
    gp_pjmedia_silence_det->sum_level += level;
    ++gp_pjmedia_silence_det->sum_cnt;
    avg_recent_level = (gp_pjmedia_silence_det->sum_level /
                        gp_pjmedia_silence_det->sum_cnt);

    if (level > gp_pjmedia_silence_det->threshold ||
        level >= PJMEDIA_SILENCE_DET_MAX_THRESHOLD)
    {
        /* Frame is above the threshold: count voiced time, restart the
         * silence timer. */
        gp_pjmedia_silence_det->silence_timer = 0;
        gp_pjmedia_silence_det->voiced_timer += gp_pjmedia_silence_det->ptime;

        switch (gp_pjmedia_silence_det->state) {
        case STATE_VOICED:
            if (gp_pjmedia_silence_det->voiced_timer >
                gp_pjmedia_silence_det->recalc_on_voiced)
            {
                /* Voiced for longer than recalc_on_voiced: move the
                 * threshold halfway towards the recent average. */
                gp_pjmedia_silence_det->threshold =
                    (avg_recent_level + gp_pjmedia_silence_det->threshold) >> 1;
                TRACE_((THIS_FILE,"Re-adjust threshold (in talk burst)"
                        "to %d", gp_pjmedia_silence_det->threshold));

                gp_pjmedia_silence_det->voiced_timer = 0;

                /* Restart averaging, seeded with the current average. */
                gp_pjmedia_silence_det->sum_level = avg_recent_level;
                gp_pjmedia_silence_det->sum_cnt = 1;
            }
            break;

        case STATE_SILENCE:
            TRACE_((THIS_FILE,"Starting talk burst (level=%d threshold=%d)",
                    level, gp_pjmedia_silence_det->threshold));
            /* fall through */

        case STATE_START_SILENCE:
            /* Back to voiced; restart averaging from this frame. */
            gp_pjmedia_silence_det->state = STATE_VOICED;
            gp_pjmedia_silence_det->sum_level = level;
            gp_pjmedia_silence_det->sum_cnt = 1;
            break;

        default:
            pj_assert(0);
            break;
        }
    } else {
        /* Frame is at or below the threshold: count silent time, restart
         * the voiced timer. */
        gp_pjmedia_silence_det->voiced_timer = 0;
        gp_pjmedia_silence_det->silence_timer += gp_pjmedia_silence_det->ptime;

        switch (gp_pjmedia_silence_det->state) {
        case STATE_SILENCE:
            if (gp_pjmedia_silence_det->silence_timer >=
                gp_pjmedia_silence_det->recalc_on_silence)
            {
                /* Silent for at least recalc_on_silence: set the threshold
                 * to twice the recent average. */
                gp_pjmedia_silence_det->threshold = avg_recent_level << 1;
                TRACE_((THIS_FILE,"Re-adjust threshold (in silence)"
                        "to %d", gp_pjmedia_silence_det->threshold));

                gp_pjmedia_silence_det->silence_timer = 0;

                /* Restart averaging, seeded with the current average. */
                gp_pjmedia_silence_det->sum_level = avg_recent_level;
                gp_pjmedia_silence_det->sum_cnt = 1;
            }
            break;

        case STATE_VOICED:
            /* First quiet frame after voice: enter the transitional state
             * and restart averaging from this frame. */
            gp_pjmedia_silence_det->state = STATE_START_SILENCE;
            gp_pjmedia_silence_det->sum_level = level;
            gp_pjmedia_silence_det->sum_cnt = 1;
            /* fall through */

        case STATE_START_SILENCE:
            if (gp_pjmedia_silence_det->silence_timer >=
                gp_pjmedia_silence_det->before_silence)
            {
                /* Quiet for at least before_silence: commit to silence and
                 * set the threshold to twice the recent average. */
                gp_pjmedia_silence_det->state = STATE_SILENCE;
                gp_pjmedia_silence_det->threshold = avg_recent_level << 1;
                TRACE_((THIS_FILE,"Starting silence (level=%d "
                        "threshold=%d,g_silence_nbr:%d)", level, gp_pjmedia_silence_det->threshold,g_silence_nbr));

                /* Restart averaging, seeded with the current average. */
                gp_pjmedia_silence_det->sum_level = avg_recent_level;
                gp_pjmedia_silence_det->sum_cnt = 1;
            }
            break;

        default:
            pj_assert(0);
            break;
        }
    }

    /* Report the state of the object that was just updated. */
    return (gp_pjmedia_silence_det->state == STATE_SILENCE);
}
语音检测的调用关系: