目前大部分的只能呼叫中心系统都对接了ASR语音识别引擎,主流的识别引擎有科大讯飞,阿里,百度等大厂的产品,价格不菲,有没有什么免费的引擎可以使用呢,答案是肯定的。
vosk是一个离线开源语音识别工具,它可以识别16种语言,包括中文,而且总体效果还是不错的,因为我们要对接到呼叫中心,因此我们需要实时的流式传输语音数据,目前主流的解决方案是采用websocket协议传输语音,这块的话Vosk直接提供了websocket的server程序。而且程序已经打包成docker发布,所以启动起来相当简单,真的很贴心,一句命令搞定:
docker run -d -p 2700:2700 alphacep/kaldi-cn:latest
因为我以前做过freeswtich与科大以及阿里的识别对接,所以非常自然的就考虑采用freeswtich插件的方式进行asr对接,而且freeswtich已经定义好了asr识别的接口,我们只要根据接口去做实现就可以了,主要要实现的接口如下:
asr_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_ASR_INTERFACE);
asr_interface->interface_name = "vosk";
asr_interface->asr_open = vosk_asr_open;
asr_interface->asr_close = vosk_asr_close;
asr_interface->asr_load_grammar = vosk_asr_load_grammar;
asr_interface->asr_unload_grammar = vosk_asr_unload_grammar;
asr_interface->asr_resume = vosk_asr_resume;
asr_interface->asr_pause = vosk_asr_pause;
asr_interface->asr_feed = vosk_asr_feed;
asr_interface->asr_check_results = vosk_asr_check_results;
asr_interface->asr_get_results = vosk_asr_get_results;
asr_interface->asr_start_input_timers = vosk_asr_start_input_timers;
参考了一些资料已经熟悉了一下vosk的websocket的通信协议,自定义了一个mod_vosk的插件模块,主要源码如下:
#include
#include "libks/ks.h"
#define AUDIO_BLOCK_SIZE 3200
SWITCH_MODULE_LOAD_FUNCTION(mod_vosk_load);
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_vosk_shutdown);
SWITCH_MODULE_DEFINITION(mod_vosk, mod_vosk_load, mod_vosk_shutdown, NULL);
static switch_mutex_t *MUTEX = NULL;
static switch_event_node_t *NODE = NULL;
static struct {
char *server_url;
int return_json;
int auto_reload;
switch_memory_pool_t *pool;
ks_pool_t *ks_pool;
} globals;
typedef struct {
kws_t *ws;
char *result;
switch_mutex_t *mutex;
switch_buffer_t *audio_buffer;
} vosk_t;
/*! function to open the asr interface */
static switch_status_t vosk_asr_open(switch_asr_handle_t *ah, const char *codec, int rate, const char *dest, switch_asr_flag_t *flags)
{
vosk_t *vosk;
ks_json_t *req = ks_json_create_object();
ks_json_add_string_to_object(req, "url", (dest ? dest : globals.server_url));
if (!(vosk = (vosk_t *) switch_core_alloc(ah->memory_pool, sizeof(*vosk)))) {
return SWITCH_STATUS_MEMERR;
}
ah->private_info = vosk;
switch_mutex_init(&vosk->mutex, SWITCH_MUTEX_NESTED, ah->memory_pool);
if (switch_buffer_create_dynamic(&vosk->audio_buffer, AUDIO_BLOCK_SIZE, AUDIO_BLOCK_SIZE, 0) != SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Buffer create failed\n");
return SWITCH_STATUS_MEMERR;
}
codec = "L16";
ah->codec = switch_core_strdup(ah->memory_pool, codec);
if (kws_connect_ex(&vosk->ws, req, KWS_BLOCK | KWS_CLOSE_SOCK, globals.ks_pool, NULL, 30000) != KS_STATUS_SUCCESS) {
ks_json_delete(&req);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Websocket connect to %s failed\n", globals.server_url);
return SWITCH_STATUS_GENERR;
}
ks_json_delete(&req);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "ASR open\n");
return SWITCH_STATUS_SUCCESS;
}
/*! function to close the asr interface */
static switch_status_t vosk_asr_close(switch_asr_handle_t *ah, switch_asr_flag_t *flags)
{
vosk_t *vosk = (vosk_t *) ah->private_info;
switch_mutex_lock(vosk->mutex);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "ASR closed\n");
/** FIXME: websockets server still expects us to read the close confirmation and only then close
libks library doens't implement it yet. */
kws_close(vosk->ws, KWS_CLOSE_SOCK);
kws_destroy(&vosk->ws);
switch_set_flag(ah, SWITCH_ASR_FLAG_CLOSED);
switch_buffer_destroy(&vosk->audio_buffer);
switch_safe_free(vosk->result);
switch_mutex_unlock(vosk->mutex);
return SWITCH_STATUS_SUCCESS;
}
/*! function to feed audio to the ASR */
static switch_status_t vosk_asr_feed(switch_asr_handle_t *ah, void *data, unsigned int len, switch_asr_flag_t *flags)
{
int poll_result;
kws_opcode_t oc;
uint8_t *rdata;
int rlen;
vosk_t *vosk = (vosk_t *) ah->private_info;
if (switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED))
return SWITCH_STATUS_BREAK;
switch_mutex_lock(vosk->mutex);
switch_buffer_write(vosk->audio_buffer, data, len);
if (switch_buffer_inuse(vosk->audio_buffer) > AUDIO_BLOCK_SIZE) {
char buf[AUDIO_BLOCK_SIZE];
int rlen;
rlen = switch_buffer_read(vosk->audio_buffer, buf, AUDIO_BLOCK_SIZE);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Sending data %d\n", rlen);
if (kws_write_frame(vosk->ws, WSOC_BINARY, buf, rlen) < 0) {
switch_mutex_unlock(vosk->mutex);
return SWITCH_STATUS_BREAK;
}
}
poll_result = kws_wait_sock(vosk->ws, 0, KS_POLL_READ | KS_POLL_ERROR);
if (poll_result != KS_POLL_READ) {
//switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Received Poll Failed\n");
switch_mutex_unlock(vosk->mutex);
return SWITCH_STATUS_SUCCESS;
}
rlen = kws_read_frame(vosk->ws, &oc, &rdata);
if (rlen < 0) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Received Read Failed\n");
switch_mutex_unlock(vosk->mutex);
return SWITCH_STATUS_BREAK;
}
if (oc == WSOC_PING) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Received ping\n");
kws_write_frame(vosk->ws, WSOC_PONG, rdata, rlen);
switch_mutex_unlock(vosk->mutex);
return SWITCH_STATUS_SUCCESS;
}
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Recieved %d bytes:\n%s\n", rlen, rdata);
switch_safe_free(vosk->result);
vosk->result = switch_safe_strdup((const char *)rdata);
switch_mutex_unlock(vosk->mutex);
return SWITCH_STATUS_SUCCESS;
}
/*! function to pause recognizer */
static switch_status_t vosk_asr_pause(switch_asr_handle_t *ah)
{
return SWITCH_STATUS_SUCCESS;
}
/*! function to resume recognizer */
static switch_status_t vosk_asr_resume(switch_asr_handle_t *ah)
{
return SWITCH_STATUS_SUCCESS;
}
/*! Process asr_load_grammar request from FreeSWITCH. */
static switch_status_t vosk_asr_load_grammar(switch_asr_handle_t *ah, const char *grammar, const char *name)
{
return SWITCH_STATUS_SUCCESS;
}
/*! Process asr_unload_grammar request from FreeSWITCH. */
static switch_status_t vosk_asr_unload_grammar(switch_asr_handle_t *ah, const char *name)
{
return SWITCH_STATUS_SUCCESS;
}
/*! function to read results from the ASR*/
static switch_status_t vosk_asr_check_results(switch_asr_handle_t *ah, switch_asr_flag_t *flags)
{
vosk_t *vosk = (vosk_t *) ah->private_info;
return (vosk->result && (strstr(vosk->result, "\"\"") == NULL)) ? SWITCH_STATUS_SUCCESS : SWITCH_STATUS_FALSE;
}
/*! function to read results from the ASR */
static switch_status_t vosk_asr_get_results(switch_asr_handle_t *ah, char **xmlstr, switch_asr_flag_t *flags)
{
vosk_t *vosk = (vosk_t *) ah->private_info;
switch_status_t ret;
switch_mutex_lock(vosk->mutex);
if (globals.return_json) {
if (strstr(vosk->result, "\"partial\"") == NULL) {
*xmlstr = switch_safe_strdup(vosk->result);
ret = SWITCH_STATUS_SUCCESS;
} else {
*xmlstr = switch_safe_strdup(vosk->result);
ret = SWITCH_STATUS_MORE_DATA;
}
} else {
cJSON *result = cJSON_Parse(vosk->result);
if (cJSON_HasObjectItem(result, "text")) {
*xmlstr = switch_safe_strdup(cJSON_GetObjectCstr(result, "text"));
ret = SWITCH_STATUS_SUCCESS;
} else if (cJSON_HasObjectItem(result, "partial")) {
*xmlstr = switch_safe_strdup(cJSON_GetObjectCstr(result, "partial"));
ret = SWITCH_STATUS_MORE_DATA;
} else {
ret = SWITCH_STATUS_GENERR;
}
cJSON_Delete(result);
}
switch_safe_free(vosk->result);
vosk->result = NULL;
switch_mutex_unlock(vosk->mutex);
return ret;
}
/*! function to start input timeouts */
static switch_status_t vosk_asr_start_input_timers(switch_asr_handle_t *ah)
{
return SWITCH_STATUS_SUCCESS;
}
static switch_status_t load_config(void)
{
char *cf = "vosk.conf";
switch_xml_t cfg, xml = NULL, param, settings;
switch_status_t status = SWITCH_STATUS_SUCCESS;
if (!(xml = switch_xml_open_cfg(cf, &cfg, NULL))) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Open of %s failed\n", cf);
status = SWITCH_STATUS_FALSE;
goto done;
}
if ((settings = switch_xml_child(cfg, "settings"))) {
for (param = switch_xml_child(settings, "param"); param; param = param->next) {
char *var = (char *) switch_xml_attr_soft(param, "name");
char *val = (char *) switch_xml_attr_soft(param, "value");
if (!strcasecmp(var, "server-url")) {
globals.server_url = switch_core_strdup(globals.pool, val);
}
if (!strcasecmp(var, "return-json")) {
globals.return_json = atoi(val);
}
}
}
done:
if (!globals.server_url) {
globals.server_url = switch_core_strdup(globals.pool, "ws://127.0.0.1:2700");
}
if (xml) {
switch_xml_free(xml);
}
return status;
}
static void do_load(void)
{
switch_mutex_lock(MUTEX);
load_config();
switch_mutex_unlock(MUTEX);
}
static void event_handler(switch_event_t *event)
{
if (globals.auto_reload) {
do_load();
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Vosk Reloaded\n");
}
}
SWITCH_MODULE_LOAD_FUNCTION(mod_vosk_load)
{
switch_asr_interface_t *asr_interface;
switch_mutex_init(&MUTEX, SWITCH_MUTEX_NESTED, pool);
globals.pool = pool;
ks_init();
ks_pool_open(&globals.ks_pool);
ks_global_set_default_logger(7);
if ((switch_event_bind_removable(modname, SWITCH_EVENT_RELOADXML, NULL, event_handler, NULL, &NODE) != SWITCH_STATUS_SUCCESS)) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't bind!\n");
}
do_load();
/* connect my internal structure to the blank pointer passed to me */
*module_interface = switch_loadable_module_create_module_interface(pool, modname);
asr_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_ASR_INTERFACE);
asr_interface->interface_name = "vosk";
asr_interface->asr_open = vosk_asr_open;
asr_interface->asr_close = vosk_asr_close;
asr_interface->asr_load_grammar = vosk_asr_load_grammar;
asr_interface->asr_unload_grammar = vosk_asr_unload_grammar;
asr_interface->asr_resume = vosk_asr_resume;
asr_interface->asr_pause = vosk_asr_pause;
asr_interface->asr_feed = vosk_asr_feed;
asr_interface->asr_check_results = vosk_asr_check_results;
asr_interface->asr_get_results = vosk_asr_get_results;
asr_interface->asr_start_input_timers = vosk_asr_start_input_timers;
/* indicate that the module should continue to be loaded */
return SWITCH_STATUS_SUCCESS;
}
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_vosk_shutdown)
{
ks_pool_close(&globals.ks_pool);
ks_shutdown();
switch_event_unbind(&NODE);
return SWITCH_STATUS_UNLOAD;
}
好啦,注册一个分机测试一下,配置一个dialplan如下:
效果杠杠的,重点是免费!
mod的源码已发布到github上,地址shanghaimoon888/mod_voskasr (github.com),有兴趣的朋友可以自行下载,有问题也可以加QQ:1869731探讨!