MRCP(Media Resource Control Protocol, 媒体资源控制协议)
是一种通讯协议,用于语音服务器向客户端提供各种语音服务,例如 语音识别(ASR)和语音合成(TTS)。FreeSWITCH 中的 unimrcp模块 就是对接 MRCP 协议栈的中间层,提供了集成使用 ASR、TTS 的能力。下图是 FreeSWITCH 中 unimrcp模块 的源码时序,下文将对源码进行分析
在 FreeSWITCH 1.10 源码阅读(1)-服务启动及 Event Socket 模块工作原理 中笔者分析了 FreeSWITCH 加载模块的主流程,unimrcp 模块被加载时将触发 mod_unimrcp.c#SWITCH_MODULE_LOAD_FUNCTION(mod_unimrcp_load)
执行。这个函数比较简练,大致有以下几个关键点:
- 调用 mod_unimrcp.c#mod_unimrcp_do_config() 函数获取 XML 配置中指定的 unimrcp.conf 名称下的配置内容,这部分不做赘述
- 调用 mod_unimrcp.c#mod_unimrcp_client_create() 函数创建 FreeSWITCH 本地的 MRCP 客户端,用于后续与 MRCP 服务器交互
- 调用 mod_unimrcp.c#synth_load() 函数加载创建 TTS 应用
- 调用 mod_unimrcp.c#recog_load() 函数加载创建 ASR 应用,与 TTS 应用加载类似,不做赘述
- 调用库函数 mrcp_client.c#mrcp_client_start() 新开线程启动 MRCP 客户端,涉及库函数不做赘述
/**
 * Module load entry point for mod_unimrcp.
 *
 * Reserves the module's custom event subclasses, resets the module globals,
 * reads the module configuration, hooks UniMRCP logging into FreeSWITCH,
 * creates the MRCP client, loads the synthesizer and recognizer interfaces
 * and finally starts the client stack.
 *
 * @return SWITCH_STATUS_SUCCESS when every step succeeded,
 *         SWITCH_STATUS_TERM when an event subclass could not be reserved,
 *         SWITCH_STATUS_FALSE for any other failure.
 */
SWITCH_MODULE_LOAD_FUNCTION(mod_unimrcp_load)
{
    const char *event_subclasses[] = { MY_EVENT_PROFILE_CREATE, MY_EVENT_PROFILE_CLOSE, MY_EVENT_PROFILE_OPEN };
    unsigned int idx;

    /* reserve each custom event subclass; the first failure aborts the load */
    for (idx = 0; idx < sizeof(event_subclasses) / sizeof(event_subclasses[0]); idx++) {
        if (switch_event_reserve_subclass(event_subclasses[idx]) != SWITCH_STATUS_SUCCESS) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", event_subclasses[idx]);
            return SWITCH_STATUS_TERM;
        }
    }

    /* hand the freshly created module interface back to the core */
    *module_interface = switch_loadable_module_create_module_interface(pool, modname);

    /* start from a clean global state */
    memset(&globals, 0, sizeof(globals));
    switch_mutex_init(&globals.mutex, SWITCH_MUTEX_UNNESTED, pool);
    globals.speech_channel_number = 0;
    switch_core_hash_init_nocase(&globals.profiles);

    /* read the module configuration; both default profiles are mandatory */
    mod_unimrcp_do_config();
    if (zstr(globals.unimrcp_default_synth_profile)) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing default-tts-profile\n");
        return SWITCH_STATUS_FALSE;
    }
    if (zstr(globals.unimrcp_default_recog_profile)) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing default-asr-profile\n");
        return SWITCH_STATUS_FALSE;
    }

    /* route UniMRCP log output through FreeSWITCH */
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "UniMRCP log level = %s\n", globals.unimrcp_log_level);
    if (!apt_log_instance_create(APT_LOG_OUTPUT_NONE, str_to_log_level(globals.unimrcp_log_level), pool)) {
        /* a log instance already exists: only adjust its priority */
        apt_log_priority_set(str_to_log_level(globals.unimrcp_log_level));
    }
    apt_log_ext_handler_set(unimrcp_log);

    /* build the MRCP client */
    globals.mrcp_client = mod_unimrcp_client_create(pool);
    if (!globals.mrcp_client) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create mrcp client\n");
        return SWITCH_STATUS_FALSE;
    }

    /* synthesizer first, then recognizer; stop at the first one that fails */
    if (synth_load(*module_interface, pool) != SWITCH_STATUS_SUCCESS ||
        recog_load(*module_interface, pool) != SWITCH_STATUS_SUCCESS) {
        return SWITCH_STATUS_FALSE;
    }

    /* everything is registered: start the client stack */
    mrcp_client_start(globals.mrcp_client);

    /* tell the core to keep the module loaded */
    return SWITCH_STATUS_SUCCESS;
}
mod_unimrcp.c#mod_unimrcp_client_create()
函数的关键点在于与底层 mrcp 库的交互,由于底层库已经不属于 FreeSWITCH 源码,本文不会再深入:
- 调用库函数 mrcp_client.c#mrcp_client_create() 创建 FreeSWITCH 作为客户端连接 MRCP 服务器的 mrcp_client_t 对象,并设置该对象上回调函数表中处理消息的函数为 mrcp_client.c#mrcp_client_msg_process()
- 调用库函数 mrcp_client_connection.c#mrcp_client_connection_agent_create() 创建 MRCP 连接端点对象 mrcp_connection_agent_t,用于管理底层 socket 数据读写
- 调用 mrcp_client.c#mrcp_client_connection_agent_register() 将 MRCP 连接端点注册到 FreeSWITCH 客户端对象中,并设置底层连接收到数据时的回调函数表为 mrcp_client.connection_method_vtable
- 解析 unimrcp 配置文件属性,创建对应的 profile,据此可以将多个 MRCP 服务器的连接信息隔离。如果是 v2 版本的 MRCP 协议,在 FreeSWITCH 和 MRCP 服务器之间还需要 SIP 信令交互,所以也会调用 mrcp_sofiasip_client_agent.c#mrcp_sofiasip_client_agent_create() 函数创建一个 SIP 交互的端点对象
/**
 * Create and configure the UniMRCP client used by this module.
 *
 * Builds the client, registers the synthesizer/recognizer resources, the codec
 * manager, one connection agent and one media engine (both shared by every
 * profile), then walks the <profiles> section of CONFIG_FILE and registers one
 * MRCP profile per <profile> element (MRCPv1 over RTSP or MRCPv2 over SIP).
 *
 * @param mod_pool module memory pool; used for the directory layout and for the
 *                 module-side profile_t objects
 * @return the configured client, or NULL if any step failed (the reason is
 *         logged before returning)
 */
static mrcp_client_t *mod_unimrcp_client_create(switch_memory_pool_t *mod_pool)
{
    switch_xml_t cfg = NULL, xml = NULL, profiles = NULL, profile = NULL;
    mrcp_client_t *client = NULL;
    apr_pool_t *pool = NULL;
    mrcp_resource_loader_t *resource_loader = NULL;
    mrcp_resource_factory_t *resource_factory = NULL;
    mpf_codec_manager_t *codec_manager = NULL;
    apr_size_t max_connection_count = 0;
    apt_bool_t offer_new_connection = FALSE;
    mrcp_connection_agent_t *connection_agent;
    mpf_engine_t *media_engine;
    apt_dir_layout_t *dir_layout;

    /* create the client */
    if ((dir_layout = apt_default_dir_layout_create("../", mod_pool)) == NULL) {
        goto done;
    }
    client = mrcp_client_create(dir_layout);
    if (!client) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP client\n");
        goto done;
    }
    pool = mrcp_client_memory_pool_get(client);
    if (!pool) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to get MRCP client memory pool\n");
        client = NULL;
        goto done;
    }

    /* load the synthesizer and recognizer resources */
    resource_loader = mrcp_resource_loader_create(FALSE, pool);
    if (!resource_loader) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP resource loader\n");
        client = NULL;
        goto done;
    } else {
        apt_str_t synth_resource;
        apt_str_t recog_resource;
        apt_string_set(&synth_resource, "speechsynth");
        mrcp_resource_load(resource_loader, &synth_resource);
        apt_string_set(&recog_resource, "speechrecog");
        mrcp_resource_load(resource_loader, &recog_resource);
        resource_factory = mrcp_resource_factory_get(resource_loader);
        mrcp_client_resource_factory_register(client, resource_factory);
    }

    codec_manager = mpf_engine_codec_manager_create(pool);
    if (!codec_manager) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MPF codec manager\n");
        client = NULL;
        goto done;
    }
    if (!mrcp_client_codec_manager_register(client, codec_manager)) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create register MRCP client codec manager\n");
        client = NULL;
        goto done;
    }

    /* set up MRCP connection agent that will be shared with all profiles */
    if (!zstr(globals.unimrcp_max_connection_count)) {
        /* parse as signed first: a negative value must not wrap into a huge unsigned count */
        int configured_count = atoi(globals.unimrcp_max_connection_count);
        if (configured_count > 0) {
            max_connection_count = (apr_size_t) configured_count;
        }
    }
    if (max_connection_count == 0) {
        /* missing or invalid setting: fall back to 100 connections */
        max_connection_count = 100;
    }
    if (!zstr(globals.unimrcp_offer_new_connection)) {
        /* strcasecmp() returns 0 on a match, so its raw result must not be used as the flag */
        offer_new_connection = switch_true(globals.unimrcp_offer_new_connection) ? TRUE : FALSE;
    }
    connection_agent = mrcp_client_connection_agent_create("MRCPv2ConnectionAgent", max_connection_count, offer_new_connection, pool);
    if (!connection_agent) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP connection agent\n");
        client = NULL;
        goto done;
    }
    /* optional tuning values: only positive numbers override the library defaults */
    if (!zstr(globals.unimrcp_rx_buffer_size)) {
        long rx_buffer_size = atol(globals.unimrcp_rx_buffer_size);
        if (rx_buffer_size > 0) {
            mrcp_client_connection_rx_size_set(connection_agent, (apr_size_t) rx_buffer_size);
        }
    }
    if (!zstr(globals.unimrcp_tx_buffer_size)) {
        long tx_buffer_size = atol(globals.unimrcp_tx_buffer_size);
        if (tx_buffer_size > 0) {
            mrcp_client_connection_tx_size_set(connection_agent, (apr_size_t) tx_buffer_size);
        }
    }
    if (!zstr(globals.unimrcp_request_timeout)) {
        long request_timeout = atol(globals.unimrcp_request_timeout);
        if (request_timeout > 0) {
            mrcp_client_connection_timeout_set(connection_agent, (apr_size_t) request_timeout);
        }
    }
    if (!mrcp_client_connection_agent_register(client, connection_agent)) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create register MRCP connection agent\n");
        client = NULL;
        goto done;
    }

    /* Set up the media engine that will be shared with all profiles */
    media_engine = mpf_engine_create("MediaEngine", pool);
    if (!media_engine) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MPF media engine\n");
        client = NULL;
        goto done;
    }
    if (!mpf_engine_scheduler_rate_set(media_engine, 1)) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to set MPF engine scheduler rate\n");
        client = NULL;
        goto done;
    }
    if (!mrcp_client_media_engine_register(client, media_engine)) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to register MPF media engine\n");
        client = NULL;
        goto done;
    }

    /* configure the client profiles */
    if (!(xml = switch_xml_open_cfg(CONFIG_FILE, &cfg, NULL))) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Could not open %s\n", CONFIG_FILE);
        client = NULL;
        goto done;
    }
    if ((profiles = switch_xml_child(cfg, "profiles"))) {
        for (profile = switch_xml_child(profiles, "profile"); profile; profile = switch_xml_next(profile)) {
            /* a profile is a signaling agent + termination factory + media engine + connection agent (MRCPv2 only) */
            mrcp_sig_agent_t *agent = NULL;
            mpf_termination_factory_t *termination_factory = NULL;
            mrcp_profile_t *mprofile = NULL;
            mpf_rtp_config_t *rtp_config = NULL;
            mpf_rtp_settings_t *rtp_settings = mpf_rtp_settings_alloc(pool);
            mrcp_sig_settings_t *sig_settings = mrcp_signaling_settings_alloc(pool);
            profile_t *mod_profile = NULL;
            switch_xml_t default_params = NULL;
            mrcp_connection_agent_t *v2_profile_connection_agent = NULL;

            /* get profile attributes */
            const char *name = apr_pstrdup(pool, switch_xml_attr(profile, "name"));
            const char *version = switch_xml_attr(profile, "version");
            if (zstr(name) || zstr(version)) {
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, " missing name or version attribute\n" );
                client = NULL;
                goto done;
            }

            /* prepare mod_unimrcp's profile for configuration */
            profile_create(&mod_profile, name, mod_pool);
            if (mod_profile) {
                switch_core_hash_insert(globals.profiles, mod_profile->name, mod_profile);
            } else {
                client = NULL;
                goto done;
            }

            /* pull in any default SPEAK params */
            default_params = switch_xml_child(profile, "synthparams");
            if (default_params) {
                switch_xml_t param = NULL;
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading SPEAK params\n");
                for (param = switch_xml_child(default_params, "param"); param; param = switch_xml_next(param)) {
                    const char *param_name = switch_xml_attr(param, "name");
                    const char *param_value = switch_xml_attr(param, "value");
                    if (zstr(param_name)) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing SPEAK param name\n");
                        client = NULL;
                        goto done;
                    }
                    if (zstr(param_value)) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing SPEAK param value\n");
                        client = NULL;
                        goto done;
                    }
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading SPEAK Param %s:%s\n", param_name, param_value);
                    switch_core_hash_insert(mod_profile->default_synth_params, switch_core_strdup(pool, param_name), switch_core_strdup(pool, param_value));
                }
            }

            /* pull in any default RECOGNIZE params */
            default_params = switch_xml_child(profile, "recogparams");
            if (default_params) {
                switch_xml_t param = NULL;
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading RECOGNIZE params\n");
                for (param = switch_xml_child(default_params, "param"); param; param = switch_xml_next(param)) {
                    const char *param_name = switch_xml_attr(param, "name");
                    const char *param_value = switch_xml_attr(param, "value");
                    if (zstr(param_name)) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing RECOGNIZE param name\n");
                        client = NULL;
                        goto done;
                    }
                    if (zstr(param_value)) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing RECOGNIZE param value\n");
                        client = NULL;
                        goto done;
                    }
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading RECOGNIZE Param %s:%s\n", param_name, param_value);
                    switch_core_hash_insert(mod_profile->default_recog_params, switch_core_strdup(pool, param_name), switch_core_strdup(pool, param_value));
                }
            }

            /* create RTP config, common to MRCPv1 and MRCPv2 */
            rtp_config = mpf_rtp_config_alloc(pool);
            rtp_config->rtp_port_min = DEFAULT_RTP_PORT_MIN;
            rtp_config->rtp_port_max = DEFAULT_RTP_PORT_MAX;
            apt_string_set(&rtp_config->ip, DEFAULT_LOCAL_IP_ADDRESS);

            if (strcmp("1", version) == 0) {
                /* MRCPv1 configuration */
                switch_xml_t param = NULL;
                rtsp_client_config_t *config = mrcp_unirtsp_client_config_alloc(pool);
                config->origin = DEFAULT_SDP_ORIGIN;
                sig_settings->resource_location = DEFAULT_RESOURCE_LOCATION;
                /* MRCPv1 profiles are created without the shared connection agent */
                v2_profile_connection_agent = NULL;
                if (!zstr(globals.unimrcp_request_timeout)) {
                    long request_timeout = atol(globals.unimrcp_request_timeout);
                    if (request_timeout > 0) {
                        config->request_timeout = (apr_size_t) request_timeout;
                    }
                }
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading MRCPv1 profile: %s\n", name);
                for (param = switch_xml_child(profile, "param"); param; param = switch_xml_next(param)) {
                    const char *param_name = switch_xml_attr(param, "name");
                    const char *param_value = switch_xml_attr(param, "value");
                    if (zstr(param_name)) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing param name\n");
                        client = NULL;
                        goto done;
                    }
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading Param %s:%s\n", param_name, param_value);
                    /* offer the param to each handler in turn; warn only if none of them accepts it */
                    if (!process_mrcpv1_config(config, sig_settings, param_name, param_value, pool) &&
                        !process_rtp_config(client, rtp_config, rtp_settings, param_name, param_value, pool) &&
                        !process_profile_config(mod_profile, param_name, param_value, mod_pool)) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring unknown param %s\n", param_name);
                    }
                }
                agent = mrcp_unirtsp_client_agent_create(name, config, pool);
                if (!agent) {
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP RTSP client agent\n");
                    client = NULL;
                    goto done;
                }
            } else if (strcmp("2", version) == 0) {
                /* MRCPv2 configuration */
                mrcp_sofia_client_config_t *config = mrcp_sofiasip_client_config_alloc(pool);
                switch_xml_t param = NULL;
                config->local_ip = DEFAULT_LOCAL_IP_ADDRESS;
                config->local_port = DEFAULT_SIP_LOCAL_PORT;
                sig_settings->server_ip = DEFAULT_REMOTE_IP_ADDRESS;
                sig_settings->server_port = DEFAULT_SIP_REMOTE_PORT;
                config->ext_ip = NULL;
                config->user_agent_name = DEFAULT_SOFIASIP_UA_NAME;
                config->origin = DEFAULT_SDP_ORIGIN;
                /* MRCPv2 profiles use the connection agent created above */
                v2_profile_connection_agent = connection_agent;
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading MRCPv2 profile: %s\n", name);
                for (param = switch_xml_child(profile, "param"); param; param = switch_xml_next(param)) {
                    const char *param_name = switch_xml_attr(param, "name");
                    const char *param_value = switch_xml_attr(param, "value");
                    if (zstr(param_name)) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing param name\n");
                        client = NULL;
                        goto done;
                    }
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading Param %s:%s\n", param_name, param_value);
                    /* offer the param to each handler in turn; warn only if none of them accepts it */
                    if (!process_mrcpv2_config(config, sig_settings, param_name, param_value, pool) &&
                        !process_rtp_config(client, rtp_config, rtp_settings, param_name, param_value, pool) &&
                        !process_profile_config(mod_profile, param_name, param_value, mod_pool)) {
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring unknown param %s\n", param_name);
                    }
                }
                agent = mrcp_sofiasip_client_agent_create(name, config, pool);
                if (!agent) {
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP SIP client agent\n");
                    client = NULL;
                    goto done;
                }
            } else {
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "version must be either \"1\" or \"2\"\n");
                client = NULL;
                goto done;
            }

            termination_factory = mpf_rtp_termination_factory_create(rtp_config, pool);
            if (!termination_factory) {
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create RTP termination factory\n");
                client = NULL;
                goto done;
            }
            mrcp_client_rtp_factory_register(client, termination_factory, name);
            mrcp_client_rtp_settings_register(client, rtp_settings, "RTP-Settings");
            mrcp_client_signaling_settings_register(client, sig_settings, "Signaling-Settings");
            mrcp_client_signaling_agent_register(client, agent);

            /* create the profile and register it */
            mprofile = mrcp_client_profile_create(NULL, agent, v2_profile_connection_agent, media_engine, termination_factory, rtp_settings, sig_settings, pool);
            if (!mprofile) {
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP client profile\n");
                client = NULL;
                goto done;
            }
            mrcp_client_profile_register(client, mprofile, name);
        }
    }

done:
    /* the parsed configuration is released on success and on every failure that happens after it was opened */
    if (xml) {
        switch_xml_free(xml);
    }
    return client;
}
mod_unimrcp.c#synth_load()
函数加载创建 TTS 功能应用的处理主要分为两个部分:
- 创建 SWITCH_SPEECH_INTERFACE 接口,将 TTS 相关功能封装到 FreeSWITCH 标准模块结构中,供上层使用
- 调用库函数 mrcp_application.c#mrcp_application_create() 创建 unimrcp 模块的 TTS 应用,这个部分主要是将 unimrcp 模块的处理逻辑嵌入到底层 MRCP 客户端,供底层回调通知上层
/**
 * Register the synthesizer (TTS) side of the module.
 *
 * Creates a FreeSWITCH speech interface whose callbacks point at this module's
 * synth_speech_* functions, creates the UniMRCP synthesizer application and
 * registers it with the global MRCP client under the name "synth", then fills
 * the two lookup tables used to translate parameter names.
 *
 * @param module_interface module interface the speech interface is attached to
 * @param pool             memory pool used for the application and the param ids
 * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when the speech
 *         interface or the MRCP application could not be created
 */
static switch_status_t synth_load(switch_loadable_module_interface_t *module_interface, switch_memory_pool_t *pool)
{
    /* MRCP synthesizer header name -> UniMRCP header id, inserted in this order */
    static const struct {
        const char *header_name;
        int header_id;
    } synth_headers[] = {
        { "jump-size", SYNTHESIZER_HEADER_JUMP_SIZE },
        { "kill-on-barge-in", SYNTHESIZER_HEADER_KILL_ON_BARGE_IN },
        { "speaker-profile", SYNTHESIZER_HEADER_SPEAKER_PROFILE },
        { "completion-cause", SYNTHESIZER_HEADER_COMPLETION_CAUSE },
        { "completion-reason", SYNTHESIZER_HEADER_COMPLETION_REASON },
        { "voice-gender", SYNTHESIZER_HEADER_VOICE_GENDER },
        { "voice-age", SYNTHESIZER_HEADER_VOICE_AGE },
        { "voice-variant", SYNTHESIZER_HEADER_VOICE_VARIANT },
        { "voice-name", SYNTHESIZER_HEADER_VOICE_NAME },
        { "prosody-volume", SYNTHESIZER_HEADER_PROSODY_VOLUME },
        { "prosody-rate", SYNTHESIZER_HEADER_PROSODY_RATE },
        { "speech-marker", SYNTHESIZER_HEADER_SPEECH_MARKER },
        { "speech-language", SYNTHESIZER_HEADER_SPEECH_LANGUAGE },
        { "fetch-hint", SYNTHESIZER_HEADER_FETCH_HINT },
        { "audio-fetch-hint", SYNTHESIZER_HEADER_AUDIO_FETCH_HINT },
        { "failed-uri", SYNTHESIZER_HEADER_FAILED_URI },
        { "failed-uri-cause", SYNTHESIZER_HEADER_FAILED_URI_CAUSE },
        { "speak-restart", SYNTHESIZER_HEADER_SPEAK_RESTART },
        { "speak-length", SYNTHESIZER_HEADER_SPEAK_LENGTH },
        { "load-lexicon", SYNTHESIZER_HEADER_LOAD_LEXICON },
        { "lexicon-search-order", SYNTHESIZER_HEADER_LEXICON_SEARCH_ORDER }
    };
    switch_speech_interface_t *tts = NULL;
    unsigned int i;

    /* expose the TTS callbacks to FreeSWITCH */
    tts = (switch_speech_interface_t *) switch_loadable_module_create_interface(module_interface, SWITCH_SPEECH_INTERFACE);
    if (!tts) {
        return SWITCH_STATUS_FALSE;
    }
    tts->interface_name = MOD_UNIMRCP;
    tts->speech_open = synth_speech_open;
    tts->speech_close = synth_speech_close;
    tts->speech_feed_tts = synth_speech_feed_tts;
    tts->speech_read_tts = synth_speech_read_tts;
    tts->speech_flush_tts = synth_speech_flush_tts;
    tts->speech_text_param_tts = synth_speech_text_param_tts;
    tts->speech_numeric_param_tts = synth_speech_numeric_param_tts;
    tts->speech_float_param_tts = synth_speech_float_param_tts;

    /* create the synthesizer application whose messages go to synth_message_handler */
    globals.synth.app = mrcp_application_create(synth_message_handler, NULL, pool);
    if (!globals.synth.app) {
        return SWITCH_STATUS_FALSE;
    }

    /* message dispatcher callbacks; session updates are not handled */
    globals.synth.dispatcher.on_session_update = NULL;
    globals.synth.dispatcher.on_session_terminate = speech_on_session_terminate;
    globals.synth.dispatcher.on_channel_add = speech_on_channel_add;
    globals.synth.dispatcher.on_channel_remove = speech_on_channel_remove;
    globals.synth.dispatcher.on_message_receive = synth_on_message_receive;

    /* audio stream callbacks; write_frame is the only one installed */
    globals.synth.audio_stream_vtable.destroy = NULL;
    globals.synth.audio_stream_vtable.open_rx = NULL;
    globals.synth.audio_stream_vtable.close_rx = NULL;
    globals.synth.audio_stream_vtable.read_frame = NULL;
    globals.synth.audio_stream_vtable.open_tx = NULL;
    globals.synth.audio_stream_vtable.close_tx = NULL;
    globals.synth.audio_stream_vtable.write_frame = synth_stream_write;

    mrcp_client_application_register(globals.mrcp_client, globals.synth.app, "synth");

    /* FreeSWITCH param name -> MRCP param name */
    switch_core_hash_init_nocase(&globals.synth.fs_param_map);
    switch_core_hash_insert(globals.synth.fs_param_map, "voice", "voice-name");

    /* MRCP param name -> UniMRCP header id */
    switch_core_hash_init_nocase(&globals.synth.param_id_map);
    for (i = 0; i < sizeof(synth_headers) / sizeof(synth_headers[0]); i++) {
        switch_core_hash_insert(globals.synth.param_id_map, synth_headers[i].header_name,
                                unimrcp_param_id_create(synth_headers[i].header_id, pool));
    }

    return SWITCH_STATUS_SUCCESS;
}
以 speak 放音 APP 为例,当上层执行这个 APP 时实际调用到 mod_dptools.c#speak_function()
函数,可以看到该函数主要处理是校验参数合法性,然后调用 switch_ivr_play_say.c#switch_ivr_speak_text()
函数
/**
 * Dialplan application "speak": synthesize text on the session.
 *
 * The application data has the form "engine|voice|text", "voice|text" or just
 * "text". A missing engine or voice is taken from the channel variables
 * "tts_engine" and "tts_voice". If any of the three values is still missing,
 * the error is logged, the channel is hung up and nothing is spoken.
 */
SWITCH_STANDARD_APP(speak_function)
{
    switch_channel_t *channel = switch_core_session_get_channel(session);
    char buf[10];
    char *argv[3] = { 0 };
    int argc;
    const char *engine = NULL;
    const char *voice = NULL;
    char *text = NULL;
    char *mydata = NULL;
    switch_input_args_t args = { 0 };

    if (zstr(data) || !(mydata = switch_core_session_strdup(session, data))) {
        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params!\n");
        return;
    }

    /* split a private copy; the text itself is re-copied from the untouched original below */
    argc = switch_separate_string(mydata, '|', argv, sizeof(argv) / sizeof(argv[0]));
    if (argc == 0) {
        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params!\n");
        return;
    } else if (argc == 1) {
        text = switch_core_session_strdup(session, data); /* unstripped text */
    } else if (argc == 2) {
        voice = argv[0];
        /* the field offset inside mydata is applied to data to reach the same position in the original string */
        text = switch_core_session_strdup(session, data + (argv[1] - argv[0])); /* unstripped text */
    } else {
        engine = argv[0];
        voice = argv[1];
        text = switch_core_session_strdup(session, data + (argv[2] - argv[0])); /* unstripped text */
    }

    /* fall back to channel variables for anything not given inline */
    if (!engine) {
        engine = switch_channel_get_variable(channel, "tts_engine");
    }
    if (!voice) {
        voice = switch_channel_get_variable(channel, "tts_voice");
    }

    if (!(engine && voice && text)) {
        /* substitute printable placeholders so the log line can be formatted */
        if (!engine) {
            engine = "NULL";
        }
        if (!voice) {
            voice = "NULL";
        }
        if (!text) {
            text = "NULL";
        }
        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params! [%s][%s][%s]\n", engine, voice, text);
        switch_channel_hangup(channel, SWITCH_CAUSE_DESTINATION_OUT_OF_ORDER);
        /* stop here: the placeholder strings must not be passed on as a real engine, voice or text */
        return;
    }

    /* DTMF received while speaking is delivered to on_dtmf with this buffer */
    args.input_callback = on_dtmf;
    args.buf = buf;
    args.buflen = sizeof(buf);

    switch_channel_set_variable(channel, SWITCH_PLAYBACK_TERMINATOR_USED, "");
    switch_ivr_speak_text(session, engine, voice, text, &args);
}
switch_ivr_play_say.c#switch_ivr_speak_text()
函数核心处理为以下几步:
- 调用函数 switch_core_speech.c#switch_core_speech_open() 使用本地 MRCP 客户端请求 MRCP 服务器新建会话
- 调用函数 switch_ivr_play_say.c#switch_ivr_speak_text_handle() 处理语音合成
/**
 * Speak a text on a session through a TTS engine.
 *
 * Opens the speech handle, an L16 codec and (when the channel variable
 * "timer_name" is set) a timer, then delegates the synthesis to
 * switch_ivr_speak_text_handle(). When the channel variable named by
 * SWITCH_CACHE_SPEECH_HANDLES_VARIABLE is true, the handle, codec and timer
 * are kept in a per-channel cache object and reused by later calls instead of
 * being created and destroyed each time.
 *
 * @param session    session to speak on
 * @param tts_name   TTS engine name
 * @param voice_name voice to use
 * @param text       text to synthesize
 * @param args       input arguments passed through to the speak handler
 * @return the status of switch_ivr_speak_text_handle(), SWITCH_STATUS_FALSE
 *         when the channel cannot be pre-answered, the status of
 *         switch_core_speech_open() when it fails, or SWITCH_STATUS_GENERR
 *         when the codec or timer cannot be set up
 */
SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *session,
                                                      const char *tts_name, const char *voice_name, const char *text, switch_input_args_t *args)
{
    switch_channel_t *channel = switch_core_session_get_channel(session);
    uint32_t rate = 0;
    int interval = 0;
    uint32_t channels;
    switch_frame_t write_frame = { 0 };
    switch_timer_t ltimer, *timer;
    switch_codec_t lcodec, *codec;
    switch_memory_pool_t *pool = switch_core_session_get_pool(session);
    char *codec_name;
    switch_status_t status = SWITCH_STATUS_SUCCESS;
    switch_speech_handle_t lsh, *sh;
    switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
    const char *timer_name, *var;
    cached_speech_handle_t *cache_obj = NULL;
    int need_create = 1, need_alloc = 1;
    switch_codec_implementation_t read_impl = { 0 };

    switch_core_session_get_read_impl(session, &read_impl);

    if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
        return SWITCH_STATUS_FALSE;
    }

    arg_recursion_check_start(args);

    /* default to the stack-local handle, codec and timer; the cache may replace them below */
    sh = &lsh;
    codec = &lcodec;
    timer = &ltimer;

    if ((var = switch_channel_get_variable(channel, SWITCH_CACHE_SPEECH_HANDLES_VARIABLE)) && switch_true(var)) {
        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cache enabled");
        if ((cache_obj = (cached_speech_handle_t *) switch_channel_get_private(channel, SWITCH_CACHE_SPEECH_HANDLES_OBJ_NAME))) {
            need_create = 0;
            if (!strcasecmp(cache_obj->tts_name, tts_name)) {
                /* same engine as the cached one: reuse it as is */
                need_alloc = 0;
            } else {
                switch_ivr_clear_speech_cache(session);
            }
        }
        if (!cache_obj) {
            cache_obj = (cached_speech_handle_t *) switch_core_session_alloc(session, sizeof(*cache_obj));
        }
        if (need_alloc) {
            switch_copy_string(cache_obj->tts_name, tts_name, sizeof(cache_obj->tts_name));
            switch_copy_string(cache_obj->voice_name, voice_name, sizeof(cache_obj->voice_name));
            switch_channel_set_private(channel, SWITCH_CACHE_SPEECH_HANDLES_OBJ_NAME, cache_obj);
        }
        sh = &cache_obj->sh;
        codec = &cache_obj->codec;
        timer = &cache_obj->timer;
    }

    timer_name = switch_channel_get_variable(channel, "timer_name");

    switch_core_session_reset(session, SWITCH_FALSE, SWITCH_FALSE);

    /* audio parameters follow the session's read codec implementation */
    rate = read_impl.actual_samples_per_second;
    interval = read_impl.microseconds_per_packet / 1000;
    channels = read_impl.number_of_channels;

    if (need_create) {
        memset(sh, 0, sizeof(*sh));
        if ((status = switch_core_speech_open(sh, tts_name, voice_name, (uint32_t) rate, interval, read_impl.number_of_channels, &flags, NULL)) != SWITCH_STATUS_SUCCESS) {
            switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid TTS module %s[%s]!\n", tts_name, voice_name);
            switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);
            switch_ivr_clear_speech_cache(session);
            arg_recursion_check_stop(args);
            return status;
        }
    } else if (cache_obj && strcasecmp(cache_obj->voice_name, voice_name)) {
        /* cached handle, but a different voice was requested: switch the voice on the open handle */
        switch_copy_string(cache_obj->voice_name, voice_name, sizeof(cache_obj->voice_name));
        switch_core_speech_text_param_tts(sh, "voice", voice_name);
    }

    if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
        flags = 0;
        switch_core_speech_close(sh, &flags);
        arg_recursion_check_stop(args);
        return SWITCH_STATUS_FALSE;
    }
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "OPEN TTS %s\n", tts_name);

    codec_name = "L16";

    if (need_create) {
        if (switch_core_codec_init(codec,
                                   codec_name,
                                   NULL,
                                   NULL, (int) rate, interval, channels, SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE, NULL,
                                   pool) == SWITCH_STATUS_SUCCESS) {
            switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Raw Codec Activated\n");
        } else {
            switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Raw Codec Activation Failed %s@%uhz 1 channel %dms\n", codec_name,
                              rate, interval);
            flags = 0;
            switch_core_speech_close(sh, &flags);
            switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);
            switch_ivr_clear_speech_cache(session);
            arg_recursion_check_stop(args);
            return SWITCH_STATUS_GENERR;
        }
    }

    write_frame.codec = codec;

    if (timer_name) {
        if (need_create) {
            if (switch_core_timer_init(timer, timer_name, interval, (int) sh->samples, pool) != SWITCH_STATUS_SUCCESS) {
                switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Setup timer failed!\n");
                switch_core_codec_destroy(write_frame.codec);
                flags = 0;
                switch_core_speech_close(sh, &flags);
                switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);
                switch_ivr_clear_speech_cache(session);
                arg_recursion_check_stop(args);
                return SWITCH_STATUS_GENERR;
            }
            switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Setup timer success %u bytes per %d ms!\n", sh->samples * 2,
                              interval);
        }
        switch_core_timer_sync(timer); // Sync timer

        /* start a thread to absorb incoming audio */
        switch_core_service_session(session);
    }

    /* the timer is handed over only when a timer name was configured */
    status = switch_ivr_speak_text_handle(session, sh, write_frame.codec, timer_name ? timer : NULL, text, args);
    flags = 0;

    /* without a cache object the resources belong to this call only, so release them */
    if (!cache_obj) {
        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "no cache_obj");
        switch_core_speech_close(sh, &flags);
        switch_core_codec_destroy(codec);
    }

    if (timer_name) {
        /* End the audio absorbing thread */
        switch_core_thread_session_end(session);
        if (!cache_obj) {
            switch_core_timer_destroy(timer);
        }
    }

    switch_core_session_reset(session, SWITCH_FALSE, SWITCH_TRUE);
    arg_recursion_check_stop(args);

    return status;
}
switch_core_speech.c#switch_core_speech_open()
函数实际只是通过核心注册的接口调用到 unimrcp 模块的 mod_unimrcp.c#synth_speech_open()
函数
/**
 * Open a speech handle on the speech interface registered under module_name.
 *
 * module_name may carry a parameter after a ':' ("engine:param"); the part
 * before the colon selects the speech interface and the remainder is stored in
 * sh->param. When no pool is supplied a new one is created for the handle and
 * the handle is flagged SWITCH_SPEECH_FLAG_FREE_POOL.
 *
 * @param sh          speech handle to fill in
 * @param module_name speech interface name, optionally followed by ":param"
 * @param voice_name  voice handed to the interface's speech_open callback
 * @param rate        sample rate
 * @param interval    packet interval, used with rate to compute sh->samples
 * @param channels    channel count
 * @param flags       speech flags; copied into the handle and passed to speech_open
 * @param pool        memory pool for the handle, or NULL to create one
 * @return SWITCH_STATUS_FALSE when sh, flags or module_name is missing,
 *         SWITCH_STATUS_GENERR when no such speech interface exists, the
 *         pool-creation status when that fails, otherwise the status returned
 *         by the interface's speech_open callback
 */
SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *sh,
                                                        const char *module_name,
                                                        const char *voice_name,
                                                        unsigned int rate, unsigned int interval, unsigned int channels,
                                                        switch_speech_flag_t *flags, switch_memory_pool_t *pool)
{
    char engine_buf[256] = "";
    char *engine_param = NULL;
    switch_status_t rc;

    if (!(sh && flags) || zstr(module_name)) {
        return SWITCH_STATUS_FALSE;
    }

    /* "engine:param" -> look up "engine" and remember "param" */
    if (strchr(module_name, ':')) {
        char *colon;

        switch_set_string(engine_buf, module_name);
        colon = strchr(engine_buf, ':');
        if (colon) {
            *colon = '\0';
            engine_param = colon + 1;
            module_name = engine_buf;
        }
    }

    sh->speech_interface = switch_loadable_module_get_speech_interface(module_name);
    if (sh->speech_interface == 0) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid speech module [%s]!\n", module_name);
        return SWITCH_STATUS_GENERR;
    }

    sh->flags = *flags;

    /* use the caller's pool when given, otherwise create one for the handle */
    if (!pool) {
        rc = switch_core_new_memory_pool(&sh->memory_pool);
        if (rc != SWITCH_STATUS_SUCCESS) {
            UNPROTECT_INTERFACE(sh->speech_interface);
            return rc;
        }
        switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
    } else {
        sh->memory_pool = pool;
    }

    /* strings are duplicated into the handle's pool because module_name may point at engine_buf */
    sh->engine = switch_core_strdup(sh->memory_pool, module_name);
    if (engine_param) {
        sh->param = switch_core_strdup(sh->memory_pool, engine_param);
    }
    sh->name = switch_core_strdup(sh->memory_pool, module_name);

    sh->rate = rate;
    sh->samplerate = rate;
    sh->native_rate = rate;
    sh->samples = switch_samples_per_packet(rate, interval);
    sh->channels = channels;
    sh->real_channels = 1;

    /* hand over to the module's own open callback */
    rc = sh->speech_interface->speech_open(sh, voice_name, rate, channels, flags);
    if (rc != SWITCH_STATUS_SUCCESS) {
        UNPROTECT_INTERFACE(sh->speech_interface);
        return rc;
    }

    switch_set_flag(sh, SWITCH_SPEECH_FLAG_OPEN);
    return rc;
}
mod_unimrcp.c#synth_speech_open()
函数的核心处理是创建一个 FreeSWITCH 层面的 speech_channel_t 对象,并调用 mod_unimrcp.c#speech_channel_open()
函数通过底层 MRCP 客户端建立与远程 MRCP 服务端之间的连接
/**
 * Speech-interface "open" handler for the synthesizer.
 *
 * Allocates a speech channel for the handle, resolves the MRCP profile
 * (taken from sh->param, falling back to the module default), opens the
 * channel on that profile and then applies the voice name plus the
 * profile's default synthesizer parameters.
 *
 * sh->param may carry "<profile>:<session uuid>"; when it does not, the
 * session stored on the handle's memory pool under "__session" (if any)
 * supplies the uuid used for logging.
 *
 * @param sh         speech handle being opened
 * @param voice_name voice to request, may be empty
 * @param rate       sampling rate passed on to the channel
 * @param channels   unused here
 * @param flags      unused here
 * @return SWITCH_STATUS_SUCCESS on success, SWITCH_STATUS_FALSE when the
 *         channel cannot be created or the profile is unknown, otherwise
 *         the status returned by speech_channel_open()
 */
static switch_status_t synth_speech_open(switch_speech_handle_t *sh, const char *voice_name, int rate, int channels, switch_speech_flag_t *flags)
{
	switch_status_t status;
	speech_channel_t *schannel = NULL;
	const char *profile_name = sh->param;
	profile_t *profile;
	int channel_no = get_next_speech_channel_number();
	char *chan_name;
	char *uuid = NULL;
	switch_hash_index_t *iter;

	if (profile_name && strchr(profile_name, ':')) {
		/* "<profile>:<uuid>": split a private copy at the first colon */
		char *copy = switch_core_strdup(sh->memory_pool, profile_name);
		char *sep = strchr(copy, ':');

		*sep = '\0';
		uuid = switch_core_strdup(sh->memory_pool, sep + 1);
		profile_name = copy;
	} else {
		/* fall back to the session registered on this memory pool, if any */
		switch_core_session_t *session = switch_core_memory_pool_get_data(sh->memory_pool, "__session");

		if (session) {
			uuid = switch_core_session_get_uuid(session);
		}
	}

	chan_name = switch_core_sprintf(sh->memory_pool, "TTS-%d", channel_no);

	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(uuid), SWITCH_LOG_INFO,
					  "speech_handle: name = %s, rate = %d, speed = %d, samples = %d, voice = %s, engine = %s, param = %s\n", sh->name, sh->rate,
					  sh->speed, sh->samples, sh->voice, sh->engine, sh->param);
	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(uuid), SWITCH_LOG_INFO, "voice = %s, rate = %d\n", voice_name, rate);

	/* allocate the channel and tie it to the handle */
	if (speech_channel_create(&schannel, chan_name, uuid, SPEECH_CHANNEL_SYNTHESIZER, &globals.synth, (uint16_t) rate, sh->memory_pool) != SWITCH_STATUS_SUCCESS) {
		return SWITCH_STATUS_FALSE;
	}
	sh->private_info = schannel;
	schannel->fsh = sh;

	/* resolve the profile, defaulting when none was given */
	if (zstr(profile_name)) {
		profile_name = globals.unimrcp_default_synth_profile;
	}
	profile = (profile_t *) switch_core_hash_find(globals.profiles, profile_name);
	if (!profile) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(uuid), SWITCH_LOG_ERROR, "(%s) Can't find profile, %s\n", chan_name, profile_name);
		return SWITCH_STATUS_FALSE;
	}

	status = speech_channel_open(schannel, profile);
	if (status != SWITCH_STATUS_SUCCESS) {
		return status;
	}

	/* per-call voice first, then the profile's default parameters */
	if (!zstr(voice_name)) {
		speech_channel_set_param(schannel, "Voice-Name", voice_name);
	}
	iter = switch_core_hash_first(profile->default_synth_params);
	while (iter) {
		const void *key;
		void *val;

		switch_core_hash_this(iter, &key, NULL, &val);
		speech_channel_set_param(schannel, (char *) key, (char *) val);
		iter = switch_core_hash_next(&iter);
	}

	return status;
}
`mod_unimrcp.c#speech_channel_open()` 函数主要逻辑是调用底层库函数创建 MRCP 会话,并建立连接:
- 调用库函数 `mrcp_application.c#mrcp_application_session_create()` 创建 MRCP 会话
- 调用库函数 `mrcp_application.c#mrcp_application_channel_create()` 创建 MRCP 会话下的 channel
- 调用库函数 `mrcp_application.c#mrcp_application_channel_add()` 请求远程 MRCP 服务器创建新会话
/**
 * Open a speech channel on the given profile.
 *
 * With the channel mutex held: creates the MRCP session, an audio
 * termination and the MRCP channel, adds the channel to the session and
 * then waits on the channel condition until the state leaves
 * SPEECH_CHANNEL_CLOSED. If the channel ends up in SPEECH_CHANNEL_ERROR
 * the session is terminated and the function waits for the state to leave
 * SPEECH_CHANNEL_ERROR before returning.
 *
 * @param schannel channel to open; must currently be SPEECH_CHANNEL_CLOSED
 * @param profile  profile to open the channel on
 * @return SWITCH_STATUS_SUCCESS when the channel became ready,
 *         SWITCH_STATUS_RESTART when the session could not be created or
 *         the failed session was cleaned up (retry allowed),
 *         SWITCH_STATUS_FALSE on any other failure
 */
static switch_status_t speech_channel_open(speech_channel_t *schannel, profile_t *profile)
{
	switch_status_t status = SWITCH_STATUS_SUCCESS;
	mpf_termination_t *term = NULL;
	mrcp_resource_type_e resource_type;
	int open_warned = 0;
	int cleanup_warned = 0;

	switch_mutex_lock(schannel->mutex);

	/* only a closed channel may be opened */
	if (schannel->state != SPEECH_CHANNEL_CLOSED) {
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	schannel->profile = profile;

	/* MRCP session */
	schannel->unimrcp_session = mrcp_application_session_create(schannel->application->app, profile->name, schannel);
	if (schannel->unimrcp_session == NULL) {
		/* profile doesn't exist? */
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create session with %s\n", schannel->name, profile->name);
		status = SWITCH_STATUS_RESTART;
		goto done;
	}
	mrcp_application_session_name_set(schannel->unimrcp_session, schannel->name);

	/* audio termination */
	term = speech_channel_create_mpf_termination(schannel);
	if (term == NULL) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create termination with %s\n", schannel->name, profile->name);
		mrcp_application_session_destroy(schannel->unimrcp_session);
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	/* MRCP channel of the matching resource type */
	resource_type = (schannel->type == SPEECH_CHANNEL_SYNTHESIZER) ? MRCP_SYNTHESIZER_RESOURCE : MRCP_RECOGNIZER_RESOURCE;
	schannel->unimrcp_channel = mrcp_application_channel_create(schannel->unimrcp_session, resource_type, term, NULL, schannel);
	if (schannel->unimrcp_channel == NULL) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create channel with %s\n", schannel->name, profile->name);
		mrcp_application_session_destroy(schannel->unimrcp_session);
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	/* adding the channel to the session establishes the connection to the MRCP server */
	if (mrcp_application_channel_add(schannel->unimrcp_session, schannel->unimrcp_channel) != TRUE) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to add channel to session with %s\n", schannel->name, profile->name);
		mrcp_application_session_destroy(schannel->unimrcp_session);
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	/* block until the state leaves CLOSED; warn once on the first timeout */
	while (schannel->state == SPEECH_CHANNEL_CLOSED) {
		if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !open_warned) {
			open_warned = 1;
			switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) MRCP session has not opened after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
		}
	}

	if (schannel->state == SPEECH_CHANNEL_READY) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) channel is ready\n", schannel->name);
		goto done;
	}

	if (schannel->state == SPEECH_CHANNEL_CLOSED) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Timed out waiting for channel to be ready\n", schannel->name);
		/* can't retry */
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	if (schannel->state != SPEECH_CHANNEL_ERROR) {
		goto done;
	}

	/* open failed: tear the session down and wait for the cleanup to finish */
	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) Terminating MRCP session\n", schannel->name);
	if (!mrcp_application_session_terminate(schannel->unimrcp_session)) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) Unable to terminate application session\n", schannel->name);
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	while (schannel->state == SPEECH_CHANNEL_ERROR) {
		if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !cleanup_warned) {
			cleanup_warned = 1;
			switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) MRCP session has not cleaned up after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
		}
	}

	/* back to CLOSED means the caller may retry; anything else is a major issue */
	status = (schannel->state != SPEECH_CHANNEL_CLOSED) ? SWITCH_STATUS_FALSE : SWITCH_STATUS_RESTART;

 done:
	switch_mutex_unlock(schannel->mutex);
	return status;
}
此时回到本节步骤2第2步,`switch_ivr_play_say.c#switch_ivr_speak_text_handle()` 函数是 tts 处理的功能主体,关键处理如下:
- 通过核心函数 `switch_core.c#switch_core_speech_feed_tts()` 调用到 `mod_unimrcp.c#synth_speech_feed_tts()` 函数发起 MRCP 语音合成请求
- 在 `for (;;)` 无限循环中不断执行核心函数 `switch_core.c#switch_core_speech_read_tts()`,调用到 `mod_unimrcp.c#synth_speech_read_tts()` 函数尝试获取合成的语音
- 通过核心函数 `switch_core.c#switch_core_session_write_frame()` 将 MRCP 服务器返回的语音流写到当前会话,通过 RTP 传输到 SIP 终端播放
/**
 * Speak text on a session through an already opened speech handle.
 *
 * Every '*' and '#' in the text is replaced by the channel variables
 * "star_replace" / "pound_replace" (defaulting to "star" / "pound"), the
 * result is fed to the speech handle, and the function then loops reading
 * synthesized audio from the handle and writing it to the session. While
 * looping it also services DTMF and events through the optional input
 * args, and paces itself with the timer or, without one, by reading frames
 * from the session.
 *
 * @param session session to play the audio on
 * @param sh      opened speech handle; NULL yields SWITCH_STATUS_FALSE
 * @param codec   codec attached to the written frames; must be ready
 * @param timer   optional timer used for pacing and frame timestamps
 * @param text    text to speak
 * @param args    optional input callbacks / buffer / digit machine
 * @return SWITCH_STATUS_SUCCESS when reading ends with SWITCH_STATUS_BREAK
 *         from the speech handle (treated as normal completion);
 *         SWITCH_STATUS_FALSE for a missing handle, failed pre-answer,
 *         codec not ready or channel no longer ready;
 *         SWITCH_STATUS_MEMERR when the substitution buffer cannot be
 *         allocated; SWITCH_STATUS_BREAK on CF_BREAK or DTMF interruption;
 *         otherwise the status returned by the input callbacks, the digit
 *         machine or the speech read.
 */
SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session_t *session,
															 switch_speech_handle_t *sh,
															 switch_codec_t *codec, switch_timer_t *timer, const char *text, switch_input_args_t *args)
{
	switch_channel_t *channel = switch_core_session_get_channel(session);
	short abuf[SWITCH_RECOMMENDED_BUFFER_SIZE];
	switch_dtmf_t dtmf = { 0 };
	uint32_t len = 0;
	switch_size_t ilen = 0;
	switch_frame_t write_frame = { 0 };
	switch_status_t status = SWITCH_STATUS_SUCCESS;
	switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
	switch_size_t extra = 0;
	char *tmp = NULL;
	const char *star, *pound, *p;
	switch_size_t starlen, poundlen;

	if (!sh) {
		return SWITCH_STATUS_FALSE;
	}

	if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
		return SWITCH_STATUS_FALSE;
	}

	if (!switch_core_codec_ready(codec)) {
		return SWITCH_STATUS_FALSE;
	}

	arg_recursion_check_start(args);

	write_frame.data = abuf;
	write_frame.buflen = sizeof(abuf);

	/* bytes per read: 16-bit samples times channel count */
	len = sh->samples * 2 * sh->channels;

	flags = 0;

	if (!(star = switch_channel_get_variable(channel, "star_replace"))) {
		star = "star";
	}
	if (!(pound = switch_channel_get_variable(channel, "pound_replace"))) {
		pound = "pound";
	}
	starlen = strlen(star);
	poundlen = strlen(pound);

	/* count how much room the '*' / '#' substitutions will need */
	for (p = text; p && *p; p++) {
		if (*p == '*') {
			extra += starlen;
		} else if (*p == '#') {
			extra += poundlen;
		}
	}

	if (extra) {
		char *tp;
		switch_size_t mylen = strlen(text) + extra + 1;

		tmp = malloc(mylen);
		if (!tmp) {
			arg_recursion_check_stop(args);
			return SWITCH_STATUS_MEMERR;
		}
		memset(tmp, 0, mylen);
		tp = tmp;
		for (p = text; p && *p; p++) {
			/*
			 * Bound each write by the space actually left in tmp. The size
			 * used to be derived from sizeof(tp), i.e. the size of a pointer,
			 * so replacement words longer than sizeof(char *) - 1 bytes were
			 * truncated while tp still advanced by the full length, leaving a
			 * NUL gap that cut the spoken text short.
			 */
			if (*p == '*') {
				snprintf(tp, mylen - (switch_size_t) (tp - tmp), "%s", star);
				tp += starlen;
			} else if (*p == '#') {
				snprintf(tp, mylen - (switch_size_t) (tp - tmp), "%s", pound);
				tp += poundlen;
			} else {
				*tp++ = *p;
			}
		}

		text = tmp;
	}

	switch_core_speech_feed_tts(sh, text, &flags);
	switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Speaking text: %s\n", text);
	switch_safe_free(tmp);
	text = NULL;

	write_frame.rate = sh->rate;
	memset(write_frame.data, 0, len);
	write_frame.datalen = len;
	write_frame.samples = len / 2;
	write_frame.codec = codec;

	switch_assert(codec->implementation != NULL);

	switch_channel_audio_sync(channel);

	for (;;) {
		switch_event_t *event;

		ilen = len;

		if (!switch_channel_ready(channel)) {
			status = SWITCH_STATUS_FALSE;
			break;
		}

		if (switch_channel_test_flag(channel, CF_BREAK)) {
			switch_channel_clear_flag(channel, CF_BREAK);
			status = SWITCH_STATUS_BREAK;
			break;
		}

		switch_ivr_parse_all_events(session);

		if (args) {
			/* dtmf handler function you can hook up to be executed when a digit is dialed during playback
			 * if you return anything but SWITCH_STATUS_SUCCESS the playback will stop.
			 */
			if (switch_channel_has_dtmf(channel)) {
				if (!args->input_callback && !args->buf && !args->dmachine) {
					status = SWITCH_STATUS_BREAK;
					break;
				}
				if (args->buf && !strcasecmp(args->buf, "_break_")) {
					status = SWITCH_STATUS_BREAK;
				} else {
					switch_channel_dequeue_dtmf(channel, &dtmf);

					if (args->dmachine) {
						char ds[2] = {dtmf.digit, '\0'};
						if ((status = switch_ivr_dmachine_feed(args->dmachine, ds, NULL)) != SWITCH_STATUS_SUCCESS) {
							break;
						}
					}

					if (args->input_callback) {
						status = args->input_callback(session, (void *) &dtmf, SWITCH_INPUT_TYPE_DTMF, args->buf, args->buflen);
					} else if (args->buf) {
						*((char *) args->buf) = dtmf.digit;
						status = SWITCH_STATUS_BREAK;
					}
				}
			}

			if (args->input_callback) {
				if (switch_core_session_dequeue_event(session, &event, SWITCH_FALSE) == SWITCH_STATUS_SUCCESS) {
					switch_status_t ostatus = args->input_callback(session, event, SWITCH_INPUT_TYPE_EVENT, args->buf, args->buflen);
					if (ostatus != SWITCH_STATUS_SUCCESS) {
						status = ostatus;
					}
					switch_event_destroy(&event);
				}
			}

			if (status != SWITCH_STATUS_SUCCESS) {
				break;
			}
		}

		/* while paused, keep pacing and servicing input but read no audio */
		if (switch_test_flag(sh, SWITCH_SPEECH_FLAG_PAUSE)) {
			if (timer) {
				if (switch_core_timer_next(timer) != SWITCH_STATUS_SUCCESS) {
					break;
				}
			} else {
				switch_frame_t *read_frame;
				switch_status_t tstatus = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);

				while (switch_channel_ready(channel) && switch_channel_test_flag(channel, CF_HOLD)) {
					switch_ivr_parse_all_messages(session);
					switch_yield(10000);
				}

				if (!SWITCH_READ_ACCEPTABLE(tstatus)) {
					break;
				}

				if (args && args->dmachine) {
					if ((status = switch_ivr_dmachine_ping(args->dmachine, NULL)) != SWITCH_STATUS_SUCCESS) {
						goto done;
					}
				}

				if (args && (args->read_frame_callback)) {
					if ((status = args->read_frame_callback(session, read_frame, args->user_data)) != SWITCH_STATUS_SUCCESS) {
						goto done;
					}
				}
			}
			continue;
		}

		flags = SWITCH_SPEECH_FLAG_BLOCKING;
		status = switch_core_speech_read_tts(sh, abuf, &ilen, &flags);

		if (status != SWITCH_STATUS_SUCCESS) {
			/* BREAK from the read is treated as normal completion */
			if (status == SWITCH_STATUS_BREAK) {
				status = SWITCH_STATUS_SUCCESS;
			}
			break;
		}

		write_frame.datalen = (uint32_t) ilen;
		write_frame.samples = (uint32_t) (ilen / 2 / sh->channels);
		if (timer) {
			write_frame.timestamp = timer->samplecount;
		}
		if (switch_core_session_write_frame(session, &write_frame, SWITCH_IO_FLAG_NONE, 0) != SWITCH_STATUS_SUCCESS) {
			break;
		}

		if (timer) {
			if (switch_core_timer_next(timer) != SWITCH_STATUS_SUCCESS) {
				break;
			}
		} else {				/* time off the channel (if you must) */
			switch_frame_t *read_frame;
			switch_status_t tstatus = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);

			while (switch_channel_ready(channel) && switch_channel_test_flag(channel, CF_HOLD)) {
				switch_ivr_parse_all_messages(session);
				switch_yield(10000);
			}

			if (!SWITCH_READ_ACCEPTABLE(tstatus)) {
				break;
			}

			if (args && args->dmachine) {
				if ((status = switch_ivr_dmachine_ping(args->dmachine, NULL)) != SWITCH_STATUS_SUCCESS) {
					goto done;
				}
			}

			if (args && (args->read_frame_callback)) {
				if ((status = args->read_frame_callback(session, read_frame, args->user_data)) != SWITCH_STATUS_SUCCESS) {
					goto done;
				}
			}
		}
	}

 done:
	switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "done speaking text\n");
	flags = 0;
	switch_core_speech_flush_tts(sh);

	arg_recursion_check_stop(args);
	return status;
}
`mod_unimrcp.c#synth_speech_feed_tts()` 函数的核心其实是执行 `mod_unimrcp.c#synth_channel_speak()` 函数,`mod_unimrcp.c#synth_channel_speak()` 函数的核心处理如下:
- 调用底层库函数 `mrcp_application.c#mrcp_application_message_create()` 创建 `SYNTHESIZER_SPEAK` tts 请求的消息结构
- 调用底层库函数 `mrcp_application.c#mrcp_application_message_send()` 触发执行向 MRCP 服务器发送语音合成请求
- 等待 MRCP 服务器返回,将当前 tts 的 channel 状态流转为 `SPEECH_CHANNEL_PROCESSING`。这个部分主要依靠 unimrcp 模块加载时嵌入到底层 MRCP 客户端的回调 `mod_unimrcp.c#synth_on_message_receive()` 函数完成
/**
 * Speech-interface "feed_tts" handler: hands the text to the synthesizer
 * channel stored on the speech handle.
 *
 * @param sh    speech handle whose private_info holds the speech channel
 * @param text  text to synthesize
 * @param flags unused here
 * @return SWITCH_STATUS_FALSE for empty text, otherwise the result of
 *         synth_channel_speak()
 */
static switch_status_t synth_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags)
{
	speech_channel_t *schannel = (speech_channel_t *) sh->private_info;

	/* nothing to speak */
	if (zstr(text)) {
		return SWITCH_STATUS_FALSE;
	}

	return synth_channel_speak(schannel, text);
}
/**
 * Send a SPEAK request for the given text on a synthesizer channel.
 *
 * With the channel mutex held: builds a SYNTHESIZER_SPEAK message, sets its
 * content type (the profile's SSML MIME type when the text starts with
 * XML_ID or SSML_ID, plain text otherwise), applies the channel parameters,
 * attaches the text as body, clears the audio queue and sends the message.
 * It then waits on the channel condition until the state leaves
 * SPEECH_CHANNEL_READY.
 *
 * @param schannel synthesizer channel; must be in SPEECH_CHANNEL_READY
 * @param text     plain text or SSML to speak
 * @return SWITCH_STATUS_SUCCESS when the channel reached
 *         SPEECH_CHANNEL_PROCESSING, SWITCH_STATUS_FALSE otherwise
 */
static switch_status_t synth_channel_speak(speech_channel_t *schannel, const char *text)
{
	switch_status_t status = SWITCH_STATUS_SUCCESS;
	mrcp_message_t *msg = NULL;
	mrcp_generic_header_t *generic_hdr = NULL;
	mrcp_synth_header_t *synth_hdr = NULL;
	const char *mime = NULL;
	int timeout_logged = 0;

	switch_mutex_lock(schannel->mutex);

	if (schannel->state != SPEECH_CHANNEL_READY) {
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	msg = mrcp_application_message_create(schannel->unimrcp_session, schannel->unimrcp_channel, SYNTHESIZER_SPEAK);
	if (msg == NULL) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Failed to create SPEAK message\n", schannel->name);
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	/* generic header: content type */
	generic_hdr = (mrcp_generic_header_t *) mrcp_generic_header_prepare(msg);
	if (generic_hdr == NULL) {
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	/* a leading XML / SSML marker is a good enough test for an SSML body */
	if (text_starts_with(text, XML_ID) || text_starts_with(text, SSML_ID)) {
		mime = schannel->profile->ssml_mime_type;
	} else {
		mime = MIME_TYPE_PLAIN_TEXT;
	}
	apt_string_assign(&generic_hdr->content_type, mime, msg->pool);
	mrcp_generic_header_property_add(msg, GENERIC_HEADER_CONTENT_TYPE);

	/* synthesizer header: voice, rate, etc. */
	synth_hdr = (mrcp_synth_header_t *) mrcp_resource_header_prepare(msg);
	if (synth_hdr == NULL) {
		status = SWITCH_STATUS_FALSE;
		goto done;
	}
	synth_channel_set_params(schannel, msg, generic_hdr, synth_hdr);

	/* body: the text itself */
	apt_string_assign(&msg->body, text, schannel->memory_pool);

	/* drop any queued audio, then send SPEAK to the MRCP server */
	audio_queue_clear(schannel->audio_queue);
	if (mrcp_application_message_send(schannel->unimrcp_session, schannel->unimrcp_channel, msg) == FALSE) {
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	/* wait for IN-PROGRESS; warn once on the first timeout */
	while (schannel->state == SPEECH_CHANNEL_READY) {
		if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !timeout_logged) {
			timeout_logged = 1;
			switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) SPEAK IN-PROGRESS not received after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
		}
	}

	if (schannel->state != SPEECH_CHANNEL_PROCESSING) {
		status = SWITCH_STATUS_FALSE;
	}

 done:
	switch_mutex_unlock(schannel->mutex);
	return status;
}
`mod_unimrcp.c#synth_speech_read_tts()` 函数的核心是执行 `mod_unimrcp.c#speech_channel_read()`,`mod_unimrcp.c#speech_channel_read()` 函数的关键则是检查 tts 的 channel 状态,当其状态符合要求的时候从 channel 的语音流缓冲队列中读取数据。此时回到本节步骤6第3步,`switch_core.c#switch_core_session_write_frame()` 函数会将从 MRCP 服务器传输过来的语音流数据写入到当前会话缓冲,经过编码转化,最终将通过 RTP 发送到终端播放,至此 tts 语音合成处理流程基本结束
/**
 * Speech-interface "read_tts" handler: fetches synthesized audio from the
 * speech channel stored on the handle.
 *
 * On a successful read that delivered fewer bytes than requested, the rest
 * of the buffer is filled with the channel's silence value and *datalen is
 * left unchanged. When the read does not succeed, the channel is put back
 * into SPEECH_CHANNEL_READY, *datalen is set to 0 and SWITCH_STATUS_BREAK
 * is returned.
 *
 * @param sh      speech handle whose private_info holds the speech channel
 * @param data    output buffer
 * @param datalen in: bytes requested; out: 0 when no more audio
 * @param flags   SWITCH_SPEECH_FLAG_BLOCKING selects a blocking read
 * @return SWITCH_STATUS_SUCCESS or SWITCH_STATUS_BREAK
 */
static switch_status_t synth_speech_read_tts(switch_speech_handle_t *sh, void *data, switch_size_t *datalen, switch_speech_flag_t *flags)
{
	speech_channel_t *schannel = (speech_channel_t *) sh->private_info;
	switch_size_t got = *datalen;
	switch_status_t status;

	if (speech_channel_read(schannel, data, &got, (*flags & SWITCH_SPEECH_FLAG_BLOCKING)) != SWITCH_STATUS_SUCCESS) {
		/* ready for next speak request */
		speech_channel_set_state(schannel, SPEECH_CHANNEL_READY);
		*datalen = 0;
		status = SWITCH_STATUS_BREAK;
	} else {
		/* short read: pad the tail with silence */
		if (got < *datalen) {
#ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUE
			switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) adding %ld bytes of padding\n", schannel->name, *datalen - got);
#endif
			memset((uint8_t *) data + got, schannel->silence, *datalen - got);
		}
		status = SWITCH_STATUS_SUCCESS;
	}

	/* report negotiated sample rate back to FreeSWITCH */
	sh->native_rate = schannel->rate;

	return status;
}
/**
 * Read audio from a speech channel's audio queue according to its state.
 *
 * - SPEECH_CHANNEL_PROCESSING: reads from the queue (blocking as requested)
 *   and reports success regardless of the read result.
 * - SPEECH_CHANNEL_DONE: reads without blocking; SWITCH_STATUS_BREAK once
 *   the queue read returns SWITCH_STATUS_FALSE.
 * - any other state: SWITCH_STATUS_BREAK.
 *
 * @param schannel channel to read from
 * @param data     output buffer
 * @param len      in/out byte count passed through to audio_queue_read()
 * @param block    non-zero to request a blocking read while processing
 * @return SWITCH_STATUS_FALSE when the channel, its mutex or its audio
 *         queue is missing; otherwise SWITCH_STATUS_SUCCESS or
 *         SWITCH_STATUS_BREAK as described above
 */
static switch_status_t speech_channel_read(speech_channel_t *schannel, void *data, switch_size_t *len, int block)
{
	if (!schannel || !schannel->mutex || !schannel->audio_queue) {
		return SWITCH_STATUS_FALSE;
	}

	switch (schannel->state) {
	case SPEECH_CHANNEL_PROCESSING:
		/* IN-PROGRESS */
		audio_queue_read(schannel->audio_queue, data, len, block);
		return SWITCH_STATUS_SUCCESS;

	case SPEECH_CHANNEL_DONE:
		/* drain whatever is left - never blocking */
		if (audio_queue_read(schannel->audio_queue, data, len, 0) == SWITCH_STATUS_FALSE) {
			/* all frames read */
			return SWITCH_STATUS_BREAK;
		}
		return SWITCH_STATUS_SUCCESS;

	default:
		return SWITCH_STATUS_BREAK;
	}
}