上一篇中我们已经实现了tts(语音合成)
接下来我们再来实现在线语音识别
在这个过程中我走了不少弯路,网上的博客几乎全部看了一遍,有不少还是很有问题的 特在此总结一个切实可用的方法
希望大家觉得ok的话给我点赞
第一步 将samples里面的iat_online_record_sample里面的三个.c文件拷贝到xf_voice的src中,并注意将主文件iat...后缀改成.cpp
不然会出错;同时,将samples里面include中的.h文件全部拷贝到Robot/voice/inc目录下
第二步 修改iat_online_record_sample.cpp
名称改为fuck_asr.cpp(为什么骂人呢,因为一直出bug心情烦躁,个人经验,名字贱好养活)
里面代码改成下面的代码
#include
#include
#include
#include
#include
#include
#include
#include "/Robot/voice/inc/qisr.h"
#include "/Robot/voice/inc/msp_cmn.h"
#include "/Robot/voice/inc/msp_errors.h"
#include "/Robot/voice/inc/speech_recognizer.h"
#define FRAME_LEN 640
#define BUFFER_SIZE 4096
#define ASRFLAG 1
using namespace std;
bool flag = false;
bool recorder_Flag = true;
string result = "";
/* Upload User words */
static int upload_userwords()
{
char* userwords = NULL;
size_t len = 0;
size_t read_len = 0;
FILE* fp = NULL;
int ret = -1;
fp = fopen("userwords.txt", "rb");
if (NULL == fp)
{
printf("\nopen [userwords.txt] failed! \n");
goto upload_exit;
}
fseek(fp, 0, SEEK_END);
len = ftell(fp);
fseek(fp, 0, SEEK_SET);
userwords = (char*)malloc(len + 1);
if (NULL == userwords)
{
printf("\nout of memory! \n");
goto upload_exit;
}
read_len = fread((void*)userwords, 1, len, fp);
if (read_len != len)
{
printf("\nread [userwords.txt] failed!\n");
goto upload_exit;
}
userwords[len] = '\0';
MSPUploadData("userwords", userwords, len, "sub = uup, dtt = userword", &ret);
if (MSP_SUCCESS != ret)
{
printf("\nMSPUploadData failed ! errorCode: %d \n", ret);
goto upload_exit;
}
upload_exit:
if (NULL != fp)
{
fclose(fp);
fp = NULL;
}
if (NULL != userwords)
{
free(userwords);
userwords = NULL;
}
return ret;
}
static void show_result(char *str, char is_over)
{
printf("\rResult: [ %s ]", str);
if(is_over)
putchar('\n');
string s(str);
result = s;
flag = true; //设置发布话题为真
}
static char *g_result = NULL;
static unsigned int g_buffersize = BUFFER_SIZE;
void on_result(const char *result, char is_last)
{
if (result) {
size_t left = g_buffersize - 1 - strlen(g_result);
size_t size = strlen(result);
if (left < size) {
g_result = (char*)realloc(g_result, g_buffersize + BUFFER_SIZE);
if (g_result)
g_buffersize += BUFFER_SIZE;
else {
printf("mem alloc failed\n");
return;
}
}
strncat(g_result, result, size);
show_result(g_result, is_last);
}
}
void on_speech_begin()
{
if (g_result)
{
free(g_result);
}
g_result = (char*)malloc(BUFFER_SIZE);
g_buffersize = BUFFER_SIZE;
memset(g_result, 0, g_buffersize);
printf("Start Listening...\n");
}
void on_speech_end(int reason)
{
if (reason == END_REASON_VAD_DETECT)
{
printf("\nSpeaking done \n");
recorder_Flag = false;
}
else
printf("\nRecognizer error %d\n", reason);
}
/* demo recognize the audio from microphone */
static void demo_mic(const char* session_begin_params)
{
int errcode;
int i = 0;
struct speech_rec iat;
struct speech_rec_notifier recnotifier = {
on_result,
on_speech_begin,
on_speech_end
};
errcode = sr_init(&iat, session_begin_params, SR_MIC, &recnotifier);
if (errcode) {
printf("speech recognizer init failed\n");
return;
}
errcode = sr_start_listening(&iat);
if (errcode) {
printf("start listen failed %d\n", errcode);
}
/* demo 15 seconds recording */
while(recorder_Flag)
{
sleep(1);
}
errcode = sr_stop_listening(&iat);
if (errcode) {
printf("stop listening failed %d\n", errcode);
}
sr_uninit(&iat);
}
/*
* 打开麦克风 录音 发送到服务器
*/
void asrProcess()
{
int ret = MSP_SUCCESS;
int upload_on = 1; /* whether upload the user word */
/* login params, please do keep the appid correct */
const char* login_params = "appid = 5d10ba2c, work_dir = .";
/*
* See "iFlytek MSC Reference Manual"
*/
const char* session_begin_params =
"sub = iat, domain = iat, language = zh_cn, "
"accent = mandarin, sample_rate = 16000, "
"result_type = plain, result_encoding = utf8";
/* Login first. the 1st arg is username, the 2nd arg is password
* just set them as NULL. the 3rd arg is login paramertes
* */
ret = MSPLogin(NULL, NULL, login_params);
if (MSP_SUCCESS != ret) {
printf("MSPLogin failed , Error code %d.\n",ret);
goto exit; // login fail, exit the program
}
/*
if (upload_on)
{
printf("Uploading the user words ...\n");
ret = upload_userwords();
if (MSP_SUCCESS != ret)
goto exit;
printf("Uploaded successfully\n");
}
*/
demo_mic(session_begin_params);
exit:
MSPLogout(); // Logout...
}
/*
* 根据发布的话题来修改录音标志
*/
void asrCallBack(const std_msgs::Int32::ConstPtr &msg)
{
ROS_INFO_STREAM("Topic is Subscriber");
if(msg->data == ASRFLAG)
{
asrProcess();
}
}
/* main thread: start/stop record ; query the result of recgonization.
* record thread: record callback(data write)
* helper thread: ui(keystroke detection)
*/
int main(int argc, char* argv[])
{
ros::init(argc, argv, "xf_asr_node");
ros::NodeHandle nd;
ros::Subscriber sub = nd.subscribe("/voice/xf_asr_topic", 1, asrCallBack);
ros::Publisher pub = nd.advertise
ros::Rate loop_rate(10);
while(ros::ok())
{
if(flag)
{
std_msgs::String msg;
msg.data = result;
pub.publish(msg);
flag = false;
recorder_Flag = true;
}
ros::spinOnce();
loop_rate.sleep();
}
return 0;
}
然后Cmake配置为下面所示
cmake_minimum_required(VERSION 2.8.3)
project(xf_voice)
## Compile as C++11, supported in ROS Kinetic and newer
# add_compile_options(-std=c++11)
## Find catkin macros and libraries
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
## is used, also find other catkin packages
find_package(catkin REQUIRED COMPONENTS
roscpp
rospy
std_msgs
)
## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)
## Uncomment this if the package has a setup.py. This macro ensures
## modules and global scripts declared therein get installed
## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
# catkin_python_setup()
################################################
## Declare ROS messages, services and actions ##
################################################
## To declare and build messages, services or actions from within this
## package, follow these steps:
## * Let MSG_DEP_SET be the set of packages whose message types you use in
## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
## * In the file package.xml:
## * add a build_depend tag for "message_generation"
## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
## but can be declared for certainty nonetheless:
## * add a exec_depend tag for "message_runtime"
## * In this file (CMakeLists.txt):
## * add "message_generation" and every package in MSG_DEP_SET to
## find_package(catkin REQUIRED COMPONENTS ...)
## * add "message_runtime" and every package in MSG_DEP_SET to
## catkin_package(CATKIN_DEPENDS ...)
## * uncomment the add_*_files sections below as needed
## and list every .msg/.srv/.action file to be processed
## * uncomment the generate_messages entry below
## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
## Generate messages in the 'msg' folder
# add_message_files(
# FILES
# Message1.msg
# Message2.msg
# )
## Generate services in the 'srv' folder
# add_service_files(
# FILES
# Service1.srv
# Service2.srv
# )
## Generate actions in the 'action' folder
# add_action_files(
# FILES
# Action1.action
# Action2.action
# )
## Generate added messages and services with any dependencies listed here
# generate_messages(
# DEPENDENCIES
# std_msgs
# )
################################################
## Declare ROS dynamic reconfigure parameters ##
################################################
## To declare and build dynamic reconfigure parameters within this
## package, follow these steps:
## * In the file package.xml:
## * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
## * In this file (CMakeLists.txt):
## * add "dynamic_reconfigure" to
## find_package(catkin REQUIRED COMPONENTS ...)
## * uncomment the "generate_dynamic_reconfigure_options" section below
## and list every .cfg file to be processed
## Generate dynamic reconfigure parameters in the 'cfg' folder
# generate_dynamic_reconfigure_options(
# cfg/DynReconf1.cfg
# cfg/DynReconf2.cfg
# )
###################################
## catkin specific configuration ##
###################################
## The catkin_package macro generates cmake config files for your package
## Declare things to be passed to dependent projects
## INCLUDE_DIRS: uncomment this if your package contains header files
## LIBRARIES: libraries you create in this project that dependent projects also need
## CATKIN_DEPENDS: catkin_packages dependent projects also need
## DEPENDS: system dependencies of this project that dependent projects also need
catkin_package(
# INCLUDE_DIRS include
# LIBRARIES xf_voice
# CATKIN_DEPENDS roscpp rospy std_msgs
# DEPENDS system_lib
)
###########
## Build ##
###########
## Specify additional locations of header files
## Your package locations should be listed before other locations
include_directories(
include ${catkin_INCLUDE_DIRS}
#include
${catkin_INCLUDE_DIRS}
)
## Declare a C++ library
# add_library(${PROJECT_NAME}
# src/${PROJECT_NAME}/xf_voice.cpp
# )
## Add cmake target dependencies of the library
## as an example, code may need to be generated before libraries
## either from message generation or dynamic reconfigure
# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
## Declare a C++ executable
## With catkin_make all packages are built within a single CMake context
## The recommended prefix ensures that target names across packages don't collide
# add_executable(${PROJECT_NAME}_node src/xf_voice_node.cpp)
## Rename C++ executable without prefix
## The above recommended prefix causes long target names, the following renames the
## target back to the shorter version for ease of user use
## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
## Add cmake target dependencies of the executable
## same as for the library above
# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
## Specify libraries to link a library or executable target against
# target_link_libraries(${PROJECT_NAME}_node
# ${catkin_LIBRARIES}
# )
#############
## Install ##
#############
# all install targets should use catkin DESTINATION variables
# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
## Mark executable scripts (Python etc.) for installation
## in contrast to setup.py, you can choose the destination
# install(PROGRAMS
# scripts/my_python_script
# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
## Mark executables and/or libraries for installation
# install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node
# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
## Mark cpp header files for installation
# install(DIRECTORY include/${PROJECT_NAME}/
# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
# FILES_MATCHING PATTERN "*.h"
# PATTERN ".svn" EXCLUDE
# )
## Mark other files for installation (e.g. launch and bag files, etc.)
# install(FILES
# # myfile1
# # myfile2
# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
# )
#############
## Testing ##
#############
## Add gtest based cpp test target and link libraries
# catkin_add_gtest(${PROJECT_NAME}-test test/test_xf_voice.cpp)
# if(TARGET ${PROJECT_NAME}-test)
# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
# endif()
## Add folders to be run by python nosetests
# catkin_add_nosetests(test)
add_executable(xf_tts src/xf_tts.cpp)
target_link_libraries(xf_tts ${catkin_LIBRARIES} -lmsc -ldl -lpthread -lm -lrt)
add_dependencies(xf_tts xf_voice_generate_messages_cpp)
#add_executable(xf_asr )
#target_link_libraries(xf_asr ${catkin_LIBRARIES} /Robot/voice/lib/libmsc.so -ldl -lpthread -lm -lrt -lasound)
#add_dependencies(xf_asr xf_voice_generate_messages_cpp)
add_executable(tuling_nlu src/tuling_nlu.cpp)
target_link_libraries(tuling_nlu ${catkin_LIBRARIES} -ljsoncpp -lcurl)
add_executable(fuck_asr_node src/fuck_asr.cpp src/speech_recognizer.c src/linuxrec.c)
target_link_libraries(fuck_asr_node ${catkin_LIBRARIES} -lmsc -lrt -ldl -lpthread -lasound)
然后修改一下另外两个c文件的头文件目录位置,进入catkin编译,可能会出现语法错误,都是一些字符不能识别之类的小错,把错误位置的符号重新打一遍就好
至此在线语音识别配置好啦
大家可以体验一下啦
cd catkin_ws
rosrun xf_voice fuck_asr_node
然后rostopic pub -1 /voice/xf_asr_topic std_msgs/Int32 1
这句话的意思是开启麦克风 如果想自动开启也可以在代码中修改
好啦 下面就可以对着麦克风喊话了 result会显示你喊的脏话,哦不 话