ubuntu16.04+ROS+科大讯飞+图灵AI机器人(三)——语音识别

上一篇中我们已经实现了tts(语音合成)

接下来我们再来实现在线语音识别

在这个过程中我走了不少弯路,网上的博客几乎全部看了一遍,其中有不少还是很有问题的,特在此总结一个切实可用的方法。

希望大家觉得ok的话给我点赞

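第一步:将samples里面的iat_online_record_sample目录下的三个.c文件(iat_online_record_sample.c、speech_recognizer.c、linuxrec.c)拷贝到xf_voice的src目录中,并注意将主文件iat_online_record_sample.c的后缀改成.cpp(后面要在其中加入ROS的C++代码),不然编译会出错;同时,将samples里面include中的.h文件全部拷贝到/Robot/voice/inc目录下(下面代码用到的qisr.h、msp_cmn.h、msp_errors.h、speech_recognizer.h都从这个绝对路径包含)。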

第二步 修改iat_online_record_sample.cpp

名称改为fuck_asr.cpp(为什么骂人呢,因为一直出bug心情烦躁,个人经验,名字贱好养活)

里面代码改成下面的代码

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <ros/ros.h>
#include <std_msgs/String.h>
#include <std_msgs/Int32.h>

#include "/Robot/voice/inc/qisr.h"

#include "/Robot/voice/inc/msp_cmn.h"

#include "/Robot/voice/inc/msp_errors.h"

#include "/Robot/voice/inc/speech_recognizer.h"

 

/* Not referenced anywhere in the visible code. */
#define FRAME_LEN   640

/* Initial size, and growth step, of the buffer that accumulates the
 * recognized text (see on_speech_begin / on_result). */
#define BUFFER_SIZE 4096

/* Value of the Int32 trigger message that makes asrCallBack start a
 * recognition session. */
#define ASRFLAG 1

 

using namespace std;

 

/* Set by show_result() when recognized text is available; main() publishes
 * the text and clears the flag again. */
bool flag = false;

/* demo_mic() keeps waiting while this is true; on_speech_end() clears it and
 * main() sets it back to true after publishing a result. */
bool recorder_Flag = true;

/* Latest recognized text, written by show_result() and published by main(). */
string result = "";

 

/* Upload User words */

static int upload_userwords()

{

    char*           userwords   =   NULL;

    size_t          len         =   0;

    size_t          read_len    =   0;

    FILE*           fp          =   NULL;

    int             ret         =   -1;

 

    fp = fopen("userwords.txt", "rb");

    if (NULL == fp)

    {

        printf("\nopen [userwords.txt] failed! \n");

        goto upload_exit;

    }

 

    fseek(fp, 0, SEEK_END);

    len = ftell(fp);

    fseek(fp, 0, SEEK_SET);

 

    userwords = (char*)malloc(len + 1);

    if (NULL == userwords)

    {

        printf("\nout of memory! \n");

        goto upload_exit;

    }

 

    read_len = fread((void*)userwords, 1, len, fp);

    if (read_len != len)

    {

        printf("\nread [userwords.txt] failed!\n");

        goto upload_exit;

    }

    userwords[len] = '\0';

 

    MSPUploadData("userwords", userwords, len, "sub = uup, dtt = userword", &ret);

    if (MSP_SUCCESS != ret)

    {

        printf("\nMSPUploadData failed ! errorCode: %d \n", ret);

        goto upload_exit;

    }

 

upload_exit:

    if (NULL != fp)

    {

        fclose(fp);

        fp = NULL;

    }

    if (NULL != userwords)

    {

        free(userwords);

        userwords = NULL;

    }

 

    return ret;

}


 

/**
 * Print the text recognized so far and hand it over to the publisher loop.
 *
 * @param str      NUL-terminated text accumulated so far.
 * @param is_over  non-zero when this is the last result; a newline is then
 *                 printed to finish the "\r"-rewritten console line.
 */
static void show_result(char *str, char is_over)
{
    printf("\rResult: [ %s ]", str);
    if (is_over != 0)
    {
        putchar('\n');
    }

    /* Copy the text into the shared string and raise the publish flag. */
    result = str;
    flag = true;
}

 

/* Heap buffer holding the text recognized so far; (re)allocated in
 * on_speech_begin() and appended to in on_result(). */
static char *g_result = NULL;

/* Current capacity of g_result in bytes. */
static unsigned int g_buffersize = BUFFER_SIZE;

 

void on_result(const char *result, char is_last)

{

    if (result) {

        size_t left = g_buffersize - 1 - strlen(g_result);

        size_t size = strlen(result);

        if (left < size) {

            g_result = (char*)realloc(g_result, g_buffersize + BUFFER_SIZE);

            if (g_result)

                g_buffersize += BUFFER_SIZE;

            else {

                printf("mem alloc failed\n");

                return;

            }

        }

        strncat(g_result, result, size);

        show_result(g_result, is_last);

    }

}

/**
 * Recognizer callback: start a session with a fresh, zero-filled result
 * buffer of BUFFER_SIZE bytes.
 */
void on_speech_begin()
{
    /* free(NULL) is a no-op, so no guard is needed. */
    free(g_result);

    g_buffersize = BUFFER_SIZE;
    /* calloc() zero-fills, replacing the separate memset(); the result is
     * checked so an allocation failure no longer dereferences NULL. */
    g_result = static_cast<char*>(calloc(g_buffersize, 1));
    if (NULL == g_result)
    {
        printf("mem alloc failed\n");
        return;
    }

    printf("Start Listening...\n");
}

void on_speech_end(int reason)

{

    if (reason == END_REASON_VAD_DETECT)

    {

        printf("\nSpeaking done \n");

        recorder_Flag = false;

    }

    else

        printf("\nRecognizer error %d\n", reason);

}

 

/* demo recognize the audio from microphone */

static void demo_mic(const char* session_begin_params)

{

    int errcode;

    int i = 0;

 

    struct speech_rec iat;

 

    struct speech_rec_notifier recnotifier = {

        on_result,

        on_speech_begin,

        on_speech_end

    };

 

    errcode = sr_init(&iat, session_begin_params, SR_MIC, &recnotifier);

    if (errcode) {

        printf("speech recognizer init failed\n");

        return;

    }

    errcode = sr_start_listening(&iat);

    if (errcode) {

        printf("start listen failed %d\n", errcode);

    }

    /* demo 15 seconds recording */

    while(recorder_Flag)

    {

     sleep(1);

    }

    errcode = sr_stop_listening(&iat);

    if (errcode) {

        printf("stop listening failed %d\n", errcode);

    }

 

    sr_uninit(&iat);

}

 

/*

* 打开麦克风 录音 发送到服务器

*/

void asrProcess()

{

    int ret = MSP_SUCCESS;

    int upload_on = 1; /* whether upload the user word */

    /* login params, please do keep the appid correct */

    const char* login_params = "appid = 5d10ba2c, work_dir = .";

 

    /*

    * See "iFlytek MSC Reference Manual"

    */

    const char* session_begin_params =

        "sub = iat, domain = iat, language = zh_cn, "

        "accent = mandarin, sample_rate = 16000, "

        "result_type = plain, result_encoding = utf8";

 

    /* Login first. the 1st arg is username, the 2nd arg is password

     * just set them as NULL. the 3rd arg is login paramertes

     * */

    ret = MSPLogin(NULL, NULL, login_params);

    if (MSP_SUCCESS != ret) {

        printf("MSPLogin failed , Error code %d.\n",ret);

        goto exit; // login fail, exit the program

    }

 

/*

    if (upload_on)

    {

        printf("Uploading the user words ...\n");

        ret = upload_userwords();

        if (MSP_SUCCESS != ret)

            goto exit;

        printf("Uploaded successfully\n");

    }

*/

 

        demo_mic(session_begin_params);

 

exit:

    MSPLogout(); // Logout...

}

 

/**
 * Subscriber callback for the trigger topic: start a recognition session
 * when the received value equals ASRFLAG.
 *
 * @param msg  received Int32 message; other values are only logged.
 */
void asrCallBack(const std_msgs::Int32::ConstPtr &msg)
{
    ROS_INFO_STREAM("Topic is Subscriber");

    const bool start_requested = (ASRFLAG == msg->data);
    if (!start_requested)
    {
        return;
    }

    asrProcess();
}

 

/* main thread: start/stop record ; query the result of recgonization.

* record thread: record callback(data write)

* helper thread: ui(keystroke detection)

*/

int main(int argc, char* argv[])

{

ros::init(argc, argv, "xf_asr_node");

ros::NodeHandle nd;

 

ros::Subscriber sub = nd.subscribe("/voice/xf_asr_topic", 1, asrCallBack);

ros::Publisher pub = nd.advertise("/voice/tuling_nlu_topic", 10);

 

ros::Rate loop_rate(10);

 

while(ros::ok())

{

if(flag)

{

std_msgs::String msg;

msg.data = result;

pub.publish(msg);

flag = false;

recorder_Flag = true;

}

 

ros::spinOnce();

loop_rate.sleep();

}


 

    return 0;

}

 

然后Cmake配置为下面所示

cmake_minimum_required(VERSION 2.8.3)
project(xf_voice)

## Compile as C++11, supported in ROS Kinetic and newer
## (left disabled here, so the sources are built with the compiler's default standard)
# add_compile_options(-std=c++11)

## Find catkin macros and libraries
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
## is used, also find other catkin packages
## The nodes built at the end of this file use ${catkin_LIBRARIES} from these components.
find_package(catkin REQUIRED COMPONENTS
  roscpp
  rospy
  std_msgs
)

## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)


## Uncomment this if the package has a setup.py. This macro ensures
## modules and global scripts declared therein get installed
## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
# catkin_python_setup()

################################################
## Declare ROS messages, services and actions ##
################################################

## To declare and build messages, services or actions from within this
## package, follow these steps:
## * Let MSG_DEP_SET be the set of packages whose message types you use in
##   your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
## * In the file package.xml:
##   * add a build_depend tag for "message_generation"
##   * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
##   * If MSG_DEP_SET isn't empty the following dependency has been pulled in
##     but can be declared for certainty nonetheless:
##     * add a exec_depend tag for "message_runtime"
## * In this file (CMakeLists.txt):
##   * add "message_generation" and every package in MSG_DEP_SET to
##     find_package(catkin REQUIRED COMPONENTS ...)
##   * add "message_runtime" and every package in MSG_DEP_SET to
##     catkin_package(CATKIN_DEPENDS ...)
##   * uncomment the add_*_files sections below as needed
##     and list every .msg/.srv/.action file to be processed
##   * uncomment the generate_messages entry below
##   * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)

## Generate messages in the 'msg' folder
# add_message_files(
#   FILES
#   Message1.msg
#   Message2.msg
# )

## Generate services in the 'srv' folder
# add_service_files(
#   FILES
#   Service1.srv
#   Service2.srv
# )

## Generate actions in the 'action' folder
# add_action_files(
#   FILES
#   Action1.action
#   Action2.action
# )

## Generate added messages and services with any dependencies listed here
# generate_messages(
#   DEPENDENCIES
#   std_msgs
# )

################################################
## Declare ROS dynamic reconfigure parameters ##
################################################

## To declare and build dynamic reconfigure parameters within this
## package, follow these steps:
## * In the file package.xml:
##   * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
## * In this file (CMakeLists.txt):
##   * add "dynamic_reconfigure" to
##     find_package(catkin REQUIRED COMPONENTS ...)
##   * uncomment the "generate_dynamic_reconfigure_options" section below
##     and list every .cfg file to be processed

## Generate dynamic reconfigure parameters in the 'cfg' folder
# generate_dynamic_reconfigure_options(
#   cfg/DynReconf1.cfg
#   cfg/DynReconf2.cfg
# )

###################################
## catkin specific configuration ##
###################################
## The catkin_package macro generates cmake config files for your package
## Declare things to be passed to dependent projects
## INCLUDE_DIRS: uncomment this if your package contains header files
## LIBRARIES: libraries you create in this project that dependent projects also need
## CATKIN_DEPENDS: catkin_packages dependent projects also need
## DEPENDS: system dependencies of this project that dependent projects also need
## Every argument below is commented out, so this package exports no include
## directories, libraries or dependencies to other packages.
catkin_package(
#  INCLUDE_DIRS include
#  LIBRARIES xf_voice
#  CATKIN_DEPENDS roscpp rospy std_msgs
#  DEPENDS system_lib
)

###########
## Build ##
###########

## Specify additional locations of header files
## Your package locations should be listed before other locations
## Package-local headers first, then the headers of the catkin dependencies.
## (${catkin_INCLUDE_DIRS} was previously listed twice.)
include_directories(
  include
  ${catkin_INCLUDE_DIRS}
)

## Declare a C++ library
# add_library(${PROJECT_NAME}
#   src/${PROJECT_NAME}/xf_voice.cpp
# )

## Add cmake target dependencies of the library
## as an example, code may need to be generated before libraries
## either from message generation or dynamic reconfigure
# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})

## Declare a C++ executable
## With catkin_make all packages are built within a single CMake context
## The recommended prefix ensures that target names across packages don't collide
# add_executable(${PROJECT_NAME}_node src/xf_voice_node.cpp)

## Rename C++ executable without prefix
## The above recommended prefix causes long target names, the following renames the
## target back to the shorter version for ease of user use
## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")

## Add cmake target dependencies of the executable
## same as for the library above
# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})

## Specify libraries to link a library or executable target against
# target_link_libraries(${PROJECT_NAME}_node
#   ${catkin_LIBRARIES}
# )

#############
## Install ##
#############

# all install targets should use catkin DESTINATION variables
# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html

## Mark executable scripts (Python etc.) for installation
## in contrast to setup.py, you can choose the destination
# install(PROGRAMS
#   scripts/my_python_script
#   DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )

## Mark executables and/or libraries for installation
# install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node
#   ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
#   LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
#   RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )

## Mark cpp header files for installation
# install(DIRECTORY include/${PROJECT_NAME}/
#   DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
#   FILES_MATCHING PATTERN "*.h"
#   PATTERN ".svn" EXCLUDE
# )

## Mark other files for installation (e.g. launch and bag files, etc.)
# install(FILES
#   # myfile1
#   # myfile2
#   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
# )

#############
## Testing ##
#############

## Add gtest based cpp test target and link libraries
# catkin_add_gtest(${PROJECT_NAME}-test test/test_xf_voice.cpp)
# if(TARGET ${PROJECT_NAME}-test)
#   target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
# endif()

## Add folders to be run by python nosetests
# catkin_add_nosetests(test)
add_executable(xf_tts src/xf_tts.cpp)
target_link_libraries(xf_tts ${catkin_LIBRARIES} -lmsc  -ldl -lpthread -lm -lrt)
## This package generates no messages of its own (generate_messages() is
## commented out above), so the target xf_voice_generate_messages_cpp never
## exists. Depend on the targets exported by the catkin dependencies instead.
add_dependencies(xf_tts ${catkin_EXPORTED_TARGETS})

#add_executable(xf_asr )
#target_link_libraries(xf_asr ${catkin_LIBRARIES} /Robot/voice/lib/libmsc.so  -ldl -lpthread -lm -lrt -lasound)
#add_dependencies(xf_asr xf_voice_generate_messages_cpp)

add_executable(tuling_nlu src/tuling_nlu.cpp)
target_link_libraries(tuling_nlu ${catkin_LIBRARIES} -ljsoncpp -lcurl)


## Online speech recognition node: the C++ main file plus the two C sources
## copied from the SDK sample.
add_executable(fuck_asr_node src/fuck_asr.cpp src/speech_recognizer.c src/linuxrec.c)
target_link_libraries(fuck_asr_node  ${catkin_LIBRARIES} -lmsc -lrt -ldl -lpthread -lasound)

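然后修改另外两个.c文件(speech_recognizer.c、linuxrec.c)中#include的头文件路径,使其指向/Robot/voice/inc目录;接着回到catkin工作空间执行catkin_make进行编译。编译时可能会出现语法错误,一般都是从网页复制代码时带入了编译器不能识别的字符(例如全角符号或不可见的空格)之类的小错,把报错位置的符号重新手动输入一遍就好。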

至此在线语音识别配置好啦 

大家可以体验一下啦

(先在另一个终端中启动roscore)

cd catkin_ws

source devel/setup.bash

rosrun xf_voice fuck_asr_node

然后再开一个终端,执行:rostopic pub -1 /voice/xf_asr_topic std_msgs/Int32 1

这条命令向/voice/xf_asr_topic话题发布数值1,节点收到后就会开启麦克风开始识别;如果想让节点启动后自动开启麦克风,也可以在代码中修改。

好啦 下面就可以对着麦克风喊话了 result会显示你喊的脏话,哦不  话

 

 

你可能感兴趣的:(ROS)