This article is a hands-on summary of my own work, based on several blog posts about ROS speech recognition with pocketsphinx (for example, by the blogger 倔强不倒翁). It implements offline Chinese speech recognition for voice control of a robot. Corrections and suggestions are welcome.
1.1 Install the required packages
Install the ros-kinetic-audio-common package:
sudo apt-get install ros-kinetic-audio-common
Install the libasound2 package:
sudo apt-get install libasound2
Install the gstreamer0.10 packages:
sudo apt-get install gstreamer0.10-*
Download the libsphinxbase1 package: https://packages.debian.org/jessie/amd64/libsphinxbase1/download
Download the libpocketsphinx1 package: https://packages.debian.org/jessie/amd64/libpocketsphinx1/download
Download the gstreamer0.10-pocketsphinx package: https://packages.debian.org/jessie/amd64/gstreamer0.10-pocketsphinx/download
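These last three packages are not in the Ubuntu repositories, so they are installed from the Debian jessie .deb files downloaded above. A sketch of the install commands, assuming the three .deb files sit in the current directory (libsphinxbase1 goes first because the other two depend on it):
sudo dpkg -i libsphinxbase1_*.deb
sudo dpkg -i libpocketsphinx1_*.deb
sudo dpkg -i gstreamer0.10-pocketsphinx_*.deb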
1.2 Create a workspace
mkdir -p voice_ws/src
cd voice_ws
catkin_make
1.3 Download the pocketsphinx package
Enter voice_ws/src and clone the pocketsphinx package:
cd voice_ws/src
git clone https://github.com/mikeferguson/pocketsphinx
2.1 Download the hmm, lm, and dic files
hmm files: https://packages.debian.org/jessie/all/pocketsphinx-hmm-zh-tdt/download
Extract the downloaded package and copy the tdt_sc_8k folder found under data.tar.xz/./usr/share/pocketsphinx/model/hmm/zh/ into the demo folder of voice_ws/src/pocketsphinx.
lm and dic files: https://packages.debian.org/jessie/all/pocketsphinx-lm-zh-hans-gigatdt/download
Extract the downloaded package and copy the mandarin_notone.dic file found under data.tar.xz/./usr/share/pocketsphinx/model/lm/zh_CN/ into the demo folder of voice_ws/src/pocketsphinx.
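The extraction and copying can also be done from the terminal. A sketch, assuming both .deb files were downloaded to ~/Downloads and the workspace is at ~/voice_ws:
cd ~/Downloads
dpkg-deb -x pocketsphinx-hmm-zh-tdt_*.deb hmm_pkg
dpkg-deb -x pocketsphinx-lm-zh-hans-gigatdt_*.deb lm_pkg
cp -r hmm_pkg/usr/share/pocketsphinx/model/hmm/zh/tdt_sc_8k ~/voice_ws/src/pocketsphinx/demo/
cp lm_pkg/usr/share/pocketsphinx/model/lm/zh_CN/mandarin_notone.dic ~/voice_ws/src/pocketsphinx/demo/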
2.2 Generate your own dic and lm files
2.2.1 Create command.txt
gedit command.txt
Example contents, one command per line (小白 = the robot's name/wake word, 停下 = stop, 前进 = forward, 后退 = backward, 左转 = turn left):
小白
停下
前进
后退
左转
Save and exit.
2.2.2 Generate the lm file with lmtool
Open http://www.speech.cs.cmu.edu/tools/lmtool-new.html, click the file-selection button, upload command.txt, and compile it. Rename the generated .lm file to wake.lm and copy it into the demo folder of voice_ws/src/pocketsphinx. Then build the dictionary: for each word in command.txt, copy the matching pronunciation line out of mandarin_notone.dic into a new file, save that file as wake.dic, and copy it into the demo folder as well (a small script for this step is sketched after the example below).
Example wake.dic:
小白 x iao b ai
停下 t ing x ia
前进 q ian j in
后退 h ou t ui
左转 z uo zh uan
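Looking up five entries by hand is quick, but for a longer vocabulary the lookup can be scripted. A minimal sketch, assuming command.txt and mandarin_notone.dic are UTF-8 encoded and sit in the current directory:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Build wake.dic by copying, for each word in command.txt, the matching
# pronunciation entry out of mandarin_notone.dic.
words = set(line.strip() for line in open('command.txt') if line.strip())
with open('wake.dic', 'w') as out:
    for entry in open('mandarin_notone.dic'):
        fields = entry.split()
        if fields and fields[0] in words:
            out.write(entry)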
The recognizer.py script under voice_ws/src/pocketsphinx/nodes is edited so that the acoustic model directory can also be passed in through a ~hmm parameter (the stock node only reads ~lm and ~dict):

#!/usr/bin/env python

"""
recognizer.py is a wrapper for pocketsphinx.

  parameters:
    ~lm - filename of language model
    ~dict - filename of dictionary
    ~hmm - directory of the acoustic model
    ~mic_name - set the pulsesrc device name for the microphone input.
        e.g. a Logitech G35 Headset has the following device name:
        alsa_input.usb-Logitech_Logitech_G35_Headset-00-Headset_1.analog-mono
        To list audio device info on your machine, in a terminal type:
        pacmd list-sources
  publications:
    ~output (std_msgs/String) - text output
  services:
    ~start (std_srvs/Empty) - start speech recognition
    ~stop (std_srvs/Empty) - stop speech recognition
"""

import roslib; roslib.load_manifest('pocketsphinx')
import rospy

import pygtk
pygtk.require('2.0')
import gtk

import gobject
import pygst
pygst.require('0.10')
gobject.threads_init()
import gst

from std_msgs.msg import String
from std_srvs.srv import *

import os
import commands

class recognizer(object):
    """ GStreamer based speech recognizer. """

    def __init__(self):
        # Start node
        rospy.init_node("recognizer")

        self._device_name_param = "~mic_name"  # Find the name of your microphone by typing pacmd list-sources in the terminal
        self._lm_param = "~lm"
        self._dic_param = "~dict"
        self._hmm_param = "~hmm"

        # Configure mics with gstreamer launch config
        if rospy.has_param(self._device_name_param):
            self.device_name = rospy.get_param(self._device_name_param)
            self.device_index = self.pulse_index_from_name(self.device_name)
            self.launch_config = "pulsesrc device=" + str(self.device_index)
            rospy.loginfo("Using: pulsesrc device=%s name=%s", self.device_index, self.device_name)
        elif rospy.has_param('~source'):
            # common sources: 'alsasrc'
            self.launch_config = rospy.get_param('~source')
        else:
            self.launch_config = 'gconfaudiosrc'

        rospy.loginfo("Launch config: %s", self.launch_config)

        self.launch_config += " ! audioconvert ! audioresample " \
                            + '! vader name=vad auto-threshold=true ' \
                            + '! pocketsphinx name=asr ! fakesink'

        # Configure ROS settings
        self.started = False
        rospy.on_shutdown(self.shutdown)
        self.pub = rospy.Publisher('~output', String)
        rospy.Service("~start", Empty, self.start)
        rospy.Service("~stop", Empty, self.stop)

        if rospy.has_param(self._lm_param) and rospy.has_param(self._dic_param):
            self.start_recognizer()
        else:
            rospy.logwarn("lm and dic parameters need to be set to start recognizer.")

    def start_recognizer(self):
        rospy.loginfo("Starting recognizer... ")

        self.pipeline = gst.parse_launch(self.launch_config)
        self.asr = self.pipeline.get_by_name('asr')
        self.asr.connect('partial_result', self.asr_partial_result)
        self.asr.connect('result', self.asr_result)
        #self.asr.set_property('configured', True)
        self.asr.set_property('dsratio', 1)

        # Configure language model
        if rospy.has_param(self._lm_param):
            lm = rospy.get_param(self._lm_param)
        else:
            rospy.logerr('Recognizer not started. Please specify a language model file.')
            return

        if rospy.has_param(self._dic_param):
            dic = rospy.get_param(self._dic_param)
        else:
            rospy.logerr('Recognizer not started. Please specify a dictionary.')
            return

        if rospy.has_param(self._hmm_param):
            hmm = rospy.get_param(self._hmm_param)
        else:
            rospy.logerr('Recognizer not started. Please specify an acoustic model (hmm).')
            return

        self.asr.set_property('lm', lm)
        self.asr.set_property('dict', dic)
        self.asr.set_property('hmm', hmm)

        self.bus = self.pipeline.get_bus()
        self.bus.add_signal_watch()
        self.bus_id = self.bus.connect('message::application', self.application_message)
        self.pipeline.set_state(gst.STATE_PLAYING)
        self.started = True

    def pulse_index_from_name(self, name):
        output = commands.getstatusoutput("pacmd list-sources | grep -B 1 'name: <" + name + ">' | grep -o -P '(?<=index: )[0-9]*'")
        if len(output) == 2:
            return output[1]
        else:
            raise Exception("Error. pulse index doesn't exist for name: " + name)

    def stop_recognizer(self):
        if self.started:
            self.pipeline.set_state(gst.STATE_NULL)
            self.pipeline.remove(self.asr)
            self.bus.disconnect(self.bus_id)
            self.started = False

    def shutdown(self):
        """ Delete any remaining parameters so they don't affect next launch """
        for param in [self._device_name_param, self._lm_param, self._dic_param, self._hmm_param]:
            if rospy.has_param(param):
                rospy.delete_param(param)

        """ Shutdown the GTK thread. """
        gtk.main_quit()

    def start(self, req):
        self.start_recognizer()
        rospy.loginfo("recognizer started")
        return EmptyResponse()

    def stop(self, req):
        self.stop_recognizer()
        rospy.loginfo("recognizer stopped")
        return EmptyResponse()

    def asr_partial_result(self, asr, text, uttid):
        """ Forward partial result signals on the bus to the main thread. """
        struct = gst.Structure('partial_result')
        struct.set_value('hyp', text)
        struct.set_value('uttid', uttid)
        asr.post_message(gst.message_new_application(asr, struct))

    def asr_result(self, asr, text, uttid):
        """ Forward result signals on the bus to the main thread. """
        struct = gst.Structure('result')
        struct.set_value('hyp', text)
        struct.set_value('uttid', uttid)
        asr.post_message(gst.message_new_application(asr, struct))

    def application_message(self, bus, msg):
        """ Receive application messages from the bus. """
        msgtype = msg.structure.get_name()
        if msgtype == 'partial_result':
            self.partial_result(msg.structure['hyp'], msg.structure['uttid'])
        if msgtype == 'result':
            self.final_result(msg.structure['hyp'], msg.structure['uttid'])

    def partial_result(self, hyp, uttid):
        """ Log the intermediate hypothesis. """
        rospy.logdebug("Partial: " + hyp)

    def final_result(self, hyp, uttid):
        """ Publish the final result. """
        msg = String()
        msg.data = str(hyp.lower())
        rospy.loginfo(msg.data)
        self.pub.publish(msg)

if __name__ == "__main__":
    start = recognizer()
    gtk.main()
Create the launch file in the demo folder:
gedit voice_cmd.launch
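A minimal sketch of what voice_cmd.launch might contain; the parameter names lm, dict, and hmm match the private parameters read by recognizer.py above, and the file paths assume the demo layout shown below:
<launch>
  <node name="recognizer" pkg="pocketsphinx" type="recognizer.py" output="screen">
    <!-- language model, dictionary and acoustic model prepared in section 2 -->
    <param name="lm" value="$(find pocketsphinx)/demo/wake.lm"/>
    <param name="dict" value="$(find pocketsphinx)/demo/wake.dic"/>
    <param name="hmm" value="$(find pocketsphinx)/demo/tdt_sc_8k"/>
  </node>
</launch>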
After everything is in place, the pocketsphinx package should have the following layout:
├── CHANGELOG.rst
├── CMakeLists.txt
├── demo
│ ├── mandarin_notone.dic
│ ├── tdt_sc_8k
│ │ ├── feat.params
│ │ ├── mdef
│ │ ├── means
│ │ ├── noisedict
│ │ ├── sendump
│ │ ├── transition_matrices
│ │ └── variances
│ ├── voice_cmd.launch
│ ├── wake.dic
│ └── wake.lm
├── nodes
│ ├── recognizer.py
│ └── voice_cmd_vel.py
├── package.xml
└── README.md
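The tree above also lists voice_cmd_vel.py, the node that turns the recognized text into motion. The stock script from the pocketsphinx package maps English phrases to geometry_msgs/Twist messages; a minimal sketch of a variant adapted to the commands in wake.dic might look like the following (the /cmd_vel topic name and the velocity values are assumptions to adjust for your robot):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import rospy
from geometry_msgs.msg import Twist
from std_msgs.msg import String

class voice_cmd_vel(object):
    def __init__(self):
        rospy.init_node('voice_cmd_vel')
        self.twist = Twist()
        self.pub = rospy.Publisher('cmd_vel', Twist, queue_size=1)
        rospy.Subscriber('recognizer/output', String, self.speech_callback)

    def speech_callback(self, msg):
        # Map each recognized command word to a velocity command.
        if msg.data.find('前进') > -1:      # forward
            self.twist.linear.x = 0.2
            self.twist.angular.z = 0.0
        elif msg.data.find('后退') > -1:    # backward
            self.twist.linear.x = -0.2
            self.twist.angular.z = 0.0
        elif msg.data.find('左转') > -1:    # turn left
            self.twist.linear.x = 0.0
            self.twist.angular.z = 0.5
        elif msg.data.find('停下') > -1:    # stop
            self.twist = Twist()

    def spin(self):
        # Keep republishing the latest command at 10 Hz.
        rate = rospy.Rate(10)
        while not rospy.is_shutdown():
            self.pub.publish(self.twist)
            rate.sleep()

if __name__ == '__main__':
    voice_cmd_vel().spin()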
Notes:
1. When using an external microphone, select it as the input device in the system's Sound settings.
2. The first time the roslaunch command is started it may fail to recognize anything; run it once, interrupt it after it comes up, and then run it again.
To run the demo, execute the following from the root of voice_ws:
source devel/setup.bash
roslaunch pocketsphinx voice_cmd.launch
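To confirm that commands are being picked up, the recognized text can be watched in a second terminal (the topic name follows from the ~output publisher of the recognizer node):
rostopic echo /recognizer/output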
Source code link: https://pan.baidu.com/s/1LzET5CKN3c_3cBEWeW3LZA (extraction code: chil)