Hardware:
1. Speaker (for audio playback)
2. Microphone (for recording)
3. Raspberry Pi
Software:
1. Python 2.7 (modules: pyaudio, wave, requests, urllib, urllib2, sys)
There are five steps in total:
Step 1: Recording
Step 2: Speech recognition
Step 3: Turing reply
Step 4: Speech synthesis
Step 5: Playback
Step 1: Recording
Recording uses Linux's arecord:
def recordVoice(self):
    print "开始录音..."  # "Recording started..."
    # 16 kHz, 16-bit mono, 4-second clip from ALSA capture device plughw:1
    os.system('sudo arecord -D "plughw:1" -f S16_LE -r 16000 -d 4 %s' % self.RECORD_PATH)
    print "录音结束..."  # "Recording finished..."
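The "plughw:1" part of the arecord command depends on which ALSA card number your USB microphone was assigned, so it may differ on your Pi. If recording fails, the standard ALSA listing command shows the correct card to use:
arecord -l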
Step 2: Speech recognition
Speech recognition uses Baidu's speech recognition API; the documentation is at http://ai.baidu.com/docs#/ASR-API/top.
Everything about the API is described there if you want to read further. The code for this step is the voiceRecognition method in voiceAPI.py (full listing below).
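As a quick orientation before the full listing, here is a minimal sketch of the request that voiceRecognition sends: the WAV file is base64-encoded and posted as JSON together with the access token. It assumes a 16 kHz mono WAV and an already-fetched access_token; the function name, file path, and cuid value are placeholders.
# Minimal sketch of the Baidu ASR request (Python 2), mirroring voiceRecognition below.
import base64
import json
import urllib2

def recognize(wav_path, access_token, cuid="raspberrypi-demo"):  # cuid: any unique device id
    with open(wav_path, "rb") as f:
        audio = f.read()
    payload = {
        "format": "wav",      # arecord above produces 16 kHz, 16-bit mono WAV
        "rate": 16000,
        "channel": 1,
        "cuid": cuid,
        "token": access_token,                              # from the OAuth token request
        "speech": base64.b64encode(audio).decode("utf-8"),  # audio as base64 text
        "len": len(audio),                                  # length of the raw audio, not the base64 string
    }
    req = urllib2.Request("http://vop.baidu.com/server_api",
                          data=json.dumps(payload).encode("utf-8"),
                          headers={"Content-Type": "application/json"})
    resp = json.loads(urllib2.urlopen(req).read().decode("utf-8"))
    return resp["result"][0] if "result" in resp else None  # None when err_no is set instead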
Step 3: Turing reply
Since this is a conversational robot, whatever you say naturally needs a reply, and the replies come from the Turing (tuling123) API. Turing's replies are, for the time being, fairly intelligent, and the API is simple to use. The code is the TurLingAPI class in voiceAPI.py below.
The API_KEY in the code must be obtained by registering an account on the Turing website, which then issues you an API_KEY.
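The reply request itself is a single POST. A minimal sketch, assuming requests is installed and that key holds your own API_KEY (function and variable names here are placeholders):
# Minimal sketch of the Turing reply request used in turlingReply below.
import json
import requests

def turing_reply(text, key):
    body = {"key": key, "info": text.encode("utf-8")}
    res = requests.post("http://www.tuling123.com/openapi/api", data=body, verify=True)
    data = json.loads(res.text)
    return data.get("text")  # None if the service returned no reply text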
Step 4: Speech synthesis
Speech synthesis also uses a Baidu API. Note that the client_id and client_secret used in the code must likewise be obtained by registering your own application; it is worth reading the documentation at http://ai.baidu.com/docs#/TTS-API/top.
The code is the voiceSynthesis method in voiceAPI.py below.
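Because Baidu's TTS REST interface is an HTTP GET, the "synthesis" step here only builds a URL and hands it to the player. A minimal sketch, with the same parameters as the code below (the function name and cuid are placeholders; see the TTS docs for the full parameter list):
# Minimal sketch of the text2audio URL built in voiceSynthesis below.
import urllib

def synthesis_url(text, access_token, cuid="raspberrypi-demo"):
    params = {
        "tex": text.encode("utf-8"),  # text to synthesize
        "lan": "zh",                  # language: Mandarin Chinese
        "cuid": cuid,                 # same device id as in recognition
        "ctp": 1,                     # client type for the REST API
        "tok": access_token,          # OAuth access token
        "per": 4,                     # voice/speaker selection (see the TTS docs)
    }
    # urlencode also takes care of percent-encoding the Chinese text
    return "http://tsn.baidu.com/text2audio?" + urllib.urlencode(params)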
Step 5: Playback
For playback I did not use the pyaudio mentioned for recording, but mp3play instead. Why? Because pyaudio cannot play MP3 files. I first tried saving the synthesized audio as WAV, but wave raised an error, possibly a problem with the downloaded file. mp3play, however, does not work on Linux, so I switched to mpg123. Here is the code:
def playVoice(self, url):  # play audio from a URL
    print url
    os.system('mpg123 "%s"' % url)
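mpg123 is a standard command-line MP3 player rather than a Python module; if it is not already present on the Pi, it can normally be installed with:
sudo apt-get install mpg123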
That covers the code for every step, but you may still run into some problems. Installing the required modules on Windows is straightforward, but on Linux there can be issues. For example, installing pyaudio may fail with an error saying portaudio.h cannot be found. The fix:
Installing pyaudio on Linux:
sudo apt-get install portaudio19-dev
pip install --allow-unverified=pyaudio pyaudio
The other modules generally install without problems. The full code follows.
voice.py
"""
author : AnE
"""
import sys
import os
reload(sys)
sys.setdefaultencoding("utf-8")


class Voice:
    def __init__(self):
        self.RECORD_PATH = r"./record_voice.wav"

    # def savaVoice(self, data):  # save recorded frames to a WAV file
    #     f = wave.open(self.RECORD_PATH, "wb")
    #     f.setframerate(self.RATE)
    #     f.setnchannels(self.CHANNELS)
    #     f.setsampwidth(pyaudio.PyAudio().get_sample_size(self.FORMAT))
    #     f.writeframes("".join(data))
    #     f.close()

    # def recordVoice(self):  # record with pyaudio (replaced by arecord below)
    #     pa = pyaudio.PyAudio()
    #
    #     stream = pa.open(format = self.FORMAT,
    #                      channels = self.CHANNELS,
    #                      rate = self.RATE,
    #                      frames_per_buffer = self.CHUNK,
    #                      input = True)
    #
    #     voicedata_list = []
    #     print u"正在录音..."
    #     for i in range(0, int(self.RATE / self.CHUNK * self.record_time)):
    #         voicedata = stream.read(self.CHUNK)
    #         voicedata_list.append(voicedata)
    #     print u"录音结束..."
    #
    #     stream.stop_stream()
    #     stream.close()
    #     pa.terminate()
    #     self.savaVoice(voicedata_list)

    def recordVoice(self):  # record 4 s of 16 kHz, 16-bit mono audio with arecord
        print "开始录音..."
        os.system('sudo arecord -D "plughw:1" -f S16_LE -r 16000 -d 4 %s' % self.RECORD_PATH)
        print "录音结束..."

    def playVoice(self, url):  # play audio (an MP3 URL) with mpg123
        print url
        os.system('mpg123 "%s"' % url)
voiceAPI.py
"""
author : AnE
"""
import sys
import requests
import json
import urllib2
import base64
import urllib
reload(sys)
sys.setdefaultencoding("utf-8")


class BaiDuAPI:
    def __init__(self):
        self.GRANT_TYPE = "client_credentials"
        self.CLIENT_ID = "b6lN1eECXn1aRoK9PiwiqwWT"              # API Key of your Baidu application
        self.CLIENT_SECRET = "t8KSgal9vgoy5z0AagKOATmPsUrfEiyx"  # API Secret of your Baidu application
        self.TOKEN_URL = "https://openapi.baidu.com/oauth/2.0/token"
        self.RECOGNITION_URL = "http://vop.baidu.com/server_api"
        self.CUID = "B8-27-EB-BA-24-14"                          # any unique device id (here the Pi's MAC address)
        self.RECOGNITION_PATH = r"./record_voice.wav"
        # self.SYNTHESIS_PATH = r"./play_voice.mp3"

    def getToken(self):  # fetch the OAuth access_token
        body = {
            "grant_type": self.GRANT_TYPE,
            "client_id": self.CLIENT_ID,
            "client_secret": self.CLIENT_SECRET
        }
        r = requests.post(self.TOKEN_URL, data=body, verify=True)
        self.access_token = json.loads(r.text)["access_token"]
        return self.access_token

    def voiceRecognition(self):  # speech recognition
        erro_dict = {  # Baidu ASR error codes and their descriptions
            3300: "输入参数不正确",
            3301: "音频质量过差",
            3302: "鉴权失败",
            3303: "语音服务器后端问题",
            3304: "用户的请求QPS超限",
            3305: "用户的日pv(日请求量)超限",
            3307: "语音服务器后端识别出错问题",
            3308: "音频过长",
            3309: "音频数据问题",
            3310: "输入的音频文件过大",
            3311: "采样率rate参数不在选项里",
            3312: "音频格式format参数不在选项里"
        }
        f = open(self.RECOGNITION_PATH, "rb")
        voice_data = f.read()
        f.close()
        speech_data = base64.b64encode(voice_data).decode("utf-8")
        speech_length = len(voice_data)  # length of the raw audio, not the base64 string
        post_data = {
            "format": "wav",
            "rate": 16000,
            "channel": 1,
            "cuid": self.CUID,
            "token": self.access_token,
            "speech": speech_data,
            "len": speech_length
        }
        json_data = json.dumps(post_data).encode("utf-8")
        json_length = len(json_data)
        req = urllib2.Request(self.RECOGNITION_URL, data=json_data)
        req.add_header("Content-Type", "application/json")
        req.add_header("Content-Length", json_length)
        resp = urllib2.urlopen(req)
        resp = resp.read()
        resp_data = json.loads(resp.decode("utf-8"))
        try:
            recognition_result = resp_data["result"][0]
            print recognition_result
            return recognition_result
        except:
            print erro_dict[resp_data["err_no"]]
            return False

    def voiceSynthesis(self, word):  # speech synthesis: build the text2audio URL
        token = self.access_token
        cuid = self.CUID
        # word = urllib.quote(word.encode("utf8"))  # enable if the raw text breaks the URL
        url = "http://tsn.baidu.com/text2audio?tex=" + word + "&lan=zh&cuid=" + cuid + "&ctp=1&tok=" + token + "&per=4"
        # urllib.urlretrieve(url, self.SYNTHESIS_PATH)
        return url


class TurLingAPI:
    def __init__(self):
        self.Tuling_API_KEY = "1872aeffd794498696772ce53c5c26ac"  # your own key from the Turing website
        self.URL = "http://www.tuling123.com/openapi/api"

    def turlingReply(self, word):  # get a reply from the Turing API
        body = {"key": self.Tuling_API_KEY,
                "info": word.encode("utf-8")}
        res = requests.post(self.URL, data=body, verify=True)
        if res:
            date = json.loads(res.text)
            print date["text"]
            return date["text"]
        else:
            print "对不起,未获取到回复信息"  # "Sorry, no reply was received"
            return False
robot.py
"""
author : AnE
"""
from voice import Voice
import voiceAPI


def main():
    voice = Voice()
    baiduAPI = voiceAPI.BaiDuAPI()
    turlingAPI = voiceAPI.TurLingAPI()
    baiduAPI.getToken()
    while True:
        voice.recordVoice()
        recognition_result = baiduAPI.voiceRecognition()
        if recognition_result:
            if "退出对话" in recognition_result:  # saying "退出对话" (end the conversation) exits the loop
                break
            reply_result = turlingAPI.turlingReply(recognition_result)
            if reply_result:
                url = baiduAPI.voiceSynthesis(reply_result)
                voice.playVoice(url)
            else:
                url = baiduAPI.voiceSynthesis("对不起,获取回复失败")  # "Sorry, failed to get a reply"
                voice.playVoice(url)
                continue
        else:
            url = baiduAPI.voiceSynthesis("对不起,识别失败")  # "Sorry, recognition failed"
            voice.playVoice(url)
            continue
    url = baiduAPI.voiceSynthesis("退出成功")  # "Exited successfully"
    voice.playVoice(url)


if __name__ == '__main__':
    main()
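With the three files above saved in one directory and your own keys filled in, the robot is started like any other Python 2 script:
python robot.py
Saying "退出对话" ends the loop; anything else you say is answered aloud.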
Other issues: the recording duration is fixed, so each turn records for the full duration set by arecord's -d option (4 seconds here) whether or not you have stopped speaking.
There is no noise handling, so background noise gets recorded as well. There is also no wake-word feature; the only approach I can think of at the moment is to record continuously until a keyword appears.
QQ: 1281248141 (feel free to add me on QQ if you have questions). GitHub: https://github.com/AneGithub/robot
By: AnE