最近有点颓,还是写写python缓解一下(不务正业x
之前看到有人拆笔记本屏幕做魔镜,觉得特别神奇,记得当时是要用树莓派来着。现在自己折腾了一下,发现百度的语音识别 API 还不错,加上以前注册的图灵 AI,大概也能搞个语音聊天机器人,主要还是笔记本上终于带了一个麦克风(哭泣)
然后搞搞就出来了;-P虽然写得略丑但是还是很有成就感的
# -*- coding: utf-8 -*-
import wave, pyaudio
from aip import AipSpeech
import numpy as np
import requests, json
# Credentials handed to the AipSpeech client below. The literals here are
# placeholders left by the author (the text means "can't tell you");
# presumably they must be replaced with real values before running.
APP_ID = '不能告诉你'
API_KEY = '不能告诉你'
SECRET_KEY = '不能告诉你'
# Shared speech client used by identify().
aipSpeech = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
# Number of frames requested per stream.read() call and used as the
# stream's frames_per_buffer.
CHUNK = 1024
# Sample format passed to PyAudio when opening the input stream.
FORMAT = pyaudio.paInt16
# Sampling rate used for both capture and the WAV header.
RATE = 16000
# Number of audio channels used for capture and the WAV header.
CHANNELS = 1
# Clip length, in seconds, captured by record_wave().
RECORD_SECONDS = 2
# Path of the WAV file that is written by the recorders and read by identify().
WAVE_OUTPUT_FILENAME = "D:/audio.wav"
def record_wave():
    """Record a fixed-length clip from an input stream into a WAV file.

    Captures roughly ``RECORD_SECONDS`` seconds of audio using the
    module-level ``FORMAT``, ``CHANNELS``, ``RATE`` and ``CHUNK`` settings and
    writes the result to ``WAVE_OUTPUT_FILENAME`` (opened in ``'wb'`` mode).

    The audio stream, the PyAudio instance and the wave writer are always
    released, even if reading or writing raises.
    """
    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()
def get_file_content(filePath):
    """Return the complete contents of the file at *filePath* as bytes."""
    with open(filePath, mode='rb') as handle:
        payload = handle.read()
    return payload
def identify():
    """Recognise the speech in ``WAVE_OUTPUT_FILENAME`` and print the bot reply.

    The WAV file is sent to the speech client's ``asr`` call. If the response's
    ``err_msg`` is ``"success."``, the first recognition result is printed and
    forwarded to the Tuling chatbot HTTP API, whose ``text`` field is printed.
    When recognition does not succeed nothing is printed. A failed or
    malformed chatbot response is reported and skipped so that a caller
    running in a loop is not brought down by a transient network problem.
    """
    # Declare the same sample rate the recordings were captured with.
    res = aipSpeech.asr(get_file_content(WAVE_OUTPUT_FILENAME), 'wav', RATE, {'lan': 'zh'})
    if res.get("err_msg") != "success.":
        return
    results = res.get("result")
    if not results:
        return

    heard = results[0]
    print(heard)
    try:
        # Pass the query through ``params`` so the recognised text is
        # URL-encoded; interpolating it into the URL breaks on '&', '#', '%'.
        response = requests.get(
            'http://www.tuling123.com/openapi/api',
            params={'key': '不能告诉你', 'info': heard, 'userid': '111'},
            timeout=10,
        )
        reply = response.json()
    except (requests.RequestException, ValueError) as exc:
        print("Tuling request failed: %s" % (exc,))
        return

    answer = reply.get('text') if isinstance(reply, dict) else None
    if answer is not None:
        print(answer)
def _save_wave(chunks, sample_width):
    """Write the raw audio *chunks* to ``WAVE_OUTPUT_FILENAME`` as a WAV file."""
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(chunks))
    finally:
        wf.close()


def Monitor(threshold=2000):
    """Listen continuously and hand each detected utterance to ``identify()``.

    Audio is read ``CHUNK`` frames at a time. A chunk whose peak sample value
    exceeds *threshold* starts (or continues) an utterance; the first chunk
    whose peak is at or below *threshold* ends it. The collected chunks are
    then written to ``WAVE_OUTPUT_FILENAME`` and ``identify()`` is called.

    Args:
        threshold: Peak sample value above which a chunk counts as sound.

    The loop has no exit condition; it ends only when an exception (for
    example ``KeyboardInterrupt``) propagates, at which point the stream and
    the PyAudio instance are released.
    """
    p = pyaudio.PyAudio()
    try:
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            rec = []
            recording = False
            while True:
                data = stream.read(CHUNK)
                # frombuffer replaces the deprecated binary mode of fromstring.
                peak = np.max(np.frombuffer(data, dtype=np.short))
                if peak > threshold:
                    # Keep the chunk that triggers detection as well, so the
                    # start of the utterance is not clipped.
                    recording = True
                    rec.append(data)
                elif recording:
                    # The first quiet chunk closes the utterance.
                    rec.append(data)
                    recording = False
                    _save_wave(rec, sample_width)
                    rec = []
                    identify()
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
# Start listening as soon as this file is executed. Monitor() loops without
# an exit condition, so this call does not return normally.
Monitor()
思路很简单粗暴:以 1024 个采样点为一小段不断录音,当某一段的峰值超过阈值(2000)时开始保存,等到峰值回落到阈值以下,就把这段录音当作一句话,扔到百度语音 API 识别,再把识别的结果扔到图灵 API 里面
受网速限制,在家里测试的时候有点延时,连着说几句会出现漏掉中间的情况。再加上图灵机器人又有点傻,回应是纯文字而没做语音导致体验不太好
懒得改进了,玩玩还是够用的