python + pyaudio + Baidu speech recognition API + Turing API: a voice chatbot

Feeling a bit down lately, so I'm writing some Python to take the edge off (not doing what I'm supposed to be doing x

Idea


A while back I saw people tearing down laptop screens to build magic mirrors and thought it was amazing; as I recall, that needed a Raspberry Pi. Playing around now, I found that Baidu's speech recognition API is pretty decent, and together with the Turing AI account I registered a while ago, it should be enough for a voice chatbot. Mostly it's because my laptop finally comes with a microphone (sob

After some tinkering it came together ;-P The code is a bit ugly, but it still feels pretty satisfying.

Code


# -*- coding: utf-8 -*-
import wave, pyaudio
from aip import AipSpeech
import numpy as np
import requests, json

# Baidu speech credentials -- placeholders, fill in your own
APP_ID = 'not telling you'
API_KEY = 'not telling you'
SECRET_KEY = 'not telling you'
TURING_KEY = 'not telling you'  # Turing robot key, used in identify() below

aipSpeech = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

CHUNK = 1024                          # frames per buffer
FORMAT = pyaudio.paInt16              # 16-bit samples
RATE = 16000                          # sample rate Baidu ASR expects
CHANNELS = 1                          # mono
RECORD_SECONDS = 2                    # length of a fixed-size test recording
WAVE_OUTPUT_FILENAME = "D:/audio.wav"

def record_wave():
    # Record a fixed-length clip and save it as a 16 kHz mono WAV file
    # (kept for reference; the threshold-based Monitor() below is what actually runs)
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    # print("* recording")
    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)

    # print "* done recording"

    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

def get_file_content(filePath):
    with open(filePath, 'rb') as fp:
        return fp.read()

def identify():
    # Send the recorded WAV to Baidu ASR, then pass the transcript to the Turing bot
    res = aipSpeech.asr(get_file_content(WAVE_OUTPUT_FILENAME), 'wav', 16000, {'lan': 'zh'})
    # print(res["err_msg"])
    position = "广东省东莞市南城区"  # unused
    if res["err_msg"] == "success.":
        print(res["result"][0])
        cont = requests.get('http://www.tuling123.com/openapi/api?key=%s&info=%s&userid=111' % (TURING_KEY, res["result"][0])).content
        m = json.loads(cont)
        print(m['text'])

def Monitor():
    # Continuously read audio from the microphone; when the signal rises above
    # the threshold, buffer it, and when it falls back, save and recognize it
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    # print("start buffering audio")
    frames = []
    rec = []
    flag = False
    while True:
        # print('begin')
        data = stream.read(CHUNK)
        if flag:
            rec.append(data)
        frames.append(data)
        audio_data = np.frombuffer(data, dtype=np.int16)  # np.fromstring is deprecated
        large_sample_count = np.sum(audio_data > 2000)  # unused
        temp = np.max(audio_data)

        # print(temp)

        if temp > 2000:
            # the chunk is loud enough: start (or keep) buffering
            flag = True
            # print("signal detected")
            # print("current peak:", temp)

        if temp <= 2000:
            # record_wave()
            if flag:
                # volume just fell back below the threshold: treat the buffered
                # chunks as one utterance, write them to a WAV and recognize it
                flag = False
                wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(p.get_sample_size(FORMAT))
                wf.setframerate(RATE)
                wf.writeframes(b''.join(rec))
                wf.close()
                rec = []
                identify()

    # never reached: the loop above runs until the process is killed
    stream.stop_stream()
    stream.close()
    p.terminate()

Monitor()

The idea is simple and crude: record in short chunks, and whenever a stretch of audio gets loud and then quiet again, cut that stretch out as one utterance, throw it at the Baidu speech API for recognition, and then feed the recognized text to the Turing API.
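To make the cut rule easier to see in isolation, here is a condensed sketch of the same idea run on synthetic int16 chunks; the 2000 threshold is just the empirical value from the script above (roughly 6% of int16 full scale), and unlike Monitor() this version also keeps the first loud chunk of each burst.

import numpy as np

THRESHOLD = 2000  # empirical; about 6% of the int16 range

def segment(chunks):
    # yield one list of consecutive loud chunks per burst ("utterance")
    rec, talking = [], False
    for chunk in chunks:
        if int(np.max(chunk)) > THRESHOLD:
            talking = True
            rec.append(chunk)
        elif talking:
            # volume rose and then fell back: cut here
            yield rec
            rec, talking = [], False

quiet = np.zeros(1024, dtype=np.int16)
loud = np.full(1024, 3000, dtype=np.int16)
for utterance in segment([quiet, loud, loud, quiet, quiet, loud, quiet]):
    print(len(utterance), "loud chunk(s) form one utterance")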

Limited by network speed, there is a bit of a delay when testing at home, and if I say several sentences in a row, the ones in the middle sometimes get dropped. On top of that, the Turing bot is a bit dumb, and since the reply is plain text with no speech output, the experience isn't great.
Too lazy to improve it; it's good enough to play around with.
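That said, if anyone wanted spoken replies, the same baidu-aip SDK also has a text-to-speech call (aipSpeech.synthesis). A minimal, untested sketch, reusing the aipSpeech client from above and leaning on Windows' os.startfile for playback (the out_path and the call with m['text'] are just illustrative assumptions), could look like this:

import os

def speak(text, out_path="D:/reply.mp3"):
    # synthesis() returns MP3 bytes on success, or a dict describing the error
    result = aipSpeech.synthesis(text, 'zh', 1, {'vol': 5})
    if not isinstance(result, dict):
        with open(out_path, 'wb') as f:
            f.write(result)
        os.startfile(out_path)  # Windows only: open with the default player
    else:
        print("TTS failed:", result)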
