1.百度的语音合成
具体配置参考 http://ai.baidu.com/docs#/TTS-Online-Python-SDK/top
from aip import AipSpeech

# Your Baidu AI credentials: APP ID, API key (AK) and secret key (SK).
APP_ID = '15421010'
API_KEY = 'YSKSaidmdyWkfhnhWezCeTqi'
SECRET_KEY = 'EXfzsG9tr6fI6U4KPz28XQmXa8MIBKMz'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Ask the service to synthesize the sentence; 'zh' is the language code and
# the trailing dict carries optional settings ('vol' — presumably the
# volume; confirm against the Baidu TTS documentation linked above).
result = client.synthesis('来到这个世界,天使都会犯错', 'zh', 1, {
    'vol': 5,
})

# On success the SDK returns the audio as binary data; on failure it returns
# a dict describing the error (see the error-code table in the Baidu docs).
if not isinstance(result, dict):
    with open('audio.mp3', 'wb') as f:
        f.write(result)
else:
    # Report the failure instead of silently producing no audio file.
    print(f"speech synthesis failed: {result}")
2.百度的语音识别
具体配置参考 http://ai.baidu.com/docs#/ASR-Online-Python-SDK/top
import os
import subprocess

from aip import AipSpeech

# Your Baidu AI credentials: APP ID, API key (AK) and secret key (SK).
APP_ID = '15421010'
API_KEY = 'YSKSaidmdyWkfhnhWezCeTqi'
SECRET_KEY = 'EXfzsG9tr6fI6U4KPz28XQmXa8MIBKMz'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted audio is written next to the input as ``<name>.pcm`` using
    the settings passed to ffmpeg below (signed 16-bit little-endian, one
    channel, 16000 Hz). The ``ffmpeg`` executable must be installed and
    reachable on PATH.

    Args:
        filePath: Path of the audio file to convert.

    Returns:
        The bytes of the generated ``.pcm`` file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ffmpeg executable cannot be found.
    """
    # splitext removes only the final extension, so "./clip.m4a" and
    # "take.1.m4a" map to "./clip.pcm" and "take.1.pcm"; split('.')[0]
    # would give ".pcm" and "take.pcm".
    pcm_path = f"{os.path.splitext(filePath)[0]}.pcm"
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from being misparsed or executed.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", filePath,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


# Recognize a local file. The format ('pcm') and rate (16000) match the
# ffmpeg conversion above; 'dev_pid' presumably selects the recognition
# model (confirm against the Baidu ASR documentation linked above).
res = client.asr(get_file_content('jrshdls.m4a'), 'pcm', 16000, {
    'dev_pid': 1536,
})
print(res)
3.两个结合
import os
import subprocess
import time

from aip import AipSpeech

# Your Baidu AI credentials: APP ID, API key (AK) and secret key (SK).
APP_ID = '15421010'
API_KEY = 'YSKSaidmdyWkfhnhWezCeTqi'
SECRET_KEY = 'EXfzsG9tr6fI6U4KPz28XQmXa8MIBKMz'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted audio is written next to the input as ``<name>.pcm`` using
    the settings passed to ffmpeg below (signed 16-bit little-endian, one
    channel, 16000 Hz). The ``ffmpeg`` executable must be on PATH.

    Args:
        filePath: Path of the audio file to convert.

    Returns:
        The bytes of the generated ``.pcm`` file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ffmpeg executable cannot be found.
    """
    # splitext removes only the final extension; split('.')[0] would turn
    # "./clip.m4a" into ".pcm" and "take.1.m4a" into "take.pcm".
    pcm_path = f"{os.path.splitext(filePath)[0]}.pcm"
    # An argument list (no shell) is safe for paths with spaces or shell
    # metacharacters.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", filePath,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


def audio2text(filepath):
    """Recognize the speech in a local audio file and return the text.

    Args:
        filepath: Path of the audio file to recognize.

    Returns:
        The first entry of the ``"result"`` list in the service response.

    Raises:
        RuntimeError: If the response carries no recognition result.
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    results = res.get("result")
    if not results:
        # Without this check a failed request surfaced as an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise RuntimeError(f"speech recognition failed: {res}")
    text = results[0]
    print(text)
    return text


def text2audio(text):
    """Synthesize ``text`` to speech and save it as a time-stamped MP3 file.

    Args:
        text: The text to speak.

    Returns:
        The name of the ``.mp3`` file written to the current directory.

    Raises:
        RuntimeError: If the service returns an error dict instead of audio.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
    })
    # On success the SDK returns binary audio; on failure it returns a dict
    # describing the error. Returning a file name in that case would point
    # the caller at a file that was never written.
    if isinstance(result, dict):
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename


text = audio2text("jrshdls.m4a")
filename = text2audio(text)
# Hand the file name to the system shell as a command.
# NOTE(review): presumably relies on the shell opening the file with its
# associated player (Windows behaviour) — not portable; confirm.
os.system(filename)
4.图灵机器人的简单使用
具体参考 https://www.kancloud.cn/turing/www-tuling123-com/718227
import requests

# Request payload for the Tuling robot API: the question text plus the
# account's API key and a caller-chosen user id.
args = {
    "reqType": 0,
    "perception": {
        "inputText": {
            "text": "附近的酒店"
        }
    },
    "userInfo": {
        "apiKey": "08a682c47e334a11bd99cbf093930b63",
        "userId": "1"
    }
}

url = "http://openapi.tuling123.com/openapi/api/v2"

# A timeout keeps the script from hanging forever if the service is
# unreachable (requests waits indefinitely by default).
res = requests.post(url, json=args, timeout=10)
# Fail with a clear HTTP error instead of trying to parse an error page.
res.raise_for_status()
print(res.json())
5.人工智障的简单应用
import os
import subprocess
import time

from aip import AipSpeech, AipNlp  # AipNlp provides natural language processing

# Your Baidu AI credentials: APP ID, API key (AK) and secret key (SK).
APP_ID = '15421010'
API_KEY = 'YSKSaidmdyWkfhnhWezCeTqi'
SECRET_KEY = 'EXfzsG9tr6fI6U4KPz28XQmXa8MIBKMz'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # natural language processing client


def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted audio is written next to the input as ``<name>.pcm`` using
    the settings passed to ffmpeg below (signed 16-bit little-endian, one
    channel, 16000 Hz). The ``ffmpeg`` executable must be on PATH.

    Args:
        filePath: Path of the audio file to convert.

    Returns:
        The bytes of the generated ``.pcm`` file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ffmpeg executable cannot be found.
    """
    # splitext removes only the final extension; split('.')[0] would turn
    # "./clip.m4a" into ".pcm" and "take.1.m4a" into "take.pcm".
    pcm_path = f"{os.path.splitext(filePath)[0]}.pcm"
    # An argument list (no shell) is safe for paths with spaces or shell
    # metacharacters.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", filePath,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


def audio2text(filepath):
    """Recognize the speech in a local audio file and return the text.

    Args:
        filepath: Path of the audio file to recognize.

    Returns:
        The first entry of the ``"result"`` list in the service response.

    Raises:
        RuntimeError: If the response carries no recognition result.
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    results = res.get("result")
    if not results:
        # Without this check a failed request surfaced as an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise RuntimeError(f"speech recognition failed: {res}")
    text = results[0]
    print(text)
    return text


def text2audio(text):
    """Synthesize ``text`` to speech and save it as a time-stamped MP3 file.

    Args:
        text: The text to speak.

    Returns:
        The name of the ``.mp3`` file written to the current directory.

    Raises:
        RuntimeError: If the service returns an error dict instead of audio.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
    })
    # On success the SDK returns binary audio; on failure it returns a dict
    # describing the error, in which case no file exists to return.
    if isinstance(result, dict):
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename


# Example: nlp.simnet("你叫什么名字", "你的名字是什么") compares two
# sentences; the response dict is read for its "score" entry below.

text = audio2text("jrshdls.m4a")

# simnet scores the similarity of the two strings. A response without a
# "score" entry (e.g. an API error) is treated as "no match" rather than
# crashing on a None comparison.
# NOTE(review): the reference phrase asks about time while the canned reply
# states a name — confirm the intended question/answer pair.
score = nlp.simnet("时间的多少", text).get("score")
if score is not None and score >= 0.68:
    text = "我的名字是小明"
else:
    text = "我不知道你在说什么"

filename = text2audio(text)
# Hand the file name to the system shell as a command.
# NOTE(review): presumably relies on the shell opening the file with its
# associated player (Windows behaviour) — not portable; confirm.
os.system(filename)
6.如何在浏览器中实现图灵机器人的对话
baidu_ai.py
import os
import subprocess
import time

from aip import AipSpeech, AipNlp

# Your Baidu AI credentials: APP ID, API key (AK) and secret key (SK).
APP_ID = '15421010'
API_KEY = 'YSKSaidmdyWkfhnhWezCeTqi'
SECRET_KEY = 'EXfzsG9tr6fI6U4KPz28XQmXa8MIBKMz'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted audio is written next to the input as ``<name>.pcm`` using
    the settings passed to ffmpeg below (signed 16-bit little-endian, one
    channel, 16000 Hz). The ``ffmpeg`` executable must be on PATH.

    Args:
        filePath: Path of the audio file to convert.

    Returns:
        The bytes of the generated ``.pcm`` file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ffmpeg executable cannot be found.
    """
    # splitext removes only the final extension; split('.')[0] would turn
    # "./clip.wav" into ".pcm" and "take.1.wav" into "take.pcm".
    pcm_path = f"{os.path.splitext(filePath)[0]}.pcm"
    # An argument list (no shell) is safe for paths with spaces or shell
    # metacharacters.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", filePath,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


def audio2text(filepath):
    """Recognize the speech in a local audio file and return the text.

    Args:
        filepath: Path of the audio file to recognize.

    Returns:
        The first entry of the ``"result"`` list in the service response.

    Raises:
        RuntimeError: If the response carries no recognition result.
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    results = res.get("result")
    if not results:
        # Without this check a failed request surfaced as an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise RuntimeError(f"speech recognition failed: {res}")
    text = results[0]
    print(text)
    return text


def text2audio(text):
    """Synthesize ``text`` to speech and save it as a time-stamped MP3 file.

    Args:
        text: The text to speak.

    Returns:
        The name of the ``.mp3`` file written to the current directory.

    Raises:
        RuntimeError: If the service returns an error dict instead of audio.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
    })
    # On success the SDK returns binary audio; on failure it returns a dict
    # describing the error, in which case no file exists to return.
    if isinstance(result, dict):
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename


def to_tuling(text):
    """Send ``text`` to the Tuling robot API and return its text reply.

    Args:
        text: The question to ask.

    Returns:
        The ``values.text`` field of the first entry in the response's
        ``"results"`` list.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        RuntimeError: If the response contains no results.
    """
    import requests

    args = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text
            }
        },
        "userInfo": {
            "apiKey": "08a682c47e334a11bd99cbf093930b63",
            "userId": "1"
        }
    }
    url = "http://openapi.tuling123.com/openapi/api/v2"
    # A timeout keeps a stalled service from blocking the caller forever.
    res = requests.post(url, json=args, timeout=10)
    res.raise_for_status()
    payload = res.json()
    results = payload.get("results")
    if not results:
        raise RuntimeError(f"Tuling returned no results: {payload}")
    return results[0].get("values").get("text")
app.py
import os
from uuid import uuid4

from flask import Flask, abort, jsonify, render_template, request, send_file

import baidu_ai

app = Flask(__name__)


@app.route("/")
def index():
    """Serve the recording page."""
    return render_template("index.html")


@app.route("/ai", methods=["POST"])
def ai():
    """Answer an uploaded voice recording with a synthesized voice reply.

    Expects a multipart upload whose file field is named ``record`` and
    responds with JSON ``{"filename": <name of the reply audio file>}``.
    """
    # 1. Save the uploaded recording.
    audio = request.files.get("record")
    if audio is None:
        # A request without the file used to fail with an AttributeError
        # (HTTP 500); report it as a client error instead.
        abort(400, description='missing "record" file upload')
    filename = f"{uuid4()}.wav"
    audio.save(filename)
    # 2. Convert the recording to PCM and send it to Baidu for recognition.
    q_text = baidu_ai.audio2text(filename)
    # 3. Pass the recognized question to Tuling to obtain an answer.
    a_text = baidu_ai.to_tuling(q_text)
    # 4. Send the answer to Baidu speech synthesis to produce an audio file.
    a_file = baidu_ai.text2audio(a_text)
    # 5. Tell the front end which audio file to play.
    return jsonify({"filename": a_file})


# The route must declare the <filename> variable; without it Flask calls
# get_audio() with no argument and every request fails.
@app.route("/get_audio/<filename>")
def get_audio(filename):
    """Send a previously synthesized reply audio file to the browser."""
    # Replies are the .mp3 files produced by baidu_ai.text2audio; refuse
    # anything else so the endpoint cannot be used to download other files
    # (source code, uploads) or to escape the directory.
    if os.path.basename(filename) != filename or not filename.endswith(".mp3"):
        abort(404)
    return send_file(filename)


if __name__ == '__main__':
    # NOTE(review): debug=True on 0.0.0.0 exposes the interactive debugger
    # to the whole network — acceptable for a local demo only.
    app.run("0.0.0.0", 5000, debug=True)
index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<!-- Plays the synthesized reply returned by the server. -->
<audio controls autoplay id="player"></audio>
<p>
    <button onclick="start_reco()" style="background-color: yellow">录制语音指令</button>
</p>
<p>
    <button onclick="stop_reco_audio()" style="background-color: blue">发送语音指令</button>
</p>
</body>
<script type="application/javascript" src="https://cdn.bootcss.com/recorderjs/0.1.0/recorder.js"></script>
<script type="text/javascript" src="/static/jQuery3.1.1.js"></script>
<script type="text/javascript">
var reco = null;
var audio_context = new AudioContext();
navigator.getUserMedia = (navigator.getUserMedia ||
navigator.webkitGetUserMedia ||
navigator.mozGetUserMedia ||
navigator.msGetUserMedia);
navigator.getUserMedia({audio: true}, create_stream, function (err) {
console.log(err)
});
function create_stream(user_media) {
var stream_input = audio_context.createMediaStreamSource(user_media);
reco = new Recorder(stream_input);
}
function start_reco() {
reco.record();
}
function stop_reco_audio() {
reco.stop();
send_audio();
reco.clear();
}
function send_audio() {
reco.exportWAV(function (wav_file) {
var formdata = new FormData();
formdata.append("record", wav_file);
console.log(formdata);
$.ajax({
url: "http://192.168.13.177:5000/ai",
type: 'post',
processData: false,
contentType: false,
data: formdata,
dataType: 'json',
success: function (data) {
console.log(data);
document.getElementById("player").src ="http://192.168.13.177:5000/get_audio/" + data.filename
}
});
})
}
</script>
</html>