个人尝试代码仓库:https://gitee.com/enzoism/chrome_tampermonkey
鸣谢:感谢每一位无私奉献的传道者,在此不一一具名!
2024-12-01 目前应该是ChatTTS的呼声最高,但是为了不引入新的学习成本,暂时不考虑使用ChatTTS,直接使用在线API可以满足我的需求!(百度的TTS有太多的AI味,在这个时间节点上,百度可能还是没有把重心放在这些细节上)
已经完成整体功能:将当前脚本复制到Tampermonkey(并把其中的API_KEY、SECRET_KEY替换为自己的百度API密钥),在页面中选中文字后点击右下角的"朗读选中文本"按钮,即可进行语音播报。
// ==UserScript==
// @name Text to Speech using Baidu API
// @namespace http://tampermonkey.net/
// @version 0.1
// @description Convert selected text to speech using Baidu Text-to-Speech API
// @author Your name
// @match *://*/*
// @grant GM_xmlhttpRequest
// @grant GM_setValue
// @grant GM_getValue
// ==/UserScript==
(function() {
'use strict';
// Configuration: replace these with your own Baidu API credentials.
// NOTE(review): these look like real credentials embedded in a shared script;
// consider rotating them and supplying per-user values instead.
const API_KEY = '44kNzmCeZNnLHoUcNiGYnyI7';
const SECRET_KEY = '7sNV1osooTqJliQB1aHB5lJKEKlLrg5f';
// In-memory copy of the Baidu OAuth access token; empty until it is fetched
// or read back from userscript storage.
let access_token = '';
// Request a fresh access_token from the Baidu OAuth endpoint.
// Resolves with the token string after storing it in `access_token` and in
// userscript storage. Rejects when the request errors, the HTTP status is not
// 200, the body cannot be parsed, or the body carries no access_token.
async function getAccessToken() {
    const tokenEndpoint = `https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=${API_KEY}&client_secret=${SECRET_KEY}`;

    return new Promise((resolve, reject) => {
        // Handles a completed HTTP exchange (any status code).
        const handleLoad = (response) => {
            if (response.status !== 200) {
                console.error('获取access_token失败:', response.status, response.responseText);
                reject(new Error(`获取access_token失败: ${response.status}`));
                return;
            }
            try {
                const payload = JSON.parse(response.responseText);
                if (!payload.access_token) {
                    console.error('获取access_token失败: 返回数据格式错误', payload);
                    reject(new Error('获取access_token失败: 返回数据格式错误'));
                    return;
                }
                // Keep the token both in memory and in persistent storage.
                access_token = payload.access_token;
                GM_setValue('access_token', access_token);
                resolve(access_token);
            } catch (error) {
                console.error('解析access_token响应失败:', error);
                reject(error);
            }
        };

        // Handles a transport-level failure (no HTTP response at all).
        const handleError = (error) => {
            console.error('请求access_token失败:', error);
            reject(error);
        };

        GM_xmlhttpRequest({
            method: 'POST',
            url: tokenEndpoint,
            headers: {
                'Content-Type': 'application/json',
                'Accept': 'application/json'
            },
            onload: handleLoad,
            onerror: handleError
        });
    });
}
// Synthesize `text` with Baidu's text2audio endpoint and play the result.
//
// text   - the text to read aloud.
// params - optional numeric overrides: spd (speed), pit (pitch), vol (volume),
//          per (voice id). Missing or non-numeric entries fall back to the
//          defaults 5 / 5 / 5 / 0.
//
// Never rejects: every failure is logged to the console and reported to the
// user with an alert.
async function textToSpeech(text, params = {}) {
    const options = params || {};

    // Returns `value` as a number, or `fallback` when it is absent or not
    // numeric. Unlike `value || fallback`, a legitimate 0 is preserved.
    const pickNumber = (value, fallback) => {
        if (value === undefined || value === null || value === '') {
            return fallback;
        }
        const parsed = Number(value);
        return Number.isFinite(parsed) ? parsed : fallback;
    };

    if (!access_token) {
        try {
            access_token = GM_getValue('access_token') || await getAccessToken();
        } catch (error) {
            // Without this catch the rejection would escape as an unhandled
            // promise rejection and the alert below would never be shown.
            console.error('获取access_token出错:', error);
            access_token = '';
        }
    }
    if (!access_token) {
        alert('无法获取access_token,请检查API配置');
        return;
    }

    // Reports an API-level failure delivered as a JSON body instead of audio.
    const handleApiError = (blob) => {
        const report = (detail) => {
            console.error('语音合成失败:', detail);
            // err_no 502 is Baidu's token-validation failure: drop the cached
            // token so the next attempt fetches a fresh one instead of failing
            // forever. An empty string is stored because GM_deleteValue is not
            // granted to this script.
            if (detail && detail.err_no === 502) {
                access_token = '';
                GM_setValue('access_token', '');
            }
            alert('语音合成失败,请重试');
        };
        if (!blob || typeof blob.text !== 'function') {
            report(null);
            return;
        }
        blob.text().then((body) => {
            let detail = body;
            try {
                detail = JSON.parse(body);
            } catch (parseError) {
                // Not JSON after all; keep the raw body for the log.
            }
            report(detail);
        }).catch(report);
    };

    const url = `https://tsn.baidu.com/text2audio`;
    const requestParams = new URLSearchParams({
        // Deliberately percent-encoded twice (here and again by
        // URLSearchParams): Baidu's REST docs recommend double-encoding `tex`.
        tex: encodeURIComponent(text),
        tok: access_token,
        cuid: 'tampermonkey_tts',
        ctp: 1,
        lan: 'zh',
        spd: pickNumber(options.spd, 5),
        pit: pickNumber(options.pit, 5),
        vol: pickNumber(options.vol, 5),
        per: pickNumber(options.per, 0),
        aue: 3
    });

    try {
        GM_xmlhttpRequest({
            method: 'GET',
            url: `${url}?${requestParams.toString()}`,
            responseType: 'blob',
            onload: function(response) {
                if (response.status !== 200) {
                    console.error('语音合成请求失败:', response.status);
                    alert('语音合成失败,请重试');
                    return;
                }

                const blob = response.response;
                // The API can answer 200 with a JSON error body, so check the
                // content type before treating the payload as audio.
                const headerMatch = /^content-type:\s*([^\r\n;]+)/im.exec(response.responseHeaders || '');
                const contentType = ((headerMatch && headerMatch[1]) || (blob && blob.type) || '')
                    .trim()
                    .toLowerCase();
                if (!blob || contentType.includes('json')) {
                    handleApiError(blob);
                    return;
                }

                const objectUrl = URL.createObjectURL(blob);
                // Release the blob once playback finishes or fails; otherwise
                // every read-aloud leaks one audio blob until the page unloads.
                const release = () => URL.revokeObjectURL(objectUrl);
                const audio = new Audio(objectUrl);
                audio.addEventListener('ended', release, { once: true });
                audio.addEventListener('error', release, { once: true });
                audio.play().catch(error => {
                    release();
                    console.error('播放音频失败:', error);
                    alert('播放音频失败,请重试');
                });
            },
            onerror: function(error) {
                console.error('请求失败:', error);
                alert('请求失败,请重试');
            }
        });
    } catch (error) {
        console.error('语音合成出错:', error);
        alert('语音合成出错,请重试');
    }
}
// Build the settings panel (hidden until toggled) and append it to the page.
//
// The panel holds three range sliders (#tts-speed, #tts-volume, #tts-pitch),
// each with a live read-out span (#speed-value, #volume-value, #pitch-value),
// and a voice selector (#tts-person). Every change is persisted with
// GM_setValue under the keys 'tts-speed', 'tts-volume', 'tts-pitch' and
// 'tts-person'.
//
// The controls are created with DOM APIs rather than innerHTML templates: the
// templates in this copy of the script were empty, so the element lookups
// returned null and wiring the listeners threw a TypeError.
//
// Returns the panel element.
function createControlPanel() {
    const panel = document.createElement('div');
    Object.assign(panel.style, {
        position: 'fixed',
        bottom: '80px',
        right: '20px',
        zIndex: '9999',
        backgroundColor: 'white',
        // Fixed text colour so labels stay readable on pages with light text.
        color: '#000',
        padding: '10px',
        border: '1px solid #ccc',
        borderRadius: '5px',
        display: 'none'
    });

    // Fallbacks mirror the defaults the read-aloud button uses when it reads
    // these storage keys (speed 9, volume 5, pitch 5).
    const sliders = [
        { param: 'speed', label: '语速', fallback: 9 },
        { param: 'volume', label: '音量', fallback: 5 },
        { param: 'pitch', label: '音调', fallback: 5 }
    ];

    sliders.forEach(({ param, label, fallback }) => {
        const row = document.createElement('div');

        const caption = document.createElement('label');
        caption.htmlFor = `tts-${param}`;
        caption.textContent = `${label}: `;

        const input = document.createElement('input');
        input.type = 'range';
        input.id = `tts-${param}`;
        // 0-15 is the range documented for these parameters.
        // NOTE(review): basic voices may cap volume at 9 - confirm.
        input.min = '0';
        input.max = '15';
        input.step = '1';
        // Set the value after min/max so the browser clamps it correctly.
        input.value = String(GM_getValue(`tts-${param}`, fallback));

        const value = document.createElement('span');
        value.id = `${param}-value`;
        value.textContent = input.value;

        input.addEventListener('input', () => {
            value.textContent = input.value;
            GM_setValue(`tts-${param}`, input.value);
        });

        row.appendChild(caption);
        row.appendChild(input);
        row.appendChild(value);
        panel.appendChild(row);
    });

    // Voice selector. Ids follow Baidu's `per` parameter:
    // 0 female, 1 male, 3 Du Xiaoyao, 4 Du Yaya.
    const voices = [
        { value: '0', label: '女声' },
        { value: '1', label: '男声' },
        { value: '3', label: '度逍遥' },
        { value: '4', label: '度丫丫' }
    ];
    const personRow = document.createElement('div');
    const personCaption = document.createElement('label');
    personCaption.htmlFor = 'tts-person';
    personCaption.textContent = '发音人: ';
    const personSelect = document.createElement('select');
    personSelect.id = 'tts-person';
    voices.forEach(({ value, label }) => {
        const option = document.createElement('option');
        option.value = value;
        option.textContent = label;
        personSelect.appendChild(option);
    });
    personSelect.value = String(GM_getValue('tts-person', 0));
    personSelect.addEventListener('change', (e) => {
        GM_setValue('tts-person', e.target.value);
    });
    personRow.appendChild(personCaption);
    personRow.appendChild(personSelect);
    panel.appendChild(personRow);

    document.body.appendChild(panel);
    return panel;
}
// Create the two floating buttons in the bottom-right corner of the page:
//   - the green "read selected text" button, which sends the current text
//     selection to textToSpeech using the stored voice settings;
//   - the blue settings button, which toggles the control panel.
// Also creates the control panel itself via createControlPanel().
function createFloatingButton() {
    // Styling shared by both buttons.
    const sharedStyle = {
        position: 'fixed',
        bottom: '20px',
        zIndex: '9999',
        padding: '10px',
        color: 'white',
        border: 'none',
        borderRadius: '5px',
        cursor: 'pointer'
    };

    const button = document.createElement('button');
    // textContent instead of innerHTML: the label is plain text.
    button.textContent = '朗读选中文本';
    Object.assign(button.style, sharedStyle, {
        right: '20px',
        backgroundColor: '#4CAF50'
    });

    const settingsButton = document.createElement('button');
    // Gear emoji written as escapes; the literal had been mangled into '??'
    // by an encoding round-trip.
    settingsButton.textContent = '\u2699\uFE0F';
    settingsButton.title = '设置';
    Object.assign(settingsButton.style, sharedStyle, {
        right: '140px',
        backgroundColor: '#2196F3'
    });

    const panel = createControlPanel();
    settingsButton.addEventListener('click', function() {
        panel.style.display = panel.style.display === 'none' ? 'block' : 'none';
    });

    // Reads an integer setting from storage, falling back to the default when
    // the stored value is missing or not a number.
    const readSetting = (key, fallback) => {
        const parsed = parseInt(GM_getValue(key, fallback), 10);
        return Number.isNaN(parsed) ? fallback : parsed;
    };

    button.addEventListener('click', function() {
        const selectedText = window.getSelection().toString().trim();
        if (!selectedText) {
            alert('请先选择要朗读的文本');
            return;
        }
        const params = {
            spd: readSetting('tts-speed', 9),
            vol: readSetting('tts-volume', 5),
            pit: readSetting('tts-pitch', 5),
            per: readSetting('tts-person', 0)
        };
        // textToSpeech is async; surface a rejection instead of leaving it as
        // an unhandled promise rejection.
        Promise.resolve(textToSpeech(selectedText, params)).catch(error => {
            console.error('语音合成出错:', error);
            alert('语音合成出错,请重试');
        });
    });

    document.body.appendChild(button);
    document.body.appendChild(settingsButton);
}
// Initialization: inject the floating buttons (and the settings panel they
// create) into the current page.
createFloatingButton();
})();
以下Python脚本在Python 3.8.5下可运行(需先安装requests和playsound):
import requests
import json
import base64
from urllib.parse import quote
import os
from playsound import playsound
class BaiduTTS:
    """Minimal client for Baidu's online text-to-speech REST API.

    Fetches an OAuth access token on first use, converts text to an MP3 file
    and plays it with ``playsound``. Failures are reported with ``print`` and
    a ``None``/``False`` return value rather than by raising.
    """

    TOKEN_URL = "https://aip.baidubce.com/oauth/2.0/token"
    TTS_URL = "https://tsn.baidu.com/text2audio"
    # Seconds to wait for each HTTP request; without a timeout a stalled
    # connection would block the caller forever.
    REQUEST_TIMEOUT = 10

    def __init__(self,
                 api_key='44kNzmCeZNnLHoUcNiGYnyI7',
                 secret_key='7sNV1osooTqJliQB1aHB5lJKEKlLrg5f'):
        """Store the API credentials.

        Args:
            api_key: Baidu application API key. Defaults to the key that was
                embedded in the original script.
            secret_key: Baidu application secret key (same default rule).

        NOTE(review): the default credentials are committed in source;
        consider rotating them and passing your own.
        """
        self.API_KEY = api_key
        self.SECRET_KEY = secret_key
        # Filled lazily by get_access_token().
        self.access_token = None

    def get_access_token(self):
        """Fetch an access token and cache it on the instance.

        Returns:
            The token string, or ``None`` when the request fails, the status
            is not 200, or the response carries no ``access_token``.
        """
        query = {
            'grant_type': 'client_credentials',
            'client_id': self.API_KEY,
            'client_secret': self.SECRET_KEY,
        }
        try:
            response = requests.post(
                self.TOKEN_URL, params=query, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(f"获取access_token失败: {e}")
            return None

        if response.status_code != 200:
            print(f"获取access_token失败: {response.status_code}")
            return None

        try:
            token = response.json().get('access_token')
        except (ValueError, AttributeError):
            # Body was not JSON, or not a JSON object.
            token = None
        if not token:
            print("获取access_token失败: 返回数据格式错误")
            return None

        self.access_token = token
        return token

    def text_to_speech(self, text, params=None, output_file="output.mp3"):
        """Convert ``text`` to speech, save it as MP3 and play it.

        Args:
            text: Text to synthesize. Empty or whitespace-only text is
                rejected without contacting the API.
            params: Optional dict overriding the voice settings ``spd``
                (speed), ``pit`` (pitch), ``vol`` (volume), each 0-15, and
                ``per`` (voice: 0 female, 1 male, 3 Du Xiaoyao, 4 Du Yaya).
            output_file: Path the MP3 is written to.

        Returns:
            ``True`` when the audio was saved and played, ``False`` otherwise.
        """
        if not text or not text.strip():
            print("文本为空,无法转换")
            return False

        if not self.access_token:
            self.access_token = self.get_access_token()
        if not self.access_token:
            print("无法获取access_token")
            return False

        # Default voice settings, overridden by the caller's params.
        voice = {'spd': 9, 'pit': 5, 'vol': 5, 'per': 0}
        if params:
            voice.update(params)

        query = {
            # Quoted here and again by requests: Baidu's REST docs recommend
            # URL-encoding `tex` twice, so the double encoding is deliberate.
            'tex': quote(text),
            'tok': self.access_token,
            'cuid': 'python_tts',
            'ctp': 1,
            'lan': 'zh',
            'aue': 3,  # MP3 output
            **voice,
        }

        try:
            response = requests.get(
                self.TTS_URL, params=query, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(f"请求失败: {e}")
            return False

        # Audio comes back with an audio/* content type; anything else is
        # treated as an error description.
        content_type = response.headers.get('Content-Type', '')
        if not content_type.startswith('audio/'):
            try:
                error_msg = response.json()
            except ValueError:
                error_msg = response.text
            # err_no 502 is a token-validation failure: forget the token so
            # the next call fetches a fresh one.
            if isinstance(error_msg, dict) and error_msg.get('err_no') == 502:
                self.access_token = None
            print(f"转换失败: {error_msg}")
            return False

        try:
            with open(output_file, 'wb') as f:
                f.write(response.content)
        except OSError as e:
            print(f"保存音频文件失败: {e}")
            return False
        print(f"已保存音频文件: {output_file}")

        try:
            playsound(output_file)
        except Exception as e:
            # playsound raises its own exception type; stay broad so playback
            # remains best-effort and never crashes the caller.
            print(f"播放失败: {e}")
            return False
        return True
def main():
    """Prompt for a line of text on stdin and read it aloud via BaiduTTS."""
    client = BaiduTTS()
    spoken_text = input("请输入要转换的文字: ")
    # Optional voice overrides: speed, pitch, volume and voice id.
    voice_options = dict(spd=9, pit=5, vol=5, per=0)
    client.text_to_speech(spoken_text, voice_options)


if __name__ == "__main__":
    main()
TTS(Text-to-Speech,文本转语音)技术的发展历史可以追溯到20世纪50年代,经历了多个阶段的技术进步和创新。以下是TTS技术发展的主要里程碑:
总的来说,TTS技术从早期的简单合成发展到了现在的深度学习驱动的自然语音生成,不断推动着人机交互和无障碍通信的发展。