pyttsx3
pip install pyttsx3
import pyttsx3
def say_text(engine, words, voice):
    """Speak ``words`` through ``engine`` after selecting ``voice``.

    Args:
        engine: Speech engine object providing ``setProperty``, ``say``,
            ``runAndWait`` and ``stop`` (in this file, the object returned
            by ``pyttsx3.init()``).
        words: The text to speak.
        voice: Value assigned to the engine's ``'voice'`` property.
    """
    # Select the voice before the utterance is queued.
    engine.setProperty('voice', voice)
    engine.say(words)
    # Process the queued utterance, then stop the engine.
    engine.runAndWait()
    engine.stop()
def main():
    """Create a pyttsx3 engine, set rate and volume, and speak a sample."""
    # Create the speech engine object.
    engine = pyttsx3.init()
    # Set the speech rate.
    engine.setProperty('rate', 100)
    # Set the volume: 0 is the minimum, 1 is the maximum.
    engine.setProperty('volume', 1.0)
    # To read the text from the console instead of using the fixed sample:
    # words = input('请输入要转换的文本:')
    words = '这是一个小练习'
    say_text(engine, words, 'zh')


if __name__ == '__main__':
    main()
gTTS
pip install gTTS
from gtts import gTTS
import os
language = 'zh'  # or 'zh-tw'
# Path of the audio file to write. It must be defined here because both
# save() and the playback command below use it.
filename = 'output.mp3'
# slow=False selects the normal speaking speed.
output = gTTS(text="这是中文", lang=language, slow=False)
output.save(filename)
# Play the converted file. This relies on the Windows ``start`` shell
# command; on other platforms open the file with a local player instead.
os.system(f"start {filename}")
win32com
python -m pip install pyHook
python -m pip install pywin32
import win32com.client
# Create the "SAPI.SpVoice" COM object and have it read the sentence aloud.
sapi_prog_id = "SAPI.SpVoice"
speaker = win32com.client.Dispatch(sapi_prog_id)
greeting = "你好,这是中文!"
speaker.Speak(greeting)
ttskit
python -m pip install -U ttskit pyworld
python -m pip install ffmpeg
zhtts
python3 -m pip install -U ffmpeg pyworld ttskit # 会安装tensorflow-cpu版本,但使用时会报CUDA的错
python3 -m pip install tensorflow-gpu
import zhtts
import sounddevice as sd
# Build the synthesizer (uses fastspeech2 by default).
tts = zhtts.TTS()
text1 = "2020年,这是一个开源的端到端中文语音合成系统"

# text2mel / mel2audio are methods that ship with the TTS object (found by
# browsing it in PyCharm). The resulting audio is a NumPy array that can be
# passed directly to a player.
mel = tts.text2mel(text1)
print(f"{mel.shape} {type(mel)}")
audio = tts.mel2audio(mel)
print(f"{audio} {type(audio)}")

# For background on the playback calls, see the sounddevice section of
# these notes. The 24000 sample rate was found by converting the audio to a
# .wav file with another package and reading that file back.
playback_rate = 24000
sd.play(audio, samplerate=playback_rate)
sd.wait()
sounddevice播放音频
python -m pip install soundfile sounddevice
import soundfile
import sounddevice
# List every sound device known to the system. The module is referenced by
# the name this snippet imports it under (``sounddevice``).
devs = sounddevice.query_devices()
print(devs)  # entries marked with > or < are the default devices
# How to read an entry: "1" is the device number, "麦克风阵列 (Synaptics Audio)"
# is the device name, and "MME (2 in, 0 out)" means the MME driver with
# 2 input channels and 0 output channels.
#
# Sample output (kept as comments so the backslashes in the device names
# are not interpreted as string escape sequences):
#
#     0 Microsoft Sound Mapper - Input, MME (2 in, 0 out)
#  >  1 麦克风阵列 (Synaptics Audio), MME (2 in, 0 out)
#     2 Microsoft Sound Mapper - Output, MME (0 in, 2 out)
#  <  3 扬声器 (Synaptics Audio), MME (0 in, 2 out)
#     4 主声音捕获驱动程序, Windows DirectSound (2 in, 0 out)
#     5 麦克风阵列 (Synaptics Audio), Windows DirectSound (2 in, 0 out)
#     6 主声音驱动程序, Windows DirectSound (0 in, 2 out)
#     7 扬声器 (Synaptics Audio), Windows DirectSound (0 in, 2 out)
#     8 扬声器 (Synaptics Audio), Windows WASAPI (0 in, 2 out)
#     9 麦克风阵列 (Synaptics Audio), Windows WASAPI (2 in, 0 out)
#    10 麦克风阵列 1 (Synaptics Audio capture), Windows WDM-KS (2 in, 0 out)
#    11 麦克风阵列 2 (Synaptics Audio capture), Windows WDM-KS (4 in, 0 out)
#    12 麦克风阵列 3 (Synaptics Audio capture), Windows WDM-KS (4 in, 0 out)
#    13 Output 1 (Synaptics Audio output), Windows WDM-KS (0 in, 2 out)
#    14 Output 2 (Synaptics Audio output), Windows WDM-KS (0 in, 8 out)
#    15 Input (Synaptics Audio output), Windows WDM-KS (2 in, 0 out)
#    16 耳机 (@System32\drivers\bthhfenum.sys,#2;%1 Hands-Free AG Audio%0
#       ;(iGene-U2)), Windows WDM-KS (0 in, 1 out)
#    17 耳机 (@System32\drivers\bthhfenum.sys,#2;%1 Hands-Free AG Audio%0
#       ;(iGene-U2)), Windows WDM-KS (1 in, 0 out)
#    18 耳机 (), Windows WDM-KS (0 in, 2 out)

# Audio file to play; replace with the path of a real file.
path = "test.wav"
# soundfile.read returns the samples and the value passed below as the
# playback sample rate.
data, sampler = soundfile.read(path)
try:
    # A playback device can be chosen by device number: assign a number from
    # the listing above to index 1. Here the current value is written back,
    # so the selection is unchanged.
    sounddevice.default.device[1] = sounddevice.default.device[1]
    # data is a NumPy array according to the original note.
    sounddevice.play(data, samplerate=sampler)
except Exception as e:
    # Best-effort demo: report the failure together with its cause.
    print("播放失败", e)
else:
    # Only wait for playback when it actually started.
    sounddevice.wait()
Python文字转语音
import pyttsx3
import io
import sys
# Re-wrap stdout so the text printed below is written as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

# One engine is reused for the whole script.
engine = pyttsx3.init()
engine.setProperty('voice', 'zh')

# Ask for the file path by voice, then read it from the console.
engine.say('请输入您要播放的文件路径')
engine.runAndWait()
path_dir = input('请输入您要播放的文件路径后回车:')

# Read the text with an explicit encoding (matching the UTF-8 stdout above)
# instead of the platform default, and close the file before speaking.
with open(path_dir, 'r', encoding='utf-8') as f:
    lines = f.readlines()
print('要说的话:', lines)

# Queue every line, then speak the whole queue.
for line in lines:
    engine.say(line)
engine.runAndWait()

# Saving voice to a file.
# On Linux make sure that 'espeak' and 'ffmpeg' are installed.
engine.save_to_file('迪迦奥特曼(dijia.top)', 'test.mp3')
engine.runAndWait()
engine.stop()
speech 实现语音识别
import speech
# Fixed spoken replies, keyed by the recognized phrase.
replies = {
    "你好": "How are you?",
    "天气": "今天天气晴!",
}

# Loop forever: take spoken input, echo it back, then answer known phrases.
while True:
    heard = speech.input()  # receive spoken input
    speech.say("you said:" + heard)  # speak it back
    reply = replies.get(heard)
    if reply is not None:
        speech.say(reply)
SpeechRecognition
pip install SpeechRecognition