python实现语音转文字

一、下载模型地址

模型地址:https://alphacephei.com/vosk/models

python实现语音转文字_第1张图片

中文模型有两个可选:
小模型(例如 vosk-model-small-cn-0.22)体积小,加载快;
大模型体积大,加载慢。
模型只需要在程序每次启动时加载一次,之后可以重复使用。

二、代码

pip install SpeechRecognition vosk pyaudio

代码

import json

import speech_recognition as sr
from vosk import Model, KaldiRecognizer
# Shared SpeechRecognition recognizer; recognize_audio() uses it to calibrate
# for ambient noise and to capture audio from the microphone.
recognizer = sr.Recognizer()

def recognize_audio(model_path=r"C:\work\pytools\zh2", sample_rate=16000):
    """Continuously transcribe microphone speech to text with a Vosk model.

    Each phrase captured by ``recognizer.listen`` is fed to a Vosk
    ``KaldiRecognizer`` and the recognized text is printed. The loop runs
    until the user presses Ctrl+C.

    Args:
        model_path: Directory of an unpacked Vosk model, e.g.
            ``"vosk-model-small-cn-0.22"``.
        sample_rate: Sample rate in Hz used for BOTH the microphone and the
            Vosk recognizer; the two must agree or recognition degrades.
    """
    # Make sure the Vosk Chinese model has been downloaded and unpacked.
    # Load it before prompting: loading can be slow and the user should not
    # be told to speak while nothing is listening yet.
    model = Model(model_path)
    rec = KaldiRecognizer(model, sample_rate)

    # Open the microphone at the recognizer's sample rate instead of the
    # device default, so the captured PCM matches what Vosk expects.
    with sr.Microphone(sample_rate=sample_rate) as source:
        print("请开始说话...")
        recognizer.adjust_for_ambient_noise(source)  # calibrate for ambient noise
        try:
            while True:
                audio = recognizer.listen(source)  # blocks until one phrase is captured
                print("正在识别...")
                # listen() already returns a complete phrase. AcceptWaveform()
                # only returns True when Vosk detects an endpoint inside the
                # buffer, so its return value is ignored and FinalResult() is
                # used to flush the hypothesis for this phrase.
                rec.AcceptWaveform(audio.frame_data)
                result = json.loads(rec.FinalResult())
                text = result.get("text", "")
                if text:
                    print(f"你说了: {text}")
                else:
                    print("未能识别音频")
        except KeyboardInterrupt:
            # Ctrl+C is the only way out of the loop; exit without a traceback.
            print("已停止识别")


if __name__ == "__main__":
    recognize_audio()

把第三节代码中的参数(采样率 16000、每次读取 4000 帧)应用到第二节的代码中:

import speech_recognition as sr
import json
from vosk import Model, KaldiRecognizer

# Shared SpeechRecognition recognizer; recognize_audio() uses it to calibrate
# for ambient noise and to capture audio from the microphone.
recognizer = sr.Recognizer()

def setup_vosk(model_path=r"C:\work\pytools\zh2", sample_rate=16000):
    """Load a Vosk model and return a recognizer bound to it.

    Args:
        model_path: Directory of an unpacked Vosk model (defaults to the
            Chinese model location used by this script).
        sample_rate: Sample rate in Hz of the audio that will be fed to the
            recognizer; it must match the capture sample rate.

    Returns:
        A ``KaldiRecognizer`` ready to accept 16-bit mono PCM audio.
    """
    model = Model(model_path)
    return KaldiRecognizer(model, sample_rate)

def recognize_audio():
    """Continuously transcribe microphone speech to text with Vosk.

    Captures one phrase at a time from the microphone (16 kHz, 4000 frames
    per read), feeds it to the recognizer returned by ``setup_vosk`` and
    prints the recognized text. Runs until the user presses Ctrl+C.
    """
    # Load the model BEFORE prompting and calibrating: loading can be slow,
    # and doing it after the prompt leaves the user speaking into a
    # microphone that is not being read yet.
    recognizer_vosk = setup_vosk()

    # Sample rate must match the rate the KaldiRecognizer was created with.
    with sr.Microphone(sample_rate=16000, chunk_size=4000) as source:
        print("请开始说话...")
        recognizer.adjust_for_ambient_noise(source)  # calibrate for ambient noise

        try:
            while True:
                # Blocks until one complete phrase has been captured.
                audio = recognizer.listen(source)
                # AcceptWaveform() returning False only means Vosk saw no
                # endpoint inside this buffer, not that nothing was said.
                # The phrase is already complete, so flush it explicitly.
                recognizer_vosk.AcceptWaveform(audio.frame_data)
                result = json.loads(recognizer_vosk.FinalResult())
                text = result.get("text", "")
                if text:
                    print(f"你说了: {text}")
                else:
                    print("没有语音识别到内容")
        except KeyboardInterrupt:
            # Ctrl+C is the only way out of the loop; exit without a traceback.
            print("已停止识别")


if __name__ == "__main__":
    recognize_audio()


三、第二节的代码效果不太理想,改为直接用 pyaudio 读取音频流

import pyaudio
from vosk import Model, KaldiRecognizer
import json



if __name__ == '__main__':
    # Streaming recognition: read fixed-size chunks straight from the
    # microphone with PyAudio and let Vosk detect utterance boundaries.
    sample_rate = 16000       # must match the rate given to KaldiRecognizer
    frames_per_read = 4000    # 0.25 s of audio per read at 16 kHz

    model = Model(r"C:\work\pytools\zh2")
    recognizer = KaldiRecognizer(model, sample_rate)

    audio = pyaudio.PyAudio()
    stream = None
    try:
        stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=sample_rate,
            input=True,
            frames_per_buffer=frames_per_read,
        )
        print("开始")
        while True:
            # Do not raise on input overflow: if recognition falls behind,
            # dropping a little audio is better than crashing the loop.
            chunk = stream.read(frames_per_read, exception_on_overflow=False)
            # True means Vosk detected the end of an utterance.
            if recognizer.AcceptWaveform(chunk):
                text = json.loads(recognizer.Result())["text"]
                if text:  # skip empty results produced by silence
                    print(text)
    except KeyboardInterrupt:
        # Flush whatever was said just before Ctrl+C.
        text = json.loads(recognizer.FinalResult()).get("text", "")
        if text:
            print(text)
    finally:
        # Always release the audio device, even on errors.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        audio.terminate()

你可能感兴趣的:(python,开发语言)