语音转文字API调用(百度 谷歌 科大讯飞)

尝试调用百度、谷歌、科大讯飞的语音识别API进行语音转文字

1.谷歌(谷歌好像需要梯子)

需要下载ffmpeg.exe和ffprobe.exe
下载链接:https://ffmpeg.zeranoe.com/builds/
MP3的音频文件不能直接用谷歌的语音识别接口识别,可以参考这篇文章(https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/79832700),里面介绍了有关音频的一些理论知识(频率、赫兹等),也包括谷歌语音识别接口的使用。所以,在识别语音之前,我们要先对保存到本地的语音消息转换格式,这里用到了pydub所依赖的ffmpeg和ffprobe(从https://ffmpeg.zeranoe.com/builds/下载并解压,把ffmpeg.exe和ffprobe.exe复制到你想放的地方,然后在代码中引用一下就可以了;Windows平台可以,不知道Linux是否可以)。

将mp3转化成wav

from pydub import AudioSegment
import os
from os import path
 
# os.getcwd() returns the current working directory; an absolute path to the
# executables may be used here instead.
# os.path.join is used so the separator is not hard-coded.
# NOTE(review): pydub's documentation names `AudioSegment.converter` as the
# attribute holding the ffmpeg path — confirm that assigning
# `AudioSegment.ffmpeg` is honored by the installed pydub version.
AudioSegment.ffmpeg = os.path.join(os.getcwd(), 'ffmpeg.exe')
AudioSegment.ffprobe = os.path.join(os.getcwd(), 'ffprobe.exe')
 
def dealMp3(filePath, fileName):
    """Convert an MP3 file to a 16 kHz, mono, 16-bit WAV file.

    Args:
        filePath: Path of the source MP3 file.
        fileName: Path of the WAV file to write. A relative path is resolved
            against the current working directory.

    Returns:
        True when the WAV file exists after the export, otherwise False.
        When True, the source MP3 at ``filePath`` has been deleted.
    """
    sound = AudioSegment.from_mp3(filePath)

    # Convert the decoded audio instead of wrapping its raw PCM bytes in a new
    # segment with different parameters: relabelling the bytes as
    # 16 kHz / mono / 16-bit does not resample them, so any source that is not
    # already in that exact format would come out with the wrong speed/pitch.
    sound_wav = (
        sound.set_frame_rate(16000)   # 16 kHz sample rate
        .set_channels(1)              # mono
        .set_sample_width(2)          # 2 bytes = 16-bit samples
    )

    # Export the WAV file (relative names land in the current directory).
    sound_wav.export(fileName, format='wav')

    # Check that the WAV file was actually produced. The path is tested as
    # given so that absolute paths and non-Windows separators work too.
    isDeal = os.path.exists(fileName)

    # Once the WAV file exists the source MP3 is removed (optional cleanup).
    if isDeal:
        os.remove(filePath)

    return isDeal

将wav识别成文字


from pydub import AudioSegment
import os
from os import path
 
# os.getcwd() returns the current working directory; an absolute path to the
# executables may be used here instead.
# os.path.join is used so the separator is not hard-coded.
# NOTE(review): pydub's documentation names `AudioSegment.converter` as the
# attribute holding the ffmpeg path — confirm that assigning
# `AudioSegment.ffmpeg` is honored by the installed pydub version.
AudioSegment.ffmpeg = os.path.join(os.getcwd(), 'ffmpeg.exe')
AudioSegment.ffprobe = os.path.join(os.getcwd(), 'ffprobe.exe')
def voice2Text(file_name):
    """Transcribe a WAV file to Chinese text via Google speech recognition.

    Args:
        file_name: Name of the audio file, resolved relative to the directory
            that contains this script.

    Returns:
        The recognized text, or '无法翻译' when nothing was recognized or the
        request failed.
    """
    # Imported here because the import lines accompanying this snippet do not
    # bring `sr` into scope.
    import speech_recognition as sr

    voice_file = path.join(path.dirname(path.realpath(__file__)), file_name)

    # Use the audio file as the audio source.
    r = sr.Recognizer()
    with sr.AudioFile(voice_file) as source:
        audio = r.record(source)

    # Pre-set the result so the final return cannot hit an unbound name when
    # one of the except branches below is taken.
    content = ''
    try:
        content = r.recognize_google(audio, language='zh-CN')
        print("Google Speech Recognition:" + content)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Google Speech Recognition error; {0}".format(e))

    return content or '无法翻译'

2.百度

加载需要的库,其中from aip import AipSpeech是百度的


from pydub import AudioSegment
import os
from os import path
import speech_recognition as sr
from aip import AipSpeech
import time


下面这个函数是切割mp3的,因为百度语音识别的API单次只能识别60秒以内的音频

# Prefix for the exported segment files. No path separator is inserted when
# the file name is built, so the segments are named "<filePath>test0.mp3",
# "<filePath>test1.mp3", ... (i.e. "blocktest0.mp3" next to this location).
filePath = r'C:\Users\Administrator\Desktop\python工程\语音识别系统\block'


def get_wav_make(dataDir):
    """Split an MP3 file into consecutive segments of at most 59 seconds.

    The definition is active (not commented out) because the recognition
    script calls ``get_wav_make`` at module level.

    Args:
        dataDir: Path of the MP3 file to split.

    Each segment is exported as MP3 to ``filePath + 'test' + <index> + '.mp3'``
    with the index starting at 0.
    """
    sound = AudioSegment.from_mp3(dataDir)
    duration = sound.duration_seconds * 1000  # total length in milliseconds
    segment_ms = 59 * 1000  # segment length in milliseconds
    for index, begin in enumerate(range(0, int(duration), segment_ms)):
        # Slicing an AudioSegment is done in milliseconds.
        cut_wav = sound[begin:begin + segment_ms]
        cut_wav.export(filePath + 'test' + str(index) + '.mp3', format='mp3')

进行语音转文字

# Split the source recording into short segments before recognition.
get_wav_make(r'C:\Users\Administrator\Desktop\python工程\语音识别系统\22.mp3')
# Fill in the APP_ID / API_KEY / SECRET_KEY issued for your application after
# registering on the Baidu AI platform. The values below are placeholders
# (each ends with a space); replace the whole literal.
APP_ID = 'APP_ID '
API_KEY = 'API_KEY '
SECRET_KEY = 'SECRET_KEY '

# Baidu speech client used for the recognition request.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
# Helper: load a file as raw bytes.
def get_file_content(filePath):
    """Return the complete contents of the file at ``filePath`` as bytes."""
    with open(filePath, mode='rb') as stream:
        payload = stream.read()
    return payload
# Start recognition.
# The request below declares the payload as 'pcm' at 16000 Hz, so the MP3
# segment is decoded and converted to 16 kHz / mono / 16-bit raw PCM first;
# sending the MP3 file bytes unchanged would not match the declared format.
segment = AudioSegment.from_mp3(r'C:\Users\Administrator\Desktop\python工程\语音识别系统\blocktest2.mp3')
pcm_data = segment.set_frame_rate(16000).set_channels(1).set_sample_width(2).raw_data

start_time = time.time()
ret = client.asr(pcm_data, 'pcm', 16000, {
    # NOTE(review): 1537 is presumably Baidu's Mandarin model id — confirm
    # against the Baidu speech API documentation.
    'dev_pid': 1537,
})
# Elapsed time of the recognition request, in seconds.
used_time = time.time() - start_time
print("used time: {}s".format(round(used_time, 2)))
print('ret:{}'.format(ret))

3.科大讯飞
忘了

你可能感兴趣的:(上学,#,笔记,百度,语音识别,人工智能)