# iFLYTEK (科大讯飞) voice-dictation Linux SDK integration guide:
# https://www.xfyun.cn/doc/asr/voicedictation/Linux-SDK.html#_2、sdk集成指南
from ctypes import *
import time
import threading
# Load the SDK's shared library. The path is relative, so it is resolved
# against the current working directory when this module is imported.
dll = cdll.LoadLibrary("../Linux_iat1226_xxxxxxx/libs/x64/libmsc.so")
# Login parameters passed to MSPLogin; the appid must match the SDK package
# you downloaded.
login_params = b"appid = xxxxxx, work_dir = ."
FRAME_LEN = 640 # bytes
MSP_SUCCESS = 0
# Audio-position flags handed to QISRAudioWrite (first / middle / last chunk),
# followed by the recognizer status value named "complete".
MSP_AUDIO_SAMPLE_FIRST = c_int(1)
MSP_AUDIO_SAMPLE_CONTINUE = c_int(2)
MSP_AUDIO_SAMPLE_LAST = c_int(4)
MSP_REC_STATUS_COMPLETE = c_int(5)
# Path of the audio file to recognize when this file is run as a script.
filename = "./f1.wav"
class Msp:
    """Minimal wrapper around the iFLYTEK MSC dictation (QISR) C API.

    Every SDK call goes through the module-level ``dll`` handle.  Typical
    use is ``login()`` -> ``isr(...)`` -> ``logout()``.

    Attributes:
        recogStatus: ctypes int the SDK updates with the recognizer status.
        epStatus: ctypes int the SDK updates with the end-point status.
        sessionID: handle of the active QISR session, or None when no
            session is open.
        laststr: text recognized so far in the current session.
        count: number of audio chunks written in the current session.
        counter: not used by this class; kept for backward compatibility.
    """

    # Return code the SDK uses for success.
    _SUCCESS = 0
    # Recognizer status values the original code compared against as literals.
    # NOTE(review): names follow the SDK's msp_types.h (0 = result available,
    # 5 = recognition complete) - confirm against your SDK version.
    _REC_STATUS_SUCCESS = 0
    _REC_STATUS_COMPLETE = 5
    # End-point status reported once the speech end point has been detected.
    # NOTE(review): value taken from the SDK's msp_types.h - confirm.
    _EP_AFTER_SPEECH = 3
    # Bytes sent per QISRAudioWrite call.  Assuming 16 kHz / 16-bit mono PCM
    # this is ~102 ms of audio, which matches the 0.1 s pacing sleep.
    _CHUNK_BYTES = 1638 * 2

    def __init__(self):
        self.recogStatus = c_int(8)
        self.counter = 0
        self.laststr = ''
        self.sessionID = None
        self.epStatus = None
        self.count = 0

    def login(self):
        """Log in to the SDK with ``login_params``; return the SDK code."""
        return dll.MSPLogin(None, None, login_params)

    def logout(self):
        """Log out of the SDK; return the SDK code."""
        return dll.MSPLogout()

    def isr(self, audiofile, session_begin_params):
        """Recognize one audio file and return the recognized text.

        Args:
            audiofile: path of the audio file; it is read as raw bytes and
                streamed to the SDK in fixed-size chunks.
            session_begin_params: ``bytes`` parameter string handed to
                QISRSessionBegin.

        Returns:
            The text accumulated in ``self.laststr`` for this session.

        Raises:
            RuntimeError: if an SDK call reports a non-zero error code.
            OSError: if ``audiofile`` cannot be opened or read.
        """
        # Start from a clean slate so a reused instance sends the FIRST flag
        # again and does not prepend text from an earlier session.
        self.count = 0
        self.laststr = ''

        err = c_int()
        dll.QISRSessionBegin.restype = c_char_p
        self.sessionID = dll.QISRSessionBegin(None, session_begin_params, byref(err))
        print('QISRSessionBegin => self.sessionID:', self.sessionID, 'ret:', err.value)
        if err.value != self._SUCCESS or self.sessionID is None:
            self.sessionID = None
            raise RuntimeError(
                "QISRSessionBegin failed, error code: %d" % err.value)

        self.epStatus = c_int(0)
        self.recogStatus = c_int(0)
        try:
            with open(audiofile, 'rb') as wavFile:
                self._send_audio(wavFile)
            print("所有待识别音频已全部发送完毕")
            # Poll until the recognizer reports completion; get_result()
            # raises on SDK errors, so this loop cannot spin forever on a
            # broken session.
            while self.recogStatus.value != self._REC_STATUS_COMPLETE:
                self.get_result()
                if self.recogStatus.value != self._REC_STATUS_COMPLETE:
                    time.sleep(1)
        finally:
            # No-op after a normal finish (get_result already ended the
            # session); releases the session if anything above failed.
            self._end_session(b'aborted')
        return self.laststr

    def _send_audio(self, wavFile):
        """Stream ``wavFile`` to the open session chunk by chunk."""
        while True:
            wavData = wavFile.read(self._CHUNK_BYTES)
            if not wavData:
                break
            if self.count == 0:
                aud_stat = MSP_AUDIO_SAMPLE_FIRST
            else:
                aud_stat = MSP_AUDIO_SAMPLE_CONTINUE
            ret = dll.QISRAudioWrite(self.sessionID, wavData, len(wavData),
                                     aud_stat,
                                     byref(self.epStatus),
                                     byref(self.recogStatus))
            if ret != self._SUCCESS:
                raise RuntimeError(
                    "QISRAudioWrite failed, error code: %d" % ret)
            self.count += 1
            if self.recogStatus.value == self._REC_STATUS_SUCCESS:
                self.get_result()
            if self.recogStatus.value == self._REC_STATUS_COMPLETE:
                # get_result() has already closed the session; writing more
                # audio (including the LAST marker) would target a dead handle.
                return
            if self.epStatus.value == self._EP_AFTER_SPEECH:
                # End of speech detected: stop sending, as the SDK's own
                # sample program does.
                break
            time.sleep(0.1)
        # Tell the SDK that no more audio follows.
        ret = dll.QISRAudioWrite(self.sessionID, None, 0,
                                 MSP_AUDIO_SAMPLE_LAST,
                                 byref(self.epStatus),
                                 byref(self.recogStatus))
        if ret != self._SUCCESS:
            raise RuntimeError(
                "QISRAudioWrite (last) failed, error code: %d" % ret)

    def _end_session(self, hint):
        """End the active session, if any, passing ``hint`` (bytes)."""
        if self.sessionID is None:
            return
        session, self.sessionID = self.sessionID, None
        dll.QISRSessionEnd(session, hint)

    def get_result(self):
        """Fetch any pending result and append it to ``self.laststr``.

        Ends the session once the recognizer reports completion.

        Returns:
            The text accumulated so far.

        Raises:
            RuntimeError: if QISRGetResult reports a non-zero error code.
        """
        if self.sessionID is None:
            # No open session (not started, or already finished).
            return self.laststr
        err = c_int(0)
        dll.QISRGetResult.restype = c_char_p
        retstr = dll.QISRGetResult(self.sessionID, byref(self.recogStatus), 0, byref(err))
        if err.value != self._SUCCESS:
            raise RuntimeError(
                "QISRGetResult failed, error code: %d" % err.value)
        if retstr is not None:
            self.laststr += retstr.decode('utf-8')
            print(self.laststr)
        if self.recogStatus.value == self._REC_STATUS_COMPLETE:
            # The hint must be bytes: ctypes passes a str as wchar_t*, which
            # a char* parameter would misread.
            self._end_session(b'end')
            print('语音识别结束')
        return self.laststr
def XF_text(filepath, audiorate):
    """Recognize the speech in ``filepath`` and return the text.

    Args:
        filepath: path of the audio file to recognize.
        audiorate: sample rate of the audio; 16000 selects the Mandarin
            16 kHz parameter set, any other value uses the generic one.

    Returns:
        The recognized text accumulated by the ``Msp`` session.
    """
    msp = Msp()
    print("登录科大讯飞")
    msp.login()
    print("科大讯飞登录成功")
    if audiorate == 16000:
        session_begin_params = b"sub = iat, domain = iat, language = zh_cn, accent = mandarin, sample_rate = 16000, result_type = plain, result_encoding = utf8,vad_enable=0"
    else:
        session_begin_params = b"sub = iat, ptt = 0, result_encoding = utf8, result_type = plain, domain = iat"
    try:
        msp.isr(filepath, session_begin_params)
    finally:
        # Always release the SDK login, even when recognition fails.
        msp.logout()
    # Read the text from the instance rather than from isr()'s return value,
    # so the result is available whether or not isr() returns it.
    return msp.laststr
if __name__ == '__main__':
    # Script entry point: run recognition on the sample file, selecting the
    # 16000 Hz parameter set.
    res = XF_text(filename, 16000)