Python3调用依图长语音转写示例

今天又调试了一下依图的长语音转写接口。官方示例是 Python 2 的,而且 demo 里的请求地址写错了——应该是文档更新了,但 demo 没有同步更新。

sample.py 文件改写了一下:官方这个 sample 感觉写得很不 Pythonic,这里稍微封装成了一个函数。

#sample.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright 2018-Present Shanghai Yitu Technology Co., Ltd.
# Licensed under the Apache License, Version 2.0

from yitu_speech.clients.long_audio_client import LongAudioClient
from yitu_speech.common.utils import audio_utils

# Your app id and api key
# Both values are handed to LongAudioClient(app_id, api_key) in get_result.
# NOTE: `xxxx` is a placeholder, not a real value -- fill in your own
# credentials before running this script.
app_id = xxxx # Your app id
api_key = 'xxxx' # Your api key
# slice0 = "test.wav"

def get_result(slice0):
    """Transcribe one audio file through the long-audio API and return its text.

    Runs the four steps in order: declare the audio, upload the file as a
    single slice (index 0), post a recognition job, then poll for the result.

    Args:
        slice0: Path of the audio file to upload.

    Returns:
        The ``resultText`` field of the finished job, or ``None`` when any
        step reports a failure or the job does not finish within the polling
        budget (a message is printed in each of those cases).
    """
    # Initialize client
    client = LongAudioClient(app_id, api_key)

    print(" create an audio")
    # The file's MD5 is sent with both the audio declaration and the upload.
    md5 = audio_utils.md5sum(slice0)

    # To create an audio: declared as 'pcm' consisting of exactly one part.
    res = client.create_audio('pcm', 1, md5)
    if res["rtn"] is None or res["rtn"] != 0 or res["audioId"] == "":
        print('create_audio fail res: %r'%res["rtn"])
        return None
    audio_id = res["audioId"]
    print("audio id:", audio_id)

    # To upload file part
    print(" upload a slice of an audio")
    res = client.upload_slice(audio_id, slice0, 0, md5)
    if res["rtn"] != 0:
        print('upload_slice fail res:%r'% res)
        return None

    # Post a job
    print(" post a job")
    res = client.post_job(audio_id)
    if res["rtn"] != 0 or res["jobId"] == "":
        print('post_job fail res:%r'%res)
        return None
    job_id = res["jobId"]
    print("job id:", job_id)

    # Get the job result
    print("try to get the result")
    res = client.wait_job_complete(job_id, 10)
    if res is None:
        # wait_job_complete yields None when its polling loop runs out before
        # the job finishes; indexing that value would raise TypeError.
        print('wait_job_complete timed out, job id: %r' % job_id)
        return None
    return res["resultText"]


if __name__ == '__main__':
    import sys

    # The audio path is taken from the first command-line argument, falling
    # back to "test.wav" when none is given.
    audio_path = sys.argv[1] if len(sys.argv) > 1 else "test.wav"
    res = get_result(audio_path)
    print(res)

yitu_speech/common/utils/audio_utils.py 文件读取音频时改成二进制方式读取

#audio_utils.py


# Copyright 2018-Present Shanghai Yitu Technology Co., Ltd. 
# Licensed under the Apache License, Version 2.0

import base64
import os
import hashlib


# Converts file into base64 encoded string
def load_file_base64(file_path):
    """Return the contents of ``file_path`` as a base64 text string.

    The file is read in binary mode and the base64 bytes are decoded as
    UTF-8.  Returns ``None`` when ``file_path`` is not an existing regular
    file.
    """
    if os.path.isfile(file_path):
        with open(file_path, "rb") as stream:
            raw_bytes = stream.read()
        return base64.b64encode(raw_bytes).decode('utf-8')
    return None


def md5sum(filename, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is opened in binary mode (required on Python 3, where hashlib
    only accepts bytes) and hashed chunk by chunk, so a long audio file is
    never held in memory all at once.  The ``with`` block closes the file
    even if reading fails.

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes read per iteration.

    Returns:
        The digest as a lowercase hex string.
    """
    digest = hashlib.md5()
    with open(filename, "rb") as fd:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: fd.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

yitu_speech/clients/long_audio_client.py demo文件的api地址写错了

#!/usr/bin/python
# -*- coding: UTF-8 -*-

# Copyright 2018-Present Shanghai Yitu Technology Co., Ltd.
# Licensed under the Apache License, Version 2.0


from ..common import constants
from ..common.core.base_client import BaseClient
from ..common.exception import error_code
from ..common.exception.exceptions import ClientException
import os
import time


class LongAudioClient(BaseClient):
    """Client for the long-audio speech recognition HTTP API.

    Call order used by the sample script: ``create_audio`` ->
    ``upload_slice`` -> ``post_job`` -> ``wait_job_complete``.  The HTTP
    transport is inherited from ``BaseClient``; each public method returns
    whatever the corresponding ``BaseClient`` request helper returns.
    """

    # Address shipped with the official demo, which is wrong:
    # _asr_url = "http://long-asr-prod.yitutech.com/lasr-api/"
    # Correct address:
    _asr_url = "http://long-asr-prod.yitutech.com/lasr-api/v2/asr"

    def _post(self, url, data):
        """Send ``data`` to ``url`` via ``BaseClient._post_request``."""
        return super(LongAudioClient, self)._post_request(url, data)

    def _put(self, url, data):
        """Send ``data`` to ``url`` via ``BaseClient._put_multipart_request``."""
        return super(LongAudioClient, self)._put_multipart_request(url, data)

    def _get(self, url, data=None):
        """Query ``url`` via ``BaseClient._get_request``."""
        return super(LongAudioClient, self)._get_request(url, data)

    def create_audio(self, aue, num_of_parts, md5):
        """Declare an audio that will be uploaded afterwards.

        Args:
            aue: Audio encoding; must be a member of
                ``constants.CONSTANT_AUE_SET``.
            num_of_parts: Number of parts the audio is split into; must be
                greater than 0.
            md5: MD5 string sent with the declaration; must be non-blank.

        Returns:
            The response of the POST request to ``<_asr_url>/audio``.

        Raises:
            ClientException: With ``APP_RES_INVALID_PARAM`` when any argument
                fails the checks above.
        """
        audio_url = self._asr_url + "/audio"

        # Assert param
        if aue not in constants.CONSTANT_AUE_SET:
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        if num_of_parts <= 0:
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        if md5 is None or md5.strip() == "":
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        # Construct request body
        request_body = {'aue': aue, 'numOfParts': num_of_parts, "md5": md5}

        # Send request and return
        return self._post(audio_url, request_body)

    def upload_slice(self, audio_id, filepath, slice_index, md5):
        """Upload one part of a previously declared audio.

        Args:
            audio_id: Identifier of the declared audio; must be non-blank.
            filepath: Path of the file holding this part; must be an existing
                regular file.
            slice_index: Index of the part; must not be negative.
            md5: MD5 string sent along with the data.

        Returns:
            The response of the multipart PUT request to
            ``<_asr_url>/audio/<audio_id>/part/<slice_index>``.

        Raises:
            ClientException: With ``APP_RES_INVALID_PARAM`` when any argument
                fails the checks above.
        """
        # Assert param
        if audio_id is None or audio_id.strip() == "":
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        # isfile() is already False for a path that does not exist.
        if not os.path.isfile(filepath):
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        if slice_index < 0:
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        audio_url = self._asr_url + "/audio/" + str(audio_id) + "/part/" + str(slice_index)

        # Read inside a context manager so the handle is closed even when
        # reading fails; it was previously left open.
        with open(filepath, 'rb') as audio_file:
            audio_bytes = audio_file.read()

        data = {'audioData': (filepath, audio_bytes), 'md5': md5}

        # Send request and return
        return self._put(audio_url, data)

    def post_job(self, audio_id, lang=1, scene=0, custom_words=(), custom_words_id=()):
        """Post a recognition job for an uploaded audio.

        Args:
            audio_id: Identifier of the audio; must be non-blank.
            lang: Must equal ``constants.CONSTANT_LANG_MANDARIN``.
            scene: Must equal ``constants.CONSTANT_SCENE_GENERAL``.
            custom_words: Sent as ``customWords`` in the request body.
            custom_words_id: Sent as ``useCustomWordsID`` in the request body.

        Returns:
            The response of the POST request to ``<_asr_url>/job``.

        Raises:
            ClientException: With ``APP_RES_INVALID_PARAM`` when any argument
                fails the checks above.
        """
        audio_url = self._asr_url + "/job"

        # Assert param
        if audio_id is None or audio_id.strip() == "":
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        if lang != constants.CONSTANT_LANG_MANDARIN or scene != constants.CONSTANT_SCENE_GENERAL:
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        # Note that ``lang`` is sent as a string while ``scene`` is sent as is.
        request_body = {"audioId": audio_id, "lang": str(lang), "scene": scene,
                        "customWords": custom_words, "useCustomWordsID": custom_words_id}

        # Send request and return
        return self._post(audio_url, request_body)

    def get_job_result(self, job_id):
        """Fetch the current result of a job.

        Args:
            job_id: Identifier of the job; must be non-blank.

        Returns:
            The response of the GET request to
            ``<_asr_url>/job/<job_id>/plain``.

        Raises:
            ClientException: With ``APP_RES_INVALID_PARAM`` when ``job_id``
                is ``None`` or blank.
        """
        # Assert param
        if job_id is None or job_id.strip() == "":
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        audio_url = self._asr_url + "/job/" + job_id + "/plain"

        # Send request and return
        return self._get(audio_url)

    def wait_job_complete(self, job_id, retry):
        """Poll a job until its ``jobStatus`` exceeds 2 or the polls run out.

        The job is queried once and then up to ``retry`` more times, with a
        2 second sleep after every poll whose ``jobStatus`` is ``None`` or
        not greater than 2.

        Args:
            job_id: Identifier of the job; must be non-blank.
            retry: Number of re-polls allowed after the first one; must be
                at least 1.

        Returns:
            The first response whose ``jobStatus`` is greater than 2, or
            ``None`` when no poll within the budget produced one.  Callers
            must check for ``None`` before indexing the result.

        Raises:
            ClientException: With ``APP_RES_INVALID_PARAM`` when ``job_id``
                is blank or ``retry`` is below 1.
        """
        # Assert param
        if job_id is None or job_id.strip() == "":
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        if retry < 1:
            raise ClientException(error_code.APP_RES_INVALID_PARAM)

        polls_left = retry + 1  # the initial poll plus `retry` re-polls
        while polls_left > 0:
            result = self.get_job_result(job_id)
            status = result['jobStatus']

            if status is not None and status > 2:
                return result

            time.sleep(2)
            polls_left -= 1

        # Budget exhausted without the job reaching a status above 2.
        return None

你可能感兴趣的:(Python3调用依图长语音转写示例)