# 文本转语音-Azure+Python (Text-to-Speech with Azure + Python)

 

# -*- coding: utf-8 -*-


### 文本转语音 Azure ###
import os, requests, time
from xml.etree import ElementTree

'''
快速入门:https://docs.microsoft.com/zh-cn/azure/cognitive-services/speech-service/quickstart-python-text-to-speech
官方源码:https://github.com/Azure-Samples/Cognitive-Speech-TTS/blob/master/Samples-Http/Python/TTSSample.py

获取密钥:https://azure.microsoft.com/zh-cn/try/cognitive-services/my-apis/?apiSlug=speech-services&country=China&allowContact=true&fromLogin=True
终结点: https://westus.api.cognitive.microsoft.com/sts/v1.0
获取Token接口:https://westus.api.cognitive.microsoft.com/sts/v1.0/issueToken
文本转语音接口:https://westus.tts.speech.microsoft.com/cognitiveservices/v1
密钥 1: c97eb7a449684921b91c867bbc9138f4
密钥 2: 3b526f03d9ef4808af2920587b8b04b2
'''


class TextToSpeech(object):
    """Small demo client for the Azure Cognitive Services text-to-speech REST API.

    The flow is: exchange a subscription key for an access token, POST an SSML
    document to the synthesis endpoint, and write the returned audio to a
    ``.wav`` file in the current working directory.
    """

    # Token-issuing endpoint and speech-synthesis endpoint (westus region).
    TOKEN_URL = "https://westus.api.cognitive.microsoft.com/sts/v1.0/issueToken"
    TTS_URL = 'https://westus.tts.speech.microsoft.com/cognitiveservices/v1'

    # Audio output format sent in the X-Microsoft-OutputFormat header.
    # The result is saved with a ".wav" extension, so a "riff-*" format (which
    # carries a RIFF/WAV header) is required; the "raw-*" formats are headerless
    # PCM and produce a file that audio players cannot open as WAV.
    # Formats listed by the original author:
    #   raw-16khz-16bit-mono-pcm
    #   raw-8khz-8bit-mono-mulaw
    #   riff-8khz-8bit-mono-alaw
    #   riff-8khz-8bit-mono-mulaw
    #   riff-16khz-16bit-mono-pcm
    #   audio-16khz-128kbitrate-mono-mp3
    #   audio-16khz-64kbitrate-mono-mp3
    #   audio-16khz-32kbitrate-mono-mp3
    #   raw-24khz-16bit-mono-pcm
    #   riff-24khz-16bit-mono-pcm
    #   audio-24khz-160kbitrate-mono-mp3
    #   audio-24khz-96kbitrate-mono-mp3
    #   audio-24khz-48kbitrate-mono-mp3
    OUTPUT_FORMAT = 'riff-16khz-16bit-mono-pcm'

    # Short voice name; the full name is
    # 'Microsoft Server Speech Text to Speech Voice (en-US, Guy24KRUS)'.
    VOICE_NAME = 'en-US-Guy24kRUS'

    # Application name sent as User-Agent (must be shorter than 255 characters).
    USER_AGENT = 'Chrome/73.0.3683.86'

    # NOTE(review): this credential is committed in source. It is kept only so
    # that calling testOne() without arguments keeps working; prefer passing
    # ``subscription_key`` explicitly and rotate/remove this value.
    DEFAULT_SUBSCRIPTION_KEY = "3b526f03d9ef4808af2920587b8b04b2"

    # Sample text used when the caller does not supply any.
    DEFAULT_TEXT = (
        'At the end of the year, there were 77.586 million employed people nationwide, including 43.19 million urban workers. '
        'In the whole year, the number of new jobs in urban areas was 13.61 million, an increase of 100,000 over the previous year. '
        'At the end of the year, the national urban unemployment rate was 4.9%, down 0.1 percentage points from the end of the previous year; '
        'the urban registered unemployment rate was 3.8%, down 0.1 percentage point. '
        'The total number of migrant workers nationwide [9] was 288.36 million, an increase of 0.6% over the previous year. '
        'Among them, the number of migrant workers was 172.66 million, an increase of 0.5%; '
        'the number of local migrant workers was 115.7 million, an increase of 0.9%.'
    )

    def testOne(self, tts=None, subscription_key=None, proxies=None,
                verify=True, timeout=30):
        """Convert text to speech and save the audio as a WAV file.

        Args:
            tts: Text to synthesize. Defaults to ``DEFAULT_TEXT``.
            subscription_key: Azure subscription key. Defaults to
                ``DEFAULT_SUBSCRIPTION_KEY``.
            proxies: Optional ``requests``-style proxy mapping, e.g.
                ``{"http": "...", "https": "..."}``. ``None`` (the default)
                lets ``requests`` use its normal proxy resolution instead of
                the non-functional placeholder addresses used previously.
            verify: Passed to ``requests`` as ``verify``. Defaults to ``True``
                so the subscription key and token are not sent over an
                unverified TLS connection; pass ``False`` or a CA bundle path
                only when a proxy setup requires it.
            timeout: Per-request timeout in seconds (``None`` disables it).

        Returns:
            The name of the written audio file, or ``None`` if either HTTP
            request did not return status 200 (details are printed).

        Raises:
            ValueError: If ``tts`` is empty or only whitespace.
            requests.RequestException: Network-level errors are not caught
                here and propagate to the caller.
        """
        # Validate input before touching the network.
        if tts is None:
            tts = self.DEFAULT_TEXT
        if not tts.strip():
            raise ValueError("tts must be a non-empty string")
        if subscription_key is None:
            subscription_key = self.DEFAULT_SUBSCRIPTION_KEY

        timestr = time.strftime("%Y%m%d-%H%M")

        access_token = self._fetch_token(subscription_key, proxies, verify, timeout)
        if access_token is None:
            return None
        print(">> 获取到Token:" + access_token)

        headers = {
            # Authorization token, prefixed with the word "Bearer".
            'Authorization': 'Bearer ' + access_token,
            # Content type of the request body; accepted value: application/ssml+xml.
            'Content-Type': 'application/ssml+xml',
            'X-Microsoft-OutputFormat': self.OUTPUT_FORMAT,
            'User-Agent': self.USER_AGENT,
        }
        body = self._build_ssml(tts)

        print(">> 调用接口转换语音中......")
        response = requests.post(self.TTS_URL, headers=headers, data=body,
                                 proxies=proxies, verify=verify, timeout=timeout)
        if response.status_code != 200:
            self._report_failure(response)
            return None

        fileName = 'testsound-' + timestr + '.wav'
        with open(fileName, 'wb') as audio:
            audio.write(response.content)
        print(">> 文本转语音已完成,生成的音频文件:" + fileName)
        return fileName

    def _fetch_token(self, subscription_key, proxies, verify, timeout):
        """Request an access token; return it, or None if the request failed."""
        headers = {
            'Ocp-Apim-Subscription-Key': subscription_key
        }
        response = requests.post(self.TOKEN_URL, headers=headers,
                                 proxies=proxies, verify=verify, timeout=timeout)
        # Without this check an error body (e.g. for a rejected key) would be
        # forwarded as if it were a bearer token.
        if response.status_code != 200:
            self._report_failure(response)
            return None
        return str(response.text)

    @staticmethod
    def _build_ssml(text):
        """Build the SSML request body (bytes) that wraps *text* in a voice element."""
        xml_lang = '{http://www.w3.org/XML/1998/namespace}lang'
        xml_body = ElementTree.Element('speak', version='1.0')
        xml_body.set(xml_lang, 'en-us')
        voice = ElementTree.SubElement(xml_body, 'voice')
        voice.set(xml_lang, 'en-US')
        voice.set('name', TextToSpeech.VOICE_NAME)
        voice.text = text
        return ElementTree.tostring(xml_body)

    @staticmethod
    def _report_failure(response):
        """Print the status code and headers of a non-200 HTTP response."""
        print("[失败] response code: " + str(response.status_code)
              + "\nresponse headers: " + str(response.headers))

### Script entry point: run the text-to-speech demo once with its defaults.
if __name__ == "__main__":
    TextToSpeech().testOne()

 

# 你可能感兴趣的:(⦿,开发技术)