Let's go straight to the code!
using System.Collections;
using System.Collections.Generic;
using System.Text;
using UnityEngine;
using System.Net;
using System.Net.WebSockets;
using System.Net.Sockets;
using System;
using System.Security.Cryptography;
using System.Threading;
using UnityEngine.UI;
using System.Threading.Tasks;
namespace Webiat
{
public class TTSCore : MonoBehaviour
{
//Fill in your own credentials here; if you are not sure how to obtain them, look them up in the Xfyun console/documentation
private static string app_id = "APPID";
private static string api_secret = "APISECRET";
private static string api_key = "APIKEY";
public string Uri = "wss://tts-api.xfyun.cn/v2/tts";
List<float> _clipData = new List<float>();
[HideInInspector]
public ClientWebSocket ttsWebSocket;//websocket used for speech synthesis
#region ----------------------API authentication-----------------------------
/// <summary>
/// Build the authenticated API URL using an HMAC-SHA256 signature
/// </summary>
/// <param name="uriStr">The base TTS endpoint</param>
/// <returns>The websocket URL with the authorization, date and host parameters appended</returns>
public string GetUrl(string uriStr)
{
Uri uri = new Uri(uriStr);
string host = uri.Host;
string date = DateTime.UtcNow.ToString("R");
//Build the string to sign (the signature origin)
string signature_origin = $"host: {host}\ndate: {date}\nGET /v2/tts HTTP/1.1";
//Compute the final signature
string signature = hmacsha256(signature_origin, api_secret);
//Build the authorization origin string
string authorization_origin = $"api_key=\"{api_key}\", algorithm=\"hmac-sha256\", headers=\"host date request-line\", signature=\"{signature}\"";
//Base64-encode it to get the final authorization parameter
string authorization = Convert.ToBase64String(Encoding.UTF8.GetBytes(authorization_origin));
string url = $"wss://tts-api.xfyun.cn/v2/tts?authorization={authorization}&date={date}&host={host}";
return url;
}
/// <summary>
/// HMAC-SHA256 signing
/// </summary>
/// <param name="signature_origin">The string to sign</param>
/// <param name="secret">The API secret used as the HMAC key</param>
/// <returns>The Base64-encoded signature</returns>
private string hmacsha256(string signature_origin, string secret)
{
HMACSHA256 mac = new HMACSHA256(Encoding.UTF8.GetBytes(secret));
string signature = Convert.ToBase64String(mac.ComputeHash(Encoding.UTF8.GetBytes(signature_origin)));
return signature;
}
#endregion
#region ---------------------------------------Hand-rolled websocket client--------------------------------------
/// <summary>
/// Start speech synthesis
/// </summary>
/// <param name="text">The text to synthesize</param>
/// <param name="audioSource">The AudioSource that will receive the generated AudioClip</param>
public async Task StartTTS(string text, AudioSource audioSource)
{
if (ttsWebSocket != null)
{
ttsWebSocket.Abort();//abort any previous websocket before starting a new one
}
await ConnectTTSWebSocket(text, audioSource);//open the websocket connection and receive the data; you could also skip the await, but awaiting makes it easy to run follow-up logic once synthesis finishes
}
/// <summary>
/// Asynchronous method that connects the websocket and streams back the audio data
/// </summary>
/// <param name="text">The text to convert to speech</param>
/// <param name="audioSource">The AudioSource that will receive the generated AudioClip</param>
public async Task ConnectTTSWebSocket(string text, AudioSource audioSource)
{
using (ttsWebSocket = new ClientWebSocket())
{
CancellationToken ct = new CancellationToken();
Uri url = new Uri(GetUrl(Uri));
await ttsWebSocket.ConnectAsync(url, ct);//the GET request that opens the websocket carries the authorization, date and host parameters in the URL
text = Convert.ToBase64String(Encoding.UTF8.GetBytes(text));
TTSPara.common common = new TTSPara.common();
TTSPara.business business = new TTSPara.business();
TTSPara.data data = new TTSPara.data(text);
//Serialize the parameters into a JSON string
string message = JsonUtility.ToJson(new TTSPara.TTSParameter(common, business, data));
//Send the request
await ttsWebSocket.SendAsync(new ArraySegment<byte>(Encoding.UTF8.GetBytes(message)), WebSocketMessageType.Binary, true, ct);
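//Accumulates the UTF-8 JSON fragments of a response message until it is complete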
StringBuilder sb = new StringBuilder();
while (ttsWebSocket.State == WebSocketState.Open)
{
//Debug.Log("开始接收数据");
var result = new byte[4096];
await ttsWebSocket.ReceiveAsync(new ArraySegment<byte>(result), ct);
//去除空字符
List<byte> list = new List<byte>(result);
while (list[list.Count - 1] == 0x00)
{
list.RemoveAt(list.Count - 1);
}
var str = Encoding.UTF8.GetString(list.ToArray());
sb.Append(str);//append this fragment to the buffer
if (str.EndsWith("}}"))
{
CompositionData.Data data1 = JsonUtility.FromJson<CompositionData>(sb.ToString()).data;
Debug.Log("返回的数据内容data:" + JsonUtility.ToJson(data1));
int status = data1.status;
float[] fs = bytesToFloat(Convert.FromBase64String(data1.audio));
foreach (var f in fs)
{
//PlayQueue.Enqueue(f);
_clipData.Add(f);
}
sb.Clear();
if (status == 2)//status 2 marks the end of synthesis
{
ttsWebSocket.Abort();//synthesis has finished, so close the socket
//The total sample count is simply the size of the float list we collected; look up AudioClip <-> float conversion if this is unfamiliar
//Note: the third parameter "channels" matters here - it is 1, so _clipData.Count equals the AudioClip's samples
audioSource.clip = AudioClip.Create("tts", _clipData.Count, 1, 16000, false);
audioSource.clip.SetData(_clipData.ToArray(), 0);
_clipData.Clear();//that's basically it; you can write your own script to control playback of the AudioSource, which I won't cover here
break;
}
}
}
}
}
#region ----------------------Data definitions and conversion-----------------
/// <summary>
/// Convert the received byte[] into the float[] format an AudioClip can read
/// </summary>
/// <param name="byteArray">16-bit little-endian PCM bytes</param>
/// <returns>Samples in the range -1 to 1</returns>
public static float[] bytesToFloat(byte[] byteArray)//convert a byte[] array into the float[] type AudioClip can read
{
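//Every two bytes form one 16-bit PCM sample, so the float array is half the length of the byte array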
float[] sounddata = new float[byteArray.Length / 2];
for (int i = 0; i < sounddata.Length; i++)
{
sounddata[i] = bytesToFloat(byteArray[i * 2], byteArray[i * 2 + 1]);
}
return sounddata;
}
static float bytesToFloat(byte firstByte, byte secondByte)
{
// convert two bytes to one short (little endian)
//the byte order has to be adjusted depending on endianness
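//e.g. bytes 0x00, 0x40 in little-endian order give the short 0x4000 = 16384, which maps to 16384 / 32768f = 0.5f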
short s;
if (BitConverter.IsLittleEndian)
s = (short)((secondByte << 8) | firstByte);
else
s = (short)((firstByte << 8) | secondByte);
// convert to range from -1 to (just below) 1
return s / 32768.0F;
}
[Serializable]
public class CompositionData//synthesis result returned by the server
{
[Serializable]
public class Data
{
public int status;
public string audio;
}
public Data data;
}
#endregion
#endregion
}
}
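That was the core TTSCore script. The second script below defines the request parameter classes (TTSPara) that TTSCore serializes and sends.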
using System;
namespace TTSPara
{
[Serializable]
public class TTSParameter
{
public common common;
public business business;
public data data;
public TTSParameter(common common, business business, data data)
{
this.common = common;
this.business = business;
this.data = data;
}
public TTSParameter()
{
common = new common();
business = new business();
data = new data("default text");
}
}
[Serializable]
public class common
{
public string app_id = "de01ae2d";
}
[Serializable]
public class business
{
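//Synthesis options (see the Xfyun TTS documentation for the full list):
//aue - audio encoding; "raw" means uncompressed PCM, which is what the float conversion in TTSCore expects
//auf - audio sampling format; 16-bit, 16 kHz here, matching the AudioClip created in TTSCore
//vcn - voice (speaker) name
//speed / volume - 0 to 100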
public string aue = "raw";
//public int sfl = 0;
public string auf = "audio/L16;rate=16000";
public string vcn = "catherine";
public int speed = 25;
public int volume = 50;
//public int pitch=50;
//public int bgs=0;
//public string tte = "UTF8";
//public string reg = "2";
//public string rdn = "0";
}
[Serializable]
public class data
{
public string text;
public int status = 2;//2 marks the last (and here the only) frame of text
public data(string text)
{
this.text = text;
}
public data() { }
}
}
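For reference, here is a minimal sketch of what JsonUtility.ToJson produces from these parameter classes - the same payload ConnectTTSWebSocket sends. The class name and the Base64 sample text are placeholders of mine, not part of the original scripts.
using UnityEngine;
using TTSPara;
public class TTSRequestPreview : MonoBehaviour//hypothetical helper used only to preview the request JSON
{
void Start()
{
//Build the same parameter object TTSCore sends and log its JSON form
var request = new TTSParameter(new common(), new business(), new data("SGVsbG8="));//"SGVsbG8=" is Base64 for "Hello"; TTSCore Base64-encodes the text the same way
Debug.Log(JsonUtility.ToJson(request, true));
//Rough shape of the output: {"common":{"app_id":"..."},"business":{"aue":"raw",...},"data":{"text":"SGVsbG8=","status":2}}
}
}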
These two scripts are the most important part; you can copy them and use them directly. How you control playback is up to you, but I recommend awaiting StartTTS when you call it, otherwise you will run into errors. Below is a usage example.
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System;
using Webiat;
public class TTSPlayer : MonoBehaviour//wrapper class added so the example compiles on its own
{
public TTSCore TTSCore;//attach the TTSCore script above to a scene object and assign it here
public AudioSource _ttsTalkAudioSource;//don't forget to assign this either
public string resTxt = "text to synthesize";//the text passed to StartTTS (in practice this would come from elsewhere, e.g. a chat response)
public async void TTSPlay()
{
await TTSCore.StartTTS(resTxt, _ttsTalkAudioSource);
_ttsTalkAudioSource.Play();
}
//If you also want to invoke a callback after playback finishes, you can use the version below
public async void TTSPlay(Action action=null)
{
await TTSCore.StartTTS(resTxt, _ttsTalkAudioSource);
_ttsTalkAudioSource.Play();
if (action==null)
return;
float clipLength = _ttsTalkAudioSource.clip.length;
Debug.Log("合成音频时长:"+clipLength);
//下列两种延时调用都可用,不过个人建议使用第二种。需要注意的是第二种的参数单位是ms(毫秒),而第一种是s(秒)
//1、开启协程,延时调用action委托,此延时调用方法我已放到下边
StartCoroutine(DelayToInvoke.DelayToInvokeDo(() =>
{
action?.Invoke();
},clipLength));
//2、异步等待
//await Task.Delay(clipLength*1000);
//action?.Invoke();
}
using UnityEngine;
using System.Collections;
using System;
public class DelayToInvoke : MonoBehaviour
{
public static IEnumerator DelayToInvokeDo(Action action, float delaySeconds)
{
yield return new WaitForSeconds(delaySeconds);
action();
}
}
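If you prefer the second (asynchronous) approach, here is a minimal sketch of the callback version that uses Task.Delay instead of the coroutine. The class name is arbitrary; TTSCore, _ttsTalkAudioSource and resTxt are the same fields as in the example above.
using System;
using System.Threading.Tasks;
using UnityEngine;
using Webiat;
public class TTSPlayerAsync : MonoBehaviour//hypothetical name for this sketch
{
public TTSCore TTSCore;
public AudioSource _ttsTalkAudioSource;
public string resTxt = "text to synthesize";
public async void TTSPlay(Action action = null)
{
await TTSCore.StartTTS(resTxt, _ttsTalkAudioSource);
_ttsTalkAudioSource.Play();
if (action == null)
return;
//Task.Delay takes milliseconds, so convert the clip length (seconds) first
await Task.Delay((int)(_ttsTalkAudioSource.clip.length * 1000f));
action.Invoke();
}
}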