新建脚本，将下列代码复制进去即可：
using LitJson;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.UI;
/// <summary>
/// Records audio from the default microphone, converts it to 16-bit PCM and
/// posts it to the Baidu speech REST endpoint to obtain the recognized text.
/// The most recent result is exposed through <see cref="audioToString"/> and
/// is also passed to the callback supplied to <c>EndMic</c>.
/// </summary>
public class ToWord : MonoBehaviour {
    /// <summary>Bytes used by one 16-bit PCM sample.</summary>
    private const int BytesPerSample = 2;

    /// <summary>Size in bytes of the canonical RIFF/WAVE header written by <see cref="WriteWav"/>.</summary>
    private const int WavHeaderSize = 44;

    /// <summary>Recording length, in seconds, used when the caller does not supply one.</summary>
    private const int DefaultDurationSeconds = 10;

    /// <summary>Scale factor that maps a float sample in [-1, 1] onto the Int16 range.</summary>
    private const float PcmScale = 32767f;

    private string token = ""; // access_token returned by the OAuth endpoint
    private string cuid = "11"; // user identifier sent with every recognition request
    // NOTE(review): the uploaded payload is header-less PCM while the declared
    // format is "wav" - confirm against the Baidu API which value is expected.
    private string format = "wav"; // audio format declared to the service
    private int rate = 8000; // sampling rate used for recording and declared to the service
    private int channel = 1; // channel count declared to the service
    private string speech; // Base64-encoded audio payload
    private int len; // length in bytes of the raw (pre-Base64) audio
    private string lan = "zh"; // language code (currently not sent)
    private string grant_Type = "client_credentials";
    private string client_ID = "这里输入百度的appkey,自己到官网申请填入这里"; // Baidu app key
    private string client_Secret = "这里输入百度secretkey,自己到官网申请填写"; // Baidu secret key
    // NOTE(review): plain HTTP - the token and audio travel unencrypted; confirm
    // whether the HTTPS variant of this endpoint can be used instead.
    private string baiduAPI = "http://vop.baidu.com/server_api";
    private string getTokenAPIPath = "https://openapi.baidu.com/oauth/2.0/token";
    private byte[] clipByte;

    /// <summary>
    /// Text produced by the most recent recognition request; empty when the
    /// last request failed.
    /// </summary>
    public static string audioToString;

    /// <summary>Audio source whose clip receives the microphone recording.</summary>
    public AudioSource aud;

    private int audioLength; // duration of the last recording, in whole seconds

    public delegate void CallBack(string name);
    public delegate string ds();

    private static ToWord _toWord;
    public MicroPhoneManager m;

    /// <summary>Optional UI text that mirrors <see cref="audioToString"/>.</summary>
    public Text debugText;

    /// <summary>Registers this instance as the shared one and starts fetching the access token.</summary>
    private void Awake()
    {
        _toWord = this;
        StartCoroutine(GetToken(getTokenAPIPath));
    }

    /// <summary>Returns the instance registered by the most recent <c>Awake</c> call.</summary>
    public static ToWord GetInstance()
    {
        return _toWord;
    }

    /// <summary>
    /// Requests an access token from the OAuth endpoint and stores it in <c>token</c>.
    /// </summary>
    /// <param name="url">URL of the token endpoint.</param>
    private IEnumerator GetToken(string url)
    {
        WWWForm form = new WWWForm();
        form.AddField("grant_type", grant_Type);
        form.AddField("client_id", client_ID);
        form.AddField("client_secret", client_Secret);

        WWW request = new WWW(url, form);
        yield return request;

        if (!string.IsNullOrEmpty(request.error))
        {
            Debug.Log("error:" + request.error);
            yield break;
        }

        // The response may be malformed or may lack "access_token" (for example
        // when the credentials are rejected); log instead of letting the
        // exception escape the coroutine.
        try
        {
            token = JsonMapper.ToObject(request.text)["access_token"].ToString();
            Debug.Log("获取百度用户令牌 初始化完成");
        }
        catch (Exception ex)
        {
            Debug.Log("error:" + ex.Message);
        }
    }

    /// <summary>Starts recording with the default duration.</summary>
    public void StartMic()
    {
        StartMic(DefaultDurationSeconds);
    }

    /// <summary>
    /// Starts recording from the default microphone into <see cref="aud"/>'s clip.
    /// Does nothing when no microphone is available.
    /// </summary>
    /// <param name="durationTime">Maximum recording length in seconds; must be positive.</param>
    public void StartMic(int durationTime)
    {
        if (Microphone.devices.Length == 0)
        {
            return;
        }
        if (aud == null || durationTime <= 0)
        {
            Debug.Log("StartMic ignored: missing AudioSource or non-positive duration");
            return;
        }

        Microphone.End(null);
        Debug.Log("Start");
        aud.clip = Microphone.Start(null, false, durationTime, rate);
    }

    /// <summary>
    /// Stops recording and sends the captured audio for recognition without
    /// saving it to disk.
    /// </summary>
    /// <param name="cb">Invoked with the recognized text, or an empty string on failure. May be null.</param>
    public void EndMic(CallBack cb)
    {
        if (!CaptureRecording())
        {
            ReportResult("", cb);
            return;
        }
        StartCoroutine(GetAudioString(baiduAPI, cb));
    }

    /// <summary>
    /// Stops recording, saves the captured audio as a WAV file named after
    /// <paramref name="info"/>, and sends it for recognition.
    /// </summary>
    /// <param name="cb">Invoked with the recognized text, or an empty string on failure. May be null.</param>
    /// <param name="info">Supplies the ID used in the saved file name.</param>
    public void EndMic(CallBack cb, BtnInfo info)
    {
        if (!CaptureRecording())
        {
            ReportResult("", cb);
            return;
        }

        // BtnInfo is declared elsewhere; the object cast keeps this check valid
        // whether it is a class or a struct.
        if ((object)info == null)
        {
            Debug.Log("EndMic: no BtnInfo supplied, recording not saved");
        }
        else
        {
            SaveRecording(Utils.GetAudioDataPath() + "/" + info.ID + "_1.wav");
        }

        StartCoroutine(GetAudioString(baiduAPI, cb));
    }

    /// <summary>
    /// Stops the microphone and fills <c>clipByte</c>, <c>len</c> and <c>speech</c>
    /// from the recorded portion of the clip.
    /// </summary>
    /// <returns>True when audio data was captured; false when there is nothing to send.</returns>
    private bool CaptureRecording()
    {
        // The position must be read before End(), which resets it.
        int lastPos = Microphone.GetPosition(null);
        bool wasRecording = Microphone.IsRecording(null);
        audioLength = wasRecording ? lastPos / rate : DefaultDurationSeconds;

        Debug.Log("录音结束");
        Microphone.End(null);

        AudioClip clip = aud != null ? aud.clip : null;
        if (clip == null)
        {
            Debug.Log("录音数据为空");
            return false;
        }

        // When stopped early, only the first lastPos frames hold recorded audio;
        // the remainder of the pre-allocated clip is silence.
        int frames = wasRecording ? Mathf.Min(lastPos, clip.samples) : clip.samples;
        clipByte = frames > 0 ? EncodePcm16(clip, frames) : null;
        if (clipByte == null || clipByte.Length == 0)
        {
            Debug.Log("录音数据为空");
            return false;
        }

        len = clipByte.Length;
        speech = Convert.ToBase64String(clipByte);
        return true;
    }

    /// <summary>
    /// Writes the captured PCM data to <paramref name="path"/> as a WAV file.
    /// Failures are logged and do not prevent recognition.
    /// </summary>
    private void SaveRecording(string path)
    {
        try
        {
            WriteWav(path, clipByte, aud.clip.frequency, aud.clip.channels);
            Debug.Log("保存成功");
        }
        catch (IOException ex)
        {
            Debug.Log("error:" + ex.Message);
        }
        catch (UnauthorizedAccessException ex)
        {
            Debug.Log("error:" + ex.Message);
        }
    }

    /// <summary>
    /// Writes a 16-bit PCM RIFF/WAVE file: a 44-byte header followed by the samples.
    /// </summary>
    /// <param name="path">Destination file; overwritten if it exists.</param>
    /// <param name="pcm">Interleaved little-endian 16-bit samples.</param>
    /// <param name="hz">Sampling rate.</param>
    /// <param name="channels">Number of interleaved channels.</param>
    private static void WriteWav(string path, byte[] pcm, int hz, int channels)
    {
        // BinaryWriter always emits little-endian values, which is what RIFF requires.
        using (FileStream stream = new FileStream(path, FileMode.Create))
        using (BinaryWriter writer = new BinaryWriter(stream))
        {
            writer.Write(Encoding.ASCII.GetBytes("RIFF"));
            writer.Write(WavHeaderSize - 8 + pcm.Length); // file size minus the 8-byte RIFF preamble
            writer.Write(Encoding.ASCII.GetBytes("WAVE"));
            writer.Write(Encoding.ASCII.GetBytes("fmt "));
            writer.Write(16); // size of the fmt chunk
            writer.Write((ushort)1); // audio format 1 = PCM
            writer.Write((ushort)channels);
            writer.Write(hz);
            writer.Write(hz * channels * BytesPerSample); // byte rate
            writer.Write((ushort)(channels * BytesPerSample)); // block align
            writer.Write((ushort)(BytesPerSample * 8)); // bits per sample
            writer.Write(Encoding.ASCII.GetBytes("data"));
            writer.Write(pcm.Length);
            writer.Write(pcm);
        }
    }

    /// <summary>
    /// Converts the first <paramref name="frameCount"/> frames of a clip to
    /// interleaved little-endian 16-bit PCM.
    /// </summary>
    private static byte[] EncodePcm16(AudioClip clip, int frameCount)
    {
        // GetData fills interleaved samples, so the buffer needs one slot per
        // channel per frame.
        float[] samples = new float[frameCount * clip.channels];
        if (samples.Length == 0)
        {
            return new byte[0];
        }
        clip.GetData(samples, 0);

        byte[] pcm = new byte[samples.Length * BytesPerSample];
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp first so an out-of-range sample saturates instead of wrapping.
            short value = (short)(Mathf.Clamp(samples[i], -1f, 1f) * PcmScale);
            pcm[i * BytesPerSample] = (byte)(value & 0xFF);
            pcm[i * BytesPerSample + 1] = (byte)((value >> 8) & 0xFF);
        }
        return pcm;
    }

    /// <summary>
    /// Posts the captured audio to the recognition endpoint and reports the result.
    /// </summary>
    /// <param name="url">URL of the recognition endpoint.</param>
    /// <param name="cb">Invoked with the recognized text, or an empty string on failure. May be null.</param>
    private IEnumerator GetAudioString(string url, CallBack cb)
    {
        if (string.IsNullOrEmpty(token))
        {
            // The token request started in Awake has not succeeded (yet).
            Debug.Log("error:access token not available");
            ReportResult("", cb);
            yield break;
        }

        JsonWriter jw = new JsonWriter();
        jw.WriteObjectStart();
        jw.WritePropertyName("format");
        jw.Write(format);
        jw.WritePropertyName("rate");
        jw.Write(rate);
        jw.WritePropertyName("channel");
        jw.Write(channel);
        jw.WritePropertyName("token");
        jw.Write(token);
        jw.WritePropertyName("cuid");
        jw.Write(cuid);
        jw.WritePropertyName("len");
        jw.Write(len);
        jw.WritePropertyName("speech");
        jw.Write(speech);
        jw.WriteObjectEnd();

        // UTF-8 keeps the body identical on every platform; Encoding.Default
        // depends on the host's code page.
        byte[] body = Encoding.UTF8.GetBytes(jw.ToString());
        Dictionary<string, string> headers = new Dictionary<string, string>();
        headers["Content-Type"] = "application/json";

        WWW request = new WWW(url, body, headers);
        yield return request;

        string recognized = "";
        if (!string.IsNullOrEmpty(request.error))
        {
            Debug.Log("error:" + request.error);
        }
        else
        {
            recognized = ParseRecognitionResult(request.text);
        }
        ReportResult(recognized, cb);
    }

    /// <summary>
    /// Extracts the first recognition candidate from a response body.
    /// </summary>
    /// <returns>
    /// The recognized text with one trailing comma removed, or an empty string
    /// when the service reported an error or the body could not be parsed.
    /// </returns>
    private static string ParseRecognitionResult(string json)
    {
        try
        {
            JsonData response = JsonMapper.ToObject(json);
            string status = response["err_msg"].ToString();
            if (status != "success.")
            {
                Debug.Log("error:" + status);
                return "";
            }

            string text = response["result"][0].ToString();
            if (text.Length > 0)
            {
                char last = text[text.Length - 1];
                // Accept both the ASCII and the full-width comma (U+FF0C).
                // NOTE(review): presumably the service emits the full-width form - confirm.
                if (last == ',' || last == '\uFF0C')
                {
                    text = text.Substring(0, text.Length - 1);
                }
            }
            return text;
        }
        catch (Exception ex)
        {
            Debug.Log("error:" + ex.Message);
            return "";
        }
    }

    /// <summary>Publishes a recognition result, logs it and notifies the callback if one was given.</summary>
    private void ReportResult(string text, CallBack cb)
    {
        audioToString = text;
        Debug.Log("此次语音文字为:" + audioToString);
        if (cb != null)
        {
            cb(audioToString);
        }
    }

    /// <summary>
    /// Converts the whole clip held by <see cref="aud"/> to 16-bit PCM.
    /// </summary>
    /// <returns>The PCM bytes, or null when there is no clip or the clip is empty.</returns>
    public byte[] GetClipData()
    {
        if (aud == null || aud.clip == null)
        {
            Debug.Log("录音数据为空");
            return null;
        }

        byte[] outData = EncodePcm16(aud.clip, aud.clip.samples);
        if (outData.Length <= 0)
        {
            Debug.Log("录音数据为空");
            return null;
        }
        return outData;
    }

    /// <summary>Draws two test buttons that start and stop a recording.</summary>
    private void OnGUI()
    {
        if (GUILayout.Button("Start"))
        {
            StartMic();
        }
        if (GUILayout.Button("End"))
        {
            EndMic(null);
        }
    }

    /// <summary>Mirrors the latest recognition result into the optional debug text.</summary>
    private void Update()
    {
        // debugText is optional; skip when it has not been assigned in the inspector.
        if (debugText != null)
        {
            debugText.text = audioToString;
        }
    }
}
百度语音是通过 HTTP 请求的方式来进行识别的，最大的优点是全平台通用，不过识别率没有讯飞的高。