unity 使用百度语音进行语音识别

新建脚本,将下列代码复制进去即可(代码依赖 LitJson,需要先将其导入工程):

using LitJson;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.UI;

/// <summary>
/// Records audio from the default microphone, optionally saves it as a 16-bit PCM WAV file,
/// and posts the raw PCM data to the Baidu speech recognition REST endpoint.
/// The recognized text is exposed through <see cref="audioToString"/> and the supplied callback.
/// </summary>
public class ToWord : MonoBehaviour {
    // Size in bytes of the canonical RIFF/WAVE header written by WriteHeader.
    private const int WavHeaderSize = 44;
    // 16-bit PCM: two bytes per sample.
    private const int BytesPerSample = 2;
    // Scale factor used to map a float sample in [-1, 1] onto a signed 16-bit value.
    private const int Pcm16Scale = 32767;
    // Recording length (seconds) used when StartMic is called without an argument.
    private const int DefaultDurationSeconds = 10;

    private string token= "";                           // access_token obtained from the OAuth endpoint
    private string cuid = "11";        // user identifier sent with every recognition request
    // NOTE(review): the uploaded "speech" payload is headerless PCM produced by GetClipData,
    // while the declared format is "wav" — confirm against the Baidu API whether "pcm" is required.
    private string format = "wav";                  // audio format declared in the request
    private int rate = 8000;                        // sample rate used for recording and declared in the request
    private int channel = 1;                        // channel count declared in the request
    private string speech;                          // base64-encoded audio data
    private int len;                                // length in bytes of the audio before base64 encoding
    private string lan = "zh";                      // language
    private string grant_Type = "client_credentials";
    private string client_ID = "这里输入百度的appkey,自己到官网申请填入这里";  // Baidu app key
    private string client_Secret = "这里输入百度secretkey,自己到官网申请填写";  // Baidu secret key

    // NOTE(review): plain-HTTP endpoint; the access token travels unencrypted — consider the HTTPS variant.
    private string baiduAPI = "http://vop.baidu.com/server_api";
    private string getTokenAPIPath = "https://openapi.baidu.com/oauth/2.0/token";
    private byte[] clipByte;
    /// <summary>
    /// Text produced by the most recent recognition request (empty when it failed).
    /// </summary>
    public static string audioToString;
    public  AudioSource aud;
    private int audioLength;// length of the recording in whole seconds
    private int recordDuration = DefaultDurationSeconds; // duration requested in the last StartMic call

    public delegate void CallBack(string name);
    public delegate string ds();
    private static ToWord _toWord;

    public MicroPhoneManager m;

    public Text debugText;

    /// <summary>
    /// Registers this component as the shared instance and starts fetching the access token.
    /// </summary>
    private void Awake()
    {
        _toWord = this;
        StartCoroutine(GetToken(getTokenAPIPath));
    }

    /// <summary>
    /// Returns the instance registered in <see cref="Awake"/> (null before any instance has awoken).
    /// </summary>
    public static ToWord GetInstance() {
        return _toWord;
    }

    /// <summary>
    /// Requests an access token from the OAuth endpoint and stores it in <c>token</c>.
    /// </summary>
    /// <param name="url">Token endpoint URL.</param>
    private IEnumerator GetToken(string url)
    {
        WWWForm getTForm = new WWWForm();
        getTForm.AddField("grant_type", grant_Type);
        getTForm.AddField("client_id", client_ID);
        getTForm.AddField("client_secret", client_Secret);

        WWW getTW = new WWW(url, getTForm);
        yield return getTW;

        if (getTW.error != null)
        {
            Debug.Log("error:" + getTW.error);
            yield break;
        }

        // The response may be malformed or lack "access_token" (e.g. wrong credentials);
        // log instead of letting the exception kill the coroutine silently.
        try
        {
            token = JsonMapper.ToObject(getTW.text)["access_token"].ToString();
            Debug.Log("获取百度用户令牌 初始化完成");
        }
        catch (Exception e)
        {
            Debug.Log("error:" + e.Message);
        }
    }

    /// <summary>
    /// Starts recording from the default microphone into <see cref="aud"/>'s clip.
    /// Does nothing when no microphone device is present.
    /// </summary>
    /// <param name="durationTime">Maximum recording length in seconds.</param>
    public void StartMic(int durationTime = DefaultDurationSeconds)
    {
        if (Microphone.devices.Length == 0) return;
        Microphone.End(null);
        Debug.Log("Start");
        recordDuration = durationTime;
        aud.clip = Microphone.Start(null, false, durationTime, rate);
    }

    /// <summary>
    /// Stops recording and sends the captured audio for recognition without saving it to disk.
    /// </summary>
    /// <param name="cb">Invoked with the recognized text (empty on failure); may be null.</param>
    public void EndMic(CallBack cb)
    {
        if (!StopAndEncode(cb)) return;
        StartCoroutine(GetAudioString(baiduAPI, cb));
    }

    /// <summary>
    /// Stops recording, saves the clip as "&lt;info.ID&gt;_1.wav" under the audio data path,
    /// and sends the captured audio for recognition.
    /// </summary>
    /// <param name="cb">Invoked with the recognized text (empty on failure); may be null.</param>
    /// <param name="info">Supplies the ID used to build the WAV file name.</param>
    public void EndMic(CallBack cb,BtnInfo info)
    {
        if (!StopAndEncode(cb)) return;

        using (FileStream fs = CreateEmpty(Utils.GetAudioDataPath() + "/" + info.ID + "_1.wav"))
        {
            // Samples are written after the 44-byte placeholder first, then the header is
            // filled in so that the RIFF chunk size can be taken from the final stream length.
            ConvertAndWrite(fs, aud.clip);
            WriteHeader(fs, aud.clip);
            Debug.Log("保存成功");
        }
        StartCoroutine(GetAudioString(baiduAPI, cb));
    }

    /// <summary>
    /// Stops the microphone and fills <c>clipByte</c>, <c>len</c> and <c>speech</c> from the recorded clip.
    /// </summary>
    /// <param name="cb">Invoked with an empty string when no audio data is available; may be null.</param>
    /// <returns>True when audio data was captured and encoded; false otherwise.</returns>
    private bool StopAndEncode(CallBack cb)
    {
        int lastPos = Microphone.GetPosition(null);
        if (Microphone.IsRecording(null))
            audioLength = lastPos / rate; // stopped early: elapsed whole seconds
        else
            audioLength = recordDuration; // recording already ran to its full requested length
        Debug.Log("录音结束");
        Microphone.End(null);

        clipByte = GetClipData();
        if (clipByte == null)
        {
            // Nothing was recorded: report an empty result instead of dereferencing null.
            audioToString = "";
            if (cb != null)
            {
                cb(audioToString);
            }
            return false;
        }

        len = clipByte.Length;
        speech = Convert.ToBase64String(clipByte);
        return true;
    }

    /// <summary>
    /// Overwrites the first 44 bytes of <paramref name="stream"/> with a PCM WAV header
    /// describing <paramref name="clip"/> (16 bits per sample).
    /// Must be called after the sample data has been written, because the RIFF chunk size
    /// is derived from the current stream length.
    /// </summary>
    private void WriteHeader(FileStream stream, AudioClip clip)
    {
        int hz = clip.frequency;
        int channels = clip.channels;
        int samples = clip.samples;

        stream.Seek(0, SeekOrigin.Begin);

        stream.Write(Encoding.UTF8.GetBytes("RIFF"), 0, 4);
        // RIFF chunk size: everything after the 8-byte "RIFF"+size prefix.
        stream.Write(BitConverter.GetBytes((int)(stream.Length - 8)), 0, 4);
        stream.Write(Encoding.UTF8.GetBytes("WAVE"), 0, 4);

        stream.Write(Encoding.UTF8.GetBytes("fmt "), 0, 4);
        stream.Write(BitConverter.GetBytes(16), 0, 4);                  // fmt sub-chunk size for PCM
        stream.Write(BitConverter.GetBytes((ushort)1), 0, 2);           // audio format 1 = PCM
        stream.Write(BitConverter.GetBytes((ushort)channels), 0, 2);
        stream.Write(BitConverter.GetBytes(hz), 0, 4);
        stream.Write(BitConverter.GetBytes(hz * channels * BytesPerSample), 0, 4);        // byte rate
        stream.Write(BitConverter.GetBytes((ushort)(channels * BytesPerSample)), 0, 2);   // block align
        stream.Write(BitConverter.GetBytes((ushort)16), 0, 2);          // bits per sample

        stream.Write(Encoding.UTF8.GetBytes("data"), 0, 4);
        stream.Write(BitConverter.GetBytes(samples * channels * BytesPerSample), 0, 4);   // data size
    }

    /// <summary>
    /// Creates (or truncates) the file at <paramref name="filepath"/> and reserves 44 zero bytes
    /// for the WAV header. The caller owns and must dispose the returned stream.
    /// </summary>
    private FileStream CreateEmpty(string filepath)
    {
        FileStream fileStream = new FileStream(filepath, FileMode.Create);
        byte[] placeholder = new byte[WavHeaderSize];
        fileStream.Write(placeholder, 0, placeholder.Length);
        return fileStream;
    }

    /// <summary>
    /// Converts <paramref name="clip"/> to 16-bit PCM and writes it at the stream's current position.
    /// </summary>
    private void ConvertAndWrite(FileStream fileStream, AudioClip clip)
    {
        byte[] bytesData = ClipToPcm16(clip);
        fileStream.Write(bytesData, 0, bytesData.Length);
    }

    /// <summary>
    /// Posts the recorded audio to the recognition endpoint, stores the result in
    /// <see cref="audioToString"/> (empty on any failure) and invokes the callback.
    /// </summary>
    /// <param name="url">Recognition endpoint URL.</param>
    /// <param name="cb">Invoked with the recognized text; may be null.</param>
    private IEnumerator GetAudioString(string url, CallBack cb)
    {
        byte[] body = Encoding.UTF8.GetBytes(BuildRequestJson());
        Dictionary<string, string> headers = new Dictionary<string, string>();
        headers["Content-Type"] = "application/json";

        WWW getASW = new WWW(url, body, headers);
        yield return getASW;

        // Reset first so a failed request never reports the previous recording's text.
        audioToString = "";
        if (getASW.error == null)
        {
            audioToString = ParseRecognitionResult(getASW.text);
        }
        else
        {
            Debug.Log("error:" + getASW.error);
        }

        Debug.Log("此次语音文字为:" + audioToString);
        if (cb != null)
        {
            cb(audioToString);
        }
    }

    /// <summary>
    /// Serializes the current request fields into the JSON body sent to the recognition endpoint.
    /// </summary>
    private string BuildRequestJson()
    {
        JsonWriter jw = new JsonWriter();
        jw.WriteObjectStart();
        jw.WritePropertyName("format");
        jw.Write(format);
        jw.WritePropertyName("rate");
        jw.Write(rate);
        jw.WritePropertyName("channel");
        jw.Write(channel);
        jw.WritePropertyName("token");
        jw.Write(token);
        jw.WritePropertyName("cuid");
        jw.Write(cuid);
        jw.WritePropertyName("len");
        jw.Write(len);
        jw.WritePropertyName("speech");
        jw.Write(speech);
        jw.WriteObjectEnd();
        return jw.ToString();
    }

    /// <summary>
    /// Extracts the first recognition result from a response body.
    /// </summary>
    /// <param name="json">Raw response text.</param>
    /// <returns>
    /// The first entry of "result" with one trailing comma removed, or an empty string when
    /// "err_msg" is not "success." or the response cannot be parsed.
    /// </returns>
    private static string ParseRecognitionResult(string json)
    {
        try
        {
            JsonData response = JsonMapper.ToObject(json);
            string status = response["err_msg"].ToString();
            if (status != "success.")
            {
                Debug.Log("error:" + status);
                return "";
            }

            string text = response["result"][0].ToString();
            // Length check avoids the out-of-range Substring on an empty result.
            if (text.Length > 0 && text[text.Length - 1] == ',')
            {
                text = text.Substring(0, text.Length - 1);
            }
            return text;
        }
        catch (Exception e)
        {
            // Malformed JSON or missing keys: report as a failed recognition.
            Debug.Log("error:" + e.Message);
            return "";
        }
    }

    /// <summary>
    /// Converts the clip held by <see cref="aud"/> to 16-bit little-endian PCM.
    /// </summary>
    /// <returns>The PCM bytes, or null when there is no clip or it contains no samples.</returns>
    public byte[] GetClipData()
    {
        if (aud == null || aud.clip == null)
        {
            Debug.Log("录音数据为空");
            return null;
        }

        byte[] outData = ClipToPcm16(aud.clip);
        if (outData.Length <= 0)
        {
            Debug.Log("录音数据为空");
            return null;
        }

        return outData;
    }

    /// <summary>
    /// Reads every sample of <paramref name="clip"/> (all channels, in the order GetData returns them)
    /// and encodes each as a signed 16-bit little-endian value.
    /// </summary>
    private static byte[] ClipToPcm16(AudioClip clip)
    {
        // GetData fills as many floats as the array holds; size it for all channels so the
        // amount of data matches the data size written by WriteHeader.
        float[] samples = new float[clip.samples * clip.channels];
        clip.GetData(samples, 0);

        byte[] pcm = new byte[samples.Length * BytesPerSample];
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp so an out-of-range sample cannot wrap around when narrowed to 16 bits.
            short value = (short)(Mathf.Clamp(samples[i], -1f, 1f) * Pcm16Scale);
            pcm[i * BytesPerSample] = (byte)(value & 0xFF);
            pcm[i * BytesPerSample + 1] = (byte)((value >> 8) & 0xFF);
        }
        return pcm;
    }

    /// <summary>
    /// Debug UI: two buttons that start a default-length recording and end it without saving a file.
    /// </summary>
    private void OnGUI()
    {
        if (GUILayout.Button("Start"))
            StartMic();

        if (GUILayout.Button("End"))
            EndMic(null);

    }

    /// <summary>
    /// Mirrors the latest recognition result into the optional debug text element.
    /// </summary>
    private void Update()
    {
        if (debugText != null)
        {
            debugText.text = audioToString;
        }
    }
}

百度语音是通过 HTTP 请求的方式来进行识别的,最大的优点是全平台通用,不过识别率没有讯飞的高。

你可能感兴趣的:(unity)