U3D C# 中文语音识别功能 之 Hololens篇----百度语音识别REST API

下面介绍如何在 U3D 中开发适用于 Hololens(UWP 平台)的语音识别功能

首先,阅读这篇文章需要对Hololens有一定的了解。如果你了解的话,应该也知道Hololens本身其实也是支持语音识别的,而且效率和速度都不错。但是,它并不能识别中文。这是笔者在墙外面查了不少资料得到的结果。当然笔者能力有限,如果你发现它有方法支持中文语音识别的话,请告诉我!!!!!大恩不言谢。


说实话,文中讲解的不是很详细。


  1. 首先你先得了解一下百度的语音识别REST的相关API以及使用方式,很简单的。(由于这不是文章主体,所以不多加描述)
    相关链接:百度语音识别服务 —— 语音识别 REST API 开发笔记 ,
    API请求方式基本说明
    如果你看懂了,那么应该知道,我们首先需要 `token` 令牌。这个通过U3D的 `WWW` 类直接拿到就OK了,也很简单。最主要的是要上传给百度云的音频文件,所以对音频文件的处理,也是这个项目的重点。
    所以我们将创建两个类:一个用于网络处理 `SpeechRecognition`,另一个用于录制以及处理音频 `RecordingWav`。
  2. 能读这篇文章的大概都会面对同一个坑吧。那就是 UWP 不支持很多文件读写的类,比如 FileStream、Directory、File 等等。官方文档对此有所说明,并给出了推荐的替代类库以及这些类库的使用方法: https://docs.unity3d.com/Manual/windowsstore-missingtypes.html 。所以我们也就用这些类来对录音文件进行读写操作。具体怎么做,稍后在代码中说明。

1. 首先展示录制以及处理音频的类:RecordingWav

using UnityEngine;
using System;
using UnityEngine.UI;
using System.Collections;
using System.IO;

#if NETFX_CORE  //UWP下编译  
using Windows.Storage;
using StreamWriter = WinRTLegacy.IO.StreamWriter;
using StreamReader = WinRTLegacy.IO.StreamReader;
#else
using StreamWriter = System.IO.StreamWriter;
using StreamReader = System.IO.StreamReader;
#endif

/// <summary>
/// Records audio from the default microphone, converts the captured samples to
/// 16-bit little-endian PCM held in <see cref="memoryStream"/>, and then asks
/// <see cref="speechRecognition"/> to upload the result.
/// </summary>
[RequireComponent(typeof(AudioSource))]
public class RecordingWav : MonoBehaviour
{
    // Button image: tinted red while recording, white once recording stops.
    public Image imageButton;
    // Animator that plays the button's "Pressed" / "Highlighted" / "Normal" states.
    public Animator recordingButton;

    // Location of the placeholder file created by SaveWav.
    string filePath = null;
    // Length of the last recording in whole seconds (fractions are truncated).
    int audioLength_time;

    private AudioSource m_audioSource;
    private AudioClip m_audioClip;

    public const int SamplingRate = 8000;
    private const int HEADER_SIZE = 44;
    // Capacity, in seconds, of the clip requested from Microphone.Start.
    private const int MaxRecordingSeconds = 60;

    public SpeechRecognition speechRecognition;

    // True between a start request and the matching stop request.
    [HideInInspector]
    public bool isRecording = false;

    // Always reset to null by ConvertAndWrite; the audio itself lives in memoryStream.
    [HideInInspector]
    public Byte[] speech_Byte;

    // Holds the converted PCM bytes until SpeechRecognition consumes them.
    [HideInInspector]
    public MemoryStream memoryStream;

    // Optional external recorder; when assigned, UIPressed delegates to it.
    public MicrophoneManager microphoneManager;

    // Tracks the toggle state of the record button.
    bool isPressed = false;

    void Start()
    {
        m_audioSource = GetComponent<AudioSource>();
        filePath = Path.Combine(Application.persistentDataPath, "Microphone.wav");
    }

    /// <summary>
    /// Starts or stops a microphone recording.
    /// </summary>
    /// <param name="isRecording">
    /// <c>true</c> to (re)start recording; <c>false</c> to stop, convert the captured
    /// audio, and trigger playback and upload.
    /// </param>
    public void StartRecording(bool isRecording)
    {
        // The parameter shadows the public field, so the field has to be assigned explicitly.
        this.isRecording = isRecording;

        if (isRecording)
        {
            // Stop any recording still in progress before starting a new one.
            Microphone.End(null);
            m_audioClip = Microphone.Start(null, false, MaxRecordingSeconds, SamplingRate);
            imageButton.color = Color.red;
            return;
        }

        imageButton.color = Color.white;

        // Read the state before Microphone.End, which stops the device.
        bool micWasRunning = Microphone.IsRecording(null);
        int lastPos = Microphone.GetPosition(null);
        Microphone.End(null);

        if (micWasRunning)
        {
            audioLength_time = lastPos / SamplingRate;
        }
        else
        {
            // NOTE(review): a recording that ran into the clip's time limit may also end up
            // here if the device has already stopped on its own - confirm and handle if needed.
            audioLength_time = 0;
            Debug.Log("error : 录音时间太短");
        }

        // Integer seconds: anything that truncates to one second or less is rejected.
        if (audioLength_time <= 1)
        {
            if (micWasRunning)
            {
                Debug.Log("error : 录音时间太短");
            }
            return;
        }

        if (m_audioClip == null)
        {
            Debug.Log("error : no recorded clip available");
            return;
        }

        SaveWav(filePath, m_audioClip);
        PlayAudioClip();
    }

    /// <summary>
    /// Plays back the last recorded clip through the attached AudioSource,
    /// provided a clip exists and is longer than five seconds.
    /// </summary>
    public void PlayAudioClip()
    {
        // The null check must come first; the original order dereferenced a null clip.
        if (m_audioClip == null || m_audioClip.length <= 5)
        {
            return;
        }

        if (m_audioSource.isPlaying)
        {
            m_audioSource.Stop();
        }
        m_audioSource.clip = m_audioClip;
        m_audioSource.Play();
    }

    /// <summary>
    /// Recreates the target file as an empty placeholder and converts the clip into
    /// <see cref="memoryStream"/>. The PCM data is not written to disk by this class.
    /// </summary>
    /// <param name="filename">Path of the file to recreate.</param>
    /// <param name="clip">Recorded clip to convert.</param>
    /// <returns><c>true</c> when conversion was started; <c>false</c> on any failure.</returns>
    bool SaveWav(string filename, AudioClip clip)
    {
        if (string.IsNullOrEmpty(filename) || clip == null)
        {
            Debug.Log("error : invalid file name or clip");
            return false;
        }

        try
        {
            File.Delete(filename);
        }
        catch (Exception ex)
        {
            // Best effort: a stale file is not fatal, so only report it.
            Debug.Log(ex);
        }

        try
        {
            FileInfo info = new FileInfo(filename);
            if (!info.Exists)
            {
                // Create() returns an open stream; dispose it so the file handle is released.
                using (info.Create())
                {
                }
            }

            ConvertAndWrite(clip);
            return true;
        }
        catch (Exception ex)
        {
            Debug.Log("error : " + ex);
            return false;
        }
    }

    /// <summary>
    /// Converts the recorded part of the clip (plus one second of slack) from float
    /// samples to 16-bit little-endian PCM, stores it in <see cref="memoryStream"/>,
    /// and schedules the upload.
    /// </summary>
    /// <param name="clip">Recorded clip to convert.</param>
    void ConvertAndWrite(AudioClip clip)
    {
        // One extra second compensates for the truncated whole-second length.
        int wanted = (audioLength_time + 1) * SamplingRate * clip.channels;
        // Never request more than the clip holds, otherwise the read wraps to the start.
        int available = clip.samples * clip.channels;
        int sampleCount = Mathf.Min(wanted, available);
        if (sampleCount <= 0)
        {
            Debug.Log("error : recorded clip contains no samples");
            return;
        }

        float[] samples = new float[sampleCount];
        clip.GetData(samples, 0);

        // Each float sample becomes one Int16, i.e. two bytes.
        byte[] bytesData = new byte[samples.Length * sizeof(short)];
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp so that out-of-range samples cannot overflow the Int16 conversion.
            short pcm = (short)(Mathf.Clamp(samples[i], -1f, 1f) * short.MaxValue);
            bytesData[2 * i] = (byte)(pcm & 0xFF);
            bytesData[2 * i + 1] = (byte)((pcm >> 8) & 0xFF);
        }

        speech_Byte = null;

        // Release the buffer from a previous recording before replacing it.
        if (memoryStream != null)
        {
            memoryStream.Dispose();
        }
        memoryStream = new MemoryStream(bytesData, false);

        StartCoroutine(WriteFileStream());
    }

    /// <summary>
    /// Waits one second and then asks the recognizer to upload the buffered audio.
    /// </summary>
    IEnumerator WriteFileStream()
    {
        yield return new WaitForSeconds(1);

        if (speechRecognition == null)
        {
            Debug.Log("error : speechRecognition is not assigned");
            yield break;
        }

        speechRecognition.UploadAudio();
    }

    /// <summary>Plays the button's "Pressed" animation state.</summary>
    public void UIHighlighted()
    {
        recordingButton.Play("Pressed");
    }

    /// <summary>
    /// Toggles recording. Uses <see cref="microphoneManager"/> when one is assigned,
    /// otherwise falls back to <see cref="StartRecording"/>.
    /// </summary>
    public void UIPressed()
    {
        isPressed = !isPressed;

        recordingButton.Play("Highlighted");

        if (!microphoneManager)
        {
            StartRecording(isPressed);
            return;
        }

        if (isPressed)
        {
            microphoneManager.StartRecording();
            imageButton.color = Color.red;
        }
        else
        {
            microphoneManager.StopRecording();
            imageButton.color = Color.white;
        }
    }

    /// <summary>Plays the button's "Normal" animation state.</summary>
    public void UINormal()
    {
        recordingButton.Play("Normal");
    }
}

2. 接下来展示网络处理类:SpeechRecognition

using UnityEngine;
using System.Collections;
using System.Text;
using System;
using UnityEngine.Networking;
using UnityEngine.UI;
using System.IO;

#if NETFX_CORE  //UWP下编译  
using Windows.Storage;
using StreamWriter = WinRTLegacy.IO.StreamWriter;
using StreamReader = WinRTLegacy.IO.StreamReader;
#else
using StreamWriter = System.IO.StreamWriter;
using StreamReader = System.IO.StreamReader;
#endif

// JSON request body sent to the recognition endpoint. SpeechRecognition.GetIdentifyWords
// fills every field and serializes the object with JsonUtility.ToJson, so the field
// names are the JSON keys and must not be renamed.
[Serializable]
public class UploadData
{
    // Audio format label; SpeechRecognition sends "wav".
    public string format;
    // Sampling rate; SpeechRecognition sends 8000.
    public int rate;
    // Channel count; SpeechRecognition sends 1.
    public int channel;
    // Caller-supplied identifier string (a placeholder in SpeechRecognition).
    public string cuid;
    // Language code; SpeechRecognition sends "zh".
    public string lan;
    // Access token obtained from the token endpoint.
    public string token;
    // Base64 encoding of the audio bytes.
    public string speech;
    // Length in bytes of the audio before Base64 encoding.
    public int len;
   
}

// Target object for the token endpoint's JSON response, populated through
// JsonUtility.FromJsonOverwrite. Only access_token is read by SpeechRecognition;
// the remaining fields are filled when the response carries matching keys.
[Serializable]
public class BaiDuTokenData
{
    // Token passed on as UploadData.token.
    public string access_token;
    public string session_key;
    public string scope;
    public string refresh_token;
    public string session_secret;
    // Presumably the token lifetime in seconds - TODO confirm against the API documentation.
    public int expires_in;

}

// Target object for the recognition endpoint's JSON response, populated through
// JsonUtility.FromJsonOverwrite in SpeechRecognition.GetIdentifyWords.
[Serializable]
public class AcceptanceIdentification
{
    // NOTE(review): declared as string; if the service returns err_no as a JSON number,
    // verify that JsonUtility still populates this field.
    public string err_no;
    public string err_msg;
    public string sn;
    // Recognition candidates; SpeechRecognition displays and matches result[0].
    public string[] result;
    public string corpus_no;

}
/// <summary>
/// Uploads the audio buffered by <see cref="RecordingWav"/> to the recognition
/// endpoint, shows the recognized text, and colors the labels under
/// <see cref="text_Group_layout"/> according to whether their text was recognized.
/// </summary>
public class SpeechRecognition : MonoBehaviour
{
    // Parent object whose child Text labels are matched against the recognized text.
    public GameObject text_Group_layout;
    private Text[] textgroup;

    // Label that displays the recognition result or an error message.
    public Text show_text;

    // Recorder on the same GameObject that provides the audio buffer.
    private RecordingWav recordingWav;

    string filePath = null;

    // Request parameters copied into UploadData.
    string format = "wav";
    int rate = 8000;
    int channel = 1;
    string cuid = "xxxxx";   // Fill in your own identifier here.
    string lan = "zh";
    string token;
    string speech;
    int len;

    // Fill in your own client id and secret here.
    // NOTE(review): avoid committing real credentials to source control.
    string client_id = ".........";
    string client_secret = "..............";
    string url_token = "https://openapi.baidu.com/oauth/2.0/token";

    string url_api = "http://vop.baidu.com/server_api";

    string post_string;

    void Start()
    {
        filePath = Path.Combine(Application.persistentDataPath, "Microphone.wav");
        recordingWav = GetComponent<RecordingWav>();

        if (text_Group_layout == null)
        {
            return;
        }

        textgroup = text_Group_layout.gameObject.GetComponentsInChildren<Text>();
    }

    /// <summary>
    /// Starts the coroutine that fetches a token and uploads the buffered audio.
    /// </summary>
    public void UploadAudio()
    {
        StartCoroutine(GetIdentifyWords());
    }

    /// <summary>
    /// Colors each label green when its text occurs in the recognized string and
    /// red otherwise. Does nothing when there are no labels or no recognized text.
    /// </summary>
    /// <param name="needTest">Recognized text to search in.</param>
    void MatchTheWords(string needTest)
    {
        if (text_Group_layout == null || textgroup == null || needTest == null)
        {
            return;
        }

        Debug.Log(needTest);
        foreach (Text label in textgroup)
        {
            label.color = needTest.Contains(label.text) ? Color.green : Color.red;
        }
    }

    /// <summary>
    /// Shows a message on <see cref="show_text"/>, or logs it when no label is assigned.
    /// </summary>
    void ShowMessage(string message)
    {
        if (show_text != null)
        {
            show_text.text = message;
        }
        else
        {
            Debug.Log(message);
        }
    }

    /// <summary>
    /// Parses JSON into an existing object without letting a malformed payload escape
    /// as an exception.
    /// </summary>
    /// <returns><c>true</c> when parsing succeeded.</returns>
    bool TryParseJson(string json, object target)
    {
        try
        {
            JsonUtility.FromJsonOverwrite(json, target);
            return true;
        }
        catch (Exception ex)
        {
            Debug.Log("error :" + ex);
            return false;
        }
    }

    /// <summary>
    /// Interprets the recognition response: shows the first result and matches it
    /// against the labels, or shows the raw response when no result is present.
    /// </summary>
    void HandleRecognitionResponse(string json)
    {
        AcceptanceIdentification acceptData = new AcceptanceIdentification();
        if (!TryParseJson(json, acceptData))
        {
            ShowMessage("error :" + json);
            return;
        }

        if (acceptData.result == null || acceptData.result.Length == 0)
        {
            // No recognition result: show the raw response so the service's own
            // error fields stay visible instead of a NullReferenceException dump.
            ShowMessage(json);
            return;
        }

        ShowMessage(acceptData.result[0]);
        MatchTheWords(acceptData.result[0]);
    }

    /// <summary>
    /// Coroutine that Base64-encodes the buffered audio, requests an access token,
    /// posts the recognition request, and displays the outcome. Aborts with a
    /// message when the audio or the token is unavailable.
    /// </summary>
    IEnumerator GetIdentifyWords()
    {
        if (recordingWav == null || recordingWav.memoryStream == null)
        {
            ShowMessage("参数信息不够");
            yield break;
        }

        // The buffer is binary, so copy it out directly instead of going through a text reader.
        byte[] buffer = recordingWav.memoryStream.ToArray();
        recordingWav.memoryStream.Dispose();
        // Clear the reference so a later call cannot touch the disposed stream.
        recordingWav.memoryStream = null;

        len = buffer.Length;
        speech = Convert.ToBase64String(buffer);

        #region GetToken
        WWWForm form1 = new WWWForm();
        form1.AddField("grant_type", "client_credentials");
        form1.AddField("client_id", client_id);
        form1.AddField("client_secret", client_secret);

        token = null;
        using (WWW w1 = new WWW(url_token, form1))
        {
            yield return w1;

            if (string.IsNullOrEmpty(w1.error))
            {
                BaiDuTokenData getToken = new BaiDuTokenData();
                if (TryParseJson(w1.text, getToken))
                {
                    token = getToken.access_token;
                }
            }
            else
            {
                Debug.Log("error :" + w1.error);
            }
        }
        #endregion

        // Without a token or audio the request cannot succeed, so stop here.
        if (string.IsNullOrEmpty(token) || string.IsNullOrEmpty(speech))
        {
            ShowMessage("参数信息不够");
            yield break;
        }

        UploadData uploadData = new UploadData();
        uploadData.format = format;
        uploadData.rate = rate;
        uploadData.channel = channel;
        uploadData.cuid = cuid;
        uploadData.token = token;
        uploadData.speech = speech;
        uploadData.len = len;
        uploadData.lan = lan;

        Byte[] post_byte = Encoding.UTF8.GetBytes(JsonUtility.ToJson(uploadData));

        using (UnityWebRequest request = new UnityWebRequest(url_api, "POST"))
        {
            request.uploadHandler = new UploadHandlerRaw(post_byte);
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");

            // NOTE(review): newer Unity versions replace Send() with SendWebRequest().
            yield return request.Send();

            if (!string.IsNullOrEmpty(request.error))
            {
                ShowMessage("error :" + request.error);
                yield break;
            }

            HandleRecognitionResponse(request.downloadHandler.text);
        }
    }
}


这里写图片描述

Hello ,I am 李本心明


首先谢谢大家的支持,其次如果你碰到什么其他问题的话,欢迎来我自己的讨论群 559666429(扫扫下面二维码或者点击群链接 Unity3D[ 交流&副业]CLUB ),大家一起找答案,共同进步。同时欢迎各大需求商入驻,发布自己的需求,给群内伙伴提供副职,赚取外快。对了,资源的话,要在群公告里面找。

由于工作生活太忙了,对于大家的帮助时间已经没有之前那么充裕了。如果有志同道合的朋友,可以接受无偿的帮助别人,可以单独联系我,一块经营一下。
如果你有更好的经营方式也来联系我,加我QQ

在这里插入图片描述


你可能感兴趣的:(hololens语音)