目录
Unity 工具 之 Azure 微软连续语音识别ASR的简单整理
一、简单介绍
二、实现原理
三、注意事项
四、实现步骤
五、关键脚本
Unity 工具类,自己整理的一些游戏开发可能用到的模块,单独独立使用,方便游戏开发。
本节介绍:这里对使用微软 Azure 进行连续语音识别(ASR,语音转文本)的方法做简单整理。如果你有更好的方法,欢迎留言交流。
官网注册:
面向学生的 Azure - 免费帐户额度 | Microsoft Azure
官网技术文档网址:
技术文档 | Microsoft Learn
官网的ASR(语音转文本):
语音转文本快速入门 - 语音服务 - Azure AI services | Microsoft Learn
Azure Unity SDK 包官网:
安装语音 SDK - Azure Cognitive Services | Microsoft Learn
SDK具体链接:
https://aka.ms/csspeech/unitypackage
1、官网申请得到语音识别对应的 SPEECH_KEY 和 SPEECH_REGION
2、因为语音识别需要用到麦克风,移动端需要申请麦克风权限
3、开启语音识别,监听语音识别对应事件,即可获取到识别结果
1、如果运行时出现卡顿,请注意主线程与子线程之间的切换(例如把开启识别等耗时操作放到子线程,只在主线程更新 UI),这样通常可以缓解卡顿现象
2、电脑端(例如 Windows)运行可以不申请麦克风权限,但移动端(例如 Android)运行必须申请麦克风权限,否则无法成功开启识别,并可能报错:Exception with an error code: 0x15
System.ApplicationException: Exception with an error code: 0x15 at Microsoft.CognitiveServices.Speech.Internal.SpxExceptionThrower.ThrowIfFail (System.IntPtr hr) [0x00000] in <00000000000000000000000000000000>:0 at Microsoft.CognitiveServices.Speech.Recognizer.StartContinuousRecognition () [0x00000] in <00000000000000000000000000000000>:0 at Microsoft.CognitiveServices.Speech.Recognizer.DoAsyncRecognitionAction (System.Action recoImplAction) [0x00000] in <00000000000000000000000000000000>:0 at System.Threading.Tasks.Task.Execute () [0x00000] in <00000000000000000000000000000000>:0 at System.Threading.ExecutionContext.RunInternal (System.Threading.ExecutionContext executionContext, System.Threading.ContextCallback callback, System.Object state, System.Boolean preserveSyncCtx) [0x00000] in <00000000000000000000000000000000>:0 at System.Threading.Tasks.Task.ExecuteWithThreadLocal (System.Threading.Tasks.Task& currentTaskSlot) [0x00000] in
1、下载好SDK 导入
2、简单的搭建场景
3、编写对应脚本,测试语音识别功能
4、把测试脚本添加到场景中,并赋值
5、如果移动端,例如 Android 端,勾选如下,添加麦克风权限
6、运行,点击对应按钮,开始识别,Console 中可以看到识别结果
1、TestSpeechRecognitionHandler
using UnityEngine;
using UnityEngine.Android;
using UnityEngine.UI;
/// <summary>
/// Demo component that wires three UI buttons and a text label to a
/// <see cref="SpeechRecognitionHandler"/>: one button starts continuous
/// recognition, one stops it, and one quits the application. Recognized text
/// is appended to <see cref="ASRText"/>.
/// </summary>
public class TestSpeechRecognitionHandler : MonoBehaviour
{
    #region Data

    /// <summary>Button that quits the application (or stops play mode in the editor).</summary>
    public Button QuitButton;

    /// <summary>Button that starts continuous speech recognition.</summary>
    public Button ASRButton;

    /// <summary>Button that stops continuous speech recognition.</summary>
    public Button StopASRButton;

    /// <summary>Label that accumulates the recognized text.</summary>
    public Text ASRText;

    /// <summary>
    /// Recognition wrapper created in <see cref="SpeechInitialized"/>. It stays
    /// null if <see cref="Start"/> fails before reaching that point, so every
    /// use below is null-checked.
    /// </summary>
    SpeechRecognitionHandler m_SpeechAndKeywordRecognitionHandler;

    #endregion

    #region Lifecycle function

    /// <summary>
    /// Registers the button listeners, then requests the microphone permission
    /// and creates the recognition handler.
    /// </summary>
    void Start()
    {
        QuitButton.onClick.AddListener(OnClickQuitButton);
        ASRButton.onClick.AddListener(OnClickASRButton);
        StopASRButton.onClick.AddListener(OnClickStopASRButton);

        // Request the microphone permission (and initialize recognition).
        RequestMicrophonePermission();
    }

    /// <summary>
    /// Removes the listeners registered in <see cref="Start"/> so the buttons
    /// do not keep calling into this component after it is destroyed.
    /// </summary>
    void OnDestroy()
    {
        if (QuitButton != null)
        {
            QuitButton.onClick.RemoveListener(OnClickQuitButton);
        }
        if (ASRButton != null)
        {
            ASRButton.onClick.RemoveListener(OnClickASRButton);
        }
        if (StopASRButton != null)
        {
            StopASRButton.onClick.RemoveListener(OnClickStopASRButton);
        }
    }

    /// <summary>
    /// Stops continuous recognition when the application quits.
    /// </summary>
    async void OnApplicationQuit()
    {
        // The handler is null when Start() never completed; awaiting on it
        // would throw an unobservable exception out of this async void method.
        if (m_SpeechAndKeywordRecognitionHandler == null)
        {
            return;
        }
        await m_SpeechAndKeywordRecognitionHandler.StopContinuousRecognizer();
    }

    #endregion

    #region Private function

    /// <summary>
    /// On Android, requests the microphone permission if it has not been
    /// granted yet; on other platforms only logs a notice. Recognition is
    /// initialized afterwards in both cases.
    /// </summary>
    void RequestMicrophonePermission()
    {
        // Only Android needs an explicit runtime permission request here.
        if (Application.platform == RuntimePlatform.Android)
        {
            // Ask the user only when the permission has not been granted yet.
            if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
            {
                // NOTE(review): initialization below does not wait for the
                // user's answer; if the permission is still missing when
                // recognition is started the SDK is expected to fail
                // (error code 0x15) — confirm on device.
                Permission.RequestUserPermission(Permission.Microphone);
            }
        }
        else
        {
            // Other platforms: nothing to request.
            Debug.LogWarning("Microphone permission is not needed on this platform.");
        }

        SpeechInitialized();
    }

    /// <summary>
    /// Clears the output label, creates the recognition handler, hooks its
    /// callbacks and initializes it.
    /// </summary>
    private void SpeechInitialized()
    {
        ASRText.text = "";

        m_SpeechAndKeywordRecognitionHandler = new SpeechRecognitionHandler();
        m_SpeechAndKeywordRecognitionHandler.onRecognizingAction = (str) => { Debug.Log("onRecognizingAction: " + str); };
        // The label is updated through Loom.QueueOnMainThread rather than
        // directly from the callback.
        m_SpeechAndKeywordRecognitionHandler.onRecognizedSpeechAction = (str) => { Loom.QueueOnMainThread(() => ASRText.text += str); Debug.Log("onRecognizedSpeechAction: " + str); };
        m_SpeechAndKeywordRecognitionHandler.onErrorAction = (str) => { Debug.Log("onErrorAction: " + str); };
        m_SpeechAndKeywordRecognitionHandler.Initialized();
    }

    /// <summary>
    /// Quits the application; inside the editor it stops play mode instead.
    /// </summary>
    private void OnClickQuitButton()
    {
#if UNITY_EDITOR
        UnityEditor.EditorApplication.isPlaying = false;
#else
        Application.Quit();
#endif
    }

    /// <summary>
    /// Starts continuous recognition.
    /// </summary>
    private void OnClickASRButton()
    {
        m_SpeechAndKeywordRecognitionHandler?.StartContinuousRecognizer();
    }

    /// <summary>
    /// Stops continuous recognition.
    /// </summary>
    private async void OnClickStopASRButton()
    {
        if (m_SpeechAndKeywordRecognitionHandler == null)
        {
            return;
        }
        await m_SpeechAndKeywordRecognitionHandler.StopContinuousRecognizer();
    }

    #endregion
}
2、SpeechRecognitionHandler
using UnityEngine;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using System;
using Task = System.Threading.Tasks.Task;
/// <summary>
/// Wraps an Azure Speech SDK <see cref="SpeechRecognizer"/> for continuous
/// speech-to-text from the default microphone and forwards the SDK events to
/// the public callback fields.
/// </summary>
public class SpeechRecognitionHandler
{
    #region Data

    /// <summary>Prefix put in front of every log line written by this class.</summary>
    const string TAG = "[SpeechAndKeywordRecognitionHandler] ";

    /// <summary>Subscription/language configuration built in <see cref="Initialized"/>.</summary>
    private SpeechConfig m_SpeechConfig;

    /// <summary>Audio input configuration (default microphone).</summary>
    private AudioConfig m_AudioConfig;

    /// <summary>Recognizer of the current session; recreated on every start.</summary>
    private SpeechRecognizer m_SpeechRecognizer;

    /// <summary>ASR settings (key, region, language).</summary>
    private ASRConfig m_ASRConfig;

    // Recognition callbacks. Each receives a string and is invoked directly
    // from the corresponding SDK event handler below.
    // NOTE(review): SDK events are presumably raised off the Unity main
    // thread — marshal to the main thread before touching Unity objects.

    /// <summary>Receives the intermediate text while speech is being recognized.</summary>
    public Action<string> onRecognizingAction;

    /// <summary>Receives the final text of a recognized utterance.</summary>
    public Action<string> onRecognizedSpeechAction;

    /// <summary>Receives the error details when recognition is canceled by an error.</summary>
    public Action<string> onErrorAction;

    /// <summary>Receives a fixed message when the recognition session stops.</summary>
    public Action<string> onSessionStoppedAction;

    #endregion

    #region Public function

    /// <summary>
    /// Builds the speech and audio configuration from a fresh
    /// <see cref="ASRConfig"/>. Must be called before
    /// <see cref="StartContinuousRecognizer"/>.
    /// </summary>
    /// <remarks>
    /// Failures are logged and reported through <see cref="onErrorAction"/>
    /// instead of being thrown to the caller.
    /// </remarks>
    public void Initialized()
    {
        try
        {
            m_ASRConfig = new ASRConfig();
            Debug.Log(TAG + "m_LLMConfig.AZURE_SPEECH_RECOGNITION_LANGUAGE " + m_ASRConfig.AZURE_SPEECH_RECOGNITION_LANGUAGE);
            Debug.Log(TAG + "m_LLMConfig.AZURE_SPEECH_REGION " + m_ASRConfig.AZURE_SPEECH_REGION);

            m_SpeechConfig = SpeechConfig.FromSubscription(m_ASRConfig.AZURE_SPEECH_KEY, m_ASRConfig.AZURE_SPEECH_REGION);
            m_SpeechConfig.SpeechRecognitionLanguage = m_ASRConfig.AZURE_SPEECH_RECOGNITION_LANGUAGE;
            m_AudioConfig = AudioConfig.FromDefaultMicrophoneInput();
            Debug.Log(TAG + " Initialized 2 ====");
        }
        catch (Exception e)
        {
            Debug.LogError(TAG + " Initialized Exception : " + e);
            onErrorAction?.Invoke(e.Message);
        }
    }

    #endregion

    #region Private function

    /// <summary>
    /// Subscribes this object's handlers to the current recognizer's events.
    /// Does nothing when there is no recognizer.
    /// </summary>
    private void SetRecoginzeCallback()
    {
        Debug.Log(TAG + " SetRecoginzeCallback == ");
        if (m_SpeechRecognizer != null)
        {
            m_SpeechRecognizer.Recognizing += OnRecognizing;
            m_SpeechRecognizer.Recognized += OnRecognized;
            m_SpeechRecognizer.Canceled += OnCanceled;
            m_SpeechRecognizer.SessionStopped += OnSessionStopped;
            Debug.Log(TAG + " SetRecoginzeCallback OK ");
        }
    }

    /// <summary>
    /// Unsubscribes the event handlers from the current recognizer, disposes
    /// it and clears the field. Does nothing when there is no recognizer.
    /// </summary>
    private void ReleaseRecognizer()
    {
        SpeechRecognizer recognizer = m_SpeechRecognizer;
        if (recognizer == null)
        {
            return;
        }

        m_SpeechRecognizer = null;
        // Detach first so the old recognizer no longer references this object.
        recognizer.Recognizing -= OnRecognizing;
        recognizer.Recognized -= OnRecognized;
        recognizer.Canceled -= OnCanceled;
        recognizer.SessionStopped -= OnSessionStopped;
        recognizer.Dispose();
    }

    #endregion

    #region Callback

    /// <summary>
    /// Handles the SDK "recognizing" event: forwards the intermediate text to
    /// <see cref="onRecognizingAction"/>.
    /// </summary>
    /// <param name="s">Event sender.</param>
    /// <param name="e">Event data carrying the intermediate result.</param>
    private void OnRecognizing(object s, SpeechRecognitionEventArgs e)
    {
        Debug.Log(TAG + "RecognizingSpeech:" + e.Result.Text + " :[e.Result.Reason]:" + e.Result.Reason);
        if (e.Result.Reason == ResultReason.RecognizingSpeech)
        {
            // Parenthesized: '+' binds tighter than '==', so without the
            // parentheses the whole concatenated string is compared to null.
            Debug.Log(TAG + " Trigger onRecognizingAction is null :" + (onRecognizingAction == null));
            onRecognizingAction?.Invoke(e.Result.Text);
        }
    }

    /// <summary>
    /// Handles the SDK "recognized" event: forwards the final text to
    /// <see cref="onRecognizedSpeechAction"/>.
    /// </summary>
    /// <param name="s">Event sender.</param>
    /// <param name="e">Event data carrying the final result.</param>
    private void OnRecognized(object s, SpeechRecognitionEventArgs e)
    {
        Debug.Log(TAG + "RecognizedSpeech:" + e.Result.Text + " :[e.Result.Reason]:" + e.Result.Reason);
        if (e.Result.Reason == ResultReason.RecognizedSpeech)
        {
            bool tmp = onRecognizedSpeechAction == null;
            Debug.Log(TAG + " Trigger onRecognizedSpeechAction is null :" + tmp);
            onRecognizedSpeechAction?.Invoke(e.Result.Text);
        }
    }

    /// <summary>
    /// Handles the SDK "canceled" event: when the reason is an error, forwards
    /// the error details to <see cref="onErrorAction"/>.
    /// </summary>
    /// <param name="s">Event sender.</param>
    /// <param name="e">Event data carrying the cancellation reason and details.</param>
    private void OnCanceled(object s, SpeechRecognitionCanceledEventArgs e)
    {
        Debug.LogFormat(TAG + "Canceled: Reason={0}", e.Reason);
        if (e.Reason == CancellationReason.Error)
        {
            onErrorAction?.Invoke(e.ErrorDetails);
        }
    }

    /// <summary>
    /// Handles the SDK "session stopped" event and notifies
    /// <see cref="onSessionStoppedAction"/>.
    /// </summary>
    /// <param name="s">Event sender.</param>
    /// <param name="e">Session event data.</param>
    private void OnSessionStopped(object s, SessionEventArgs e)
    {
        Debug.Log(TAG + "Session stopped event.");
        onSessionStoppedAction?.Invoke("Session stopped event.");
    }

    #endregion

    #region Continuous speech-to-text

    /// <summary>
    /// Starts continuous speech-to-text. Any previous recognizer is released
    /// and a new one is created; the work is handed to <c>Loom.RunAsync</c>.
    /// Errors are logged, not thrown.
    /// </summary>
    public void StartContinuousRecognizer()
    {
        Debug.LogWarning(TAG + "StartContinuousRecognizer");

        if (m_SpeechConfig == null || m_AudioConfig == null)
        {
            Debug.LogError(TAG + " StartContinuousRecognizer called before Initialized() succeeded");
            return;
        }

        try
        {
            // Run the start-up through Loom.RunAsync (adjust to your needs).
            Loom.RunAsync(async () =>
            {
                try
                {
                    // Always start from a fresh recognizer.
                    ReleaseRecognizer();
                    m_SpeechRecognizer = new SpeechRecognizer(m_SpeechConfig, m_AudioConfig);
                    SetRecoginzeCallback();

                    await m_SpeechRecognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                    Loom.QueueOnMainThread(() =>
                    {
                        Debug.LogWarning(TAG + "StartContinuousRecognizer QueueOnMainThread ok");
                    });
                    Debug.LogWarning(TAG + "StartContinuousRecognizer RunAsync ok");
                }
                catch (Exception e)
                {
                    Loom.QueueOnMainThread(() =>
                    {
                        Debug.LogError(TAG + " StartContinuousRecognizer 0 " + e);
                    });
                }
            });
        }
        catch (Exception e)
        {
            Debug.LogError(TAG + " StartContinuousRecognizer 1 " + e);
        }
    }

    /// <summary>
    /// Stops continuous speech-to-text if a recognizer exists. Errors are
    /// logged, not thrown.
    /// </summary>
    /// <returns>A task that completes when the stop request has finished.</returns>
    public async Task StopContinuousRecognizer()
    {
        try
        {
            // Read the field once so a concurrent start cannot null it between
            // the check and the call.
            SpeechRecognizer recognizer = m_SpeechRecognizer;
            if (recognizer != null)
            {
                await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
                // The recognizer is intentionally not disposed here; it is
                // released on the next StartContinuousRecognizer call.
                Debug.LogWarning(TAG + " StopContinuousRecognizer");
            }
        }
        catch (Exception e)
        {
            Debug.LogError(TAG + " StopContinuousRecognizer Exception : " + e);
        }
    }

    #endregion
}
3、ASRConfig
/// <summary>
/// Settings used to connect to the Azure speech recognition service.
/// All members are virtual so a subclass can supply different values.
/// </summary>
public class ASRConfig
{
    #region Azure ASR

    /// <summary>
    /// Azure Speech subscription key. The value here is a placeholder;
    /// replace it with your own key.
    /// </summary>
    public virtual string AZURE_SPEECH_KEY => "You_Key";

    /// <summary>Azure Speech service region.</summary>
    public virtual string AZURE_SPEECH_REGION => "eastasia";

    /// <summary>Language (locale) used for speech recognition.</summary>
    public virtual string AZURE_SPEECH_RECOGNITION_LANGUAGE => "zh-CN";

    #endregion
}