本文仅作为个人学习笔记,用于加深自己的记忆。
百度语音是百度提供的语音技术服务,目前基础服务永久免费,包含语音识别、语音合成、语音唤醒三个功能。
以下先介绍语音识别功能。
按照百度语音使用流程指南(如下图),首先要注册百度账号并申请为开发者,然后在应用管理中创建应用,选择要使用的服务,这里可以选择语音识别和语音合成2种。然后下载它的SDK(含有demo——eclipse工程),最后将其libs(so库和.jar文件)集成到自己的应用中,按照开发文档开发。
以下步骤都是以Android Studio的项目工程为例。
1. 将libs和res集成到自己的应用
将libs中文件和res中的文件复制到项目工程对应的文件夹,如下图
在build.gradle(Module:app)文件中添加如下代码,这是将jniLibs的路径指向libs文件夹,这样.so库就能导入项目中:
android {
……
sourceSets {
main {
// Point jniLibs at the libs folder so the bundled .so libraries are imported into the project.
jniLibs.srcDirs = ['libs']
}
}
}
dependencies {
// Add every .jar file found under libs as a dependency.
compile fileTree(include: ['*.jar'], dir: 'libs')
……
}
2.在AndroidManifest.xml文件中添加appId,权限等信息
在应用管理中创建应用后,会生成对应的 App ID、API Key 和 Secret Key,将它们填入下面的 meta-data 中。
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="com.cykj.baiduyuyintest">

    <!-- Permissions declared for the speech SDK: microphone, network, phone state and storage. -->
    <uses-permission android:name="android.permission.RECORD_AUDIO" />
    <uses-permission android:name="android.permission.INTERNET" />
    <uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
    <uses-permission android:name="android.permission.ACCESS_WIFI_STATE" />
    <uses-permission android:name="android.permission.CHANGE_WIFI_STATE" />
    <uses-permission android:name="android.permission.READ_PHONE_STATE" />
    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
    ……
    <application
        android:allowBackup="true"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:supportsRtl="true"
        android:theme="@style/AppTheme">

        <!-- Credentials generated when the application is created in the Baidu console. -->
        <meta-data
            android:name="com.baidu.speech.APP_ID"
            android:value="your appId" />
        <meta-data
            android:name="com.baidu.speech.API_KEY"
            android:value="your appKey" />
        <meta-data
            android:name="com.baidu.speech.SECRET_KEY"
            android:value="your secretKey" />

        <!-- Recognition service that the activity binds to through SpeechRecognizer. -->
        <service
            android:name="com.baidu.speech.VoiceRecognitionService"
            android:exported="false" />

        <!-- Recognition dialog activity shipped with the SDK. -->
        <activity
            android:name="com.baidu.voicerecognition.android.ui.BaiduASRDigitalDialog"
            android:configChanges="orientation|keyboardHidden|screenLayout"
            android:exported="false"
            android:screenOrientation="portrait"
            android:theme="@android:style/Theme.Dialog">
            <intent-filter>
                <action android:name="com.baidu.action.RECOGNIZE_SPEECH" />
                <category android:name="android.intent.category.DEFAULT" />
            </intent-filter>
        </activity>
        ……
    </application>
</manifest>
3.在Activity中添加代码
package com.cykj.baiduyuyintest;
import android.content.ComponentName;
import android.content.Intent;
import android.content.pm.PackageInfo;
import android.content.pm.PackageManager;
import android.os.Environment;
import android.speech.RecognitionListener;
import android.speech.SpeechRecognizer;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.util.Log;
import android.view.MotionEvent;
import android.view.View;
import android.view.ViewGroup;
import android.widget.Button;
import android.widget.FrameLayout;
import android.widget.RelativeLayout;
import android.widget.TextView;
import com.baidu.speech.VoiceRecognitionService;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.File;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class MainActivity extends AppCompatActivity implements RecognitionListener {
private static final String TAG = "MainActivity";
private SpeechRecognizer speechRecognizer;
private TextView tvResult;
private TextView tvError;
private View speechTips;
private View speechWave;
@Override
protected void onCreate(final Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
//创建识别器
speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this, new ComponentName(this, VoiceRecognitionService.class));
//注册识别监听
speechRecognizer.setRecognitionListener(this);
//语音识别说话时显示的录音界面(麦克风)
speechTips = View.inflate(this, R.layout.bd_asr_popup_speech, null);
speechWave = speechTips.findViewById(R.id.wave);
speechTips.setVisibility(View.GONE);
addContentView(speechTips, new FrameLayout.LayoutParams(ViewGroup.LayoutParams.MATCH_PARENT, ViewGroup.LayoutParams.MATCH_PARENT));
//点击按钮开始说话进行语音识别
Button bt = (Button) findViewById(R.id.bt);
bt.setOnTouchListener(new View.OnTouchListener() {
@Override
public boolean onTouch(View v, MotionEvent event) {
switch (event.getAction()) {
case MotionEvent.ACTION_DOWN://按下时开始识别
speechTips.setVisibility(View.VISIBLE);
speechRecognizer.cancel();
tvError.setText("");
tvResult.setText("");
startASR();
break;
case MotionEvent.ACTION_UP://抬起时停止识别
speechRecognizer.stopListening();
speechTips.setVisibility(View.GONE);
break;
}
return false;
}
});
//显示识别结果
tvResult = (TextView) findViewById(R.id.tv_result);
//显示错误内容
tvError = (TextView) findViewById(R.id.tv_error);
//进入语音唤醒
findViewById(R.id.bt_wp).setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
startActivity(new Intent(MainActivity.this, WakeUpActivity.class));
}
});
}
/**
* 开始识别
*/
void startASR() {
Intent intent = new Intent();
//绑定识别参数
bindParams(intent);
speechRecognizer.startListening(intent);
}
/**
* 设置识别参数
*
* @param intent
*/
private void bindParams(Intent intent) {
intent.putExtra("sample", 16000);// 离线仅支持16000采样率
intent.putExtra("language", "cmn-Hans-CN"); // 离线仅支持中文普通话
// intent.putExtra("prop", 10060); //垂直领域(按照文档意思应该是如果垂直领域是音乐,如果是类似发音优先识别和音乐相关的词语),2.1版本后离线功能请使用grammar参数
//识别中的提示音,比如识别成功,识别错误等
intent.putExtra("sound_start", R.raw.bdspeech_recognition_start);//
intent.putExtra("sound_end", R.raw.bdspeech_speech_end);
intent.putExtra("sound_success", R.raw.bdspeech_recognition_success);
intent.putExtra("sound_error", R.raw.bdspeech_recognition_error);
intent.putExtra("sound_cancel", R.raw.bdspeech_recognition_cancel);
// intent.putExtra("vad", "input"); //语音活动检测
// intent.putExtra("nlu", "enable"); //是否启用语义解析
//音频源,可以识别此pcm文件中的语音内容
// intent.putExtra("infile", Environment.getExternalStorageDirectory()+"/outfile1.pcm");
//保存识别过程产生的录音文件
intent.putExtra("outfile", Environment.getExternalStorageDirectory() + "/outfile.pcm");
/**根据开发文档,这个垂类设置应该是2.1版本之前的用法,2.1版本以后都用grammar代替了。
int prop = 10060;
// value替换为资源文件实际路径
intent.putExtra("asr-base-file-path", Environment.getExternalStorageDirectory()+"/s_1");
if (prop == 10060) {
// 地图类附加资源,value替换为资源文件实际路径
intent.putExtra("lm-res-file-path", Environment.getExternalStorageDirectory() + "/s_2_Navi");
} else if (prop == 20000) {
// 语音输入附加资源,value替换为资源文件实际路径
intent.putExtra("lm-res-file-path", Environment.getExternalStorageDirectory() + "/s_2_InputMethod");
}*/
// value替换为license文件实际路径,仅在使用临时license文件时需要进行设置,如果在[应用管理]中开通了离线授权,不需要设置该参数
//具体参考http://yuyin.baidu.com/docs/asr/171中离线授权一节
// intent.putExtra("license-file-path", Environment.getExternalStorageDirectory() + File.separator + "temp_license_2016-12-27.txt");
//设置离线识别grammar文件,此文件来自于自定义语义设置 http://yuyin.baidu.com/asr
intent.putExtra("grammar", "assets:///baidu_speech_grammar.bsg");
//设置slot-data参数,可以替代grammar文件中初始词条内容,比如自定义语义设置中"name"词条内容是"name = 张三, 李四, 王五",添加slot-data后将变为"王云"、"流利"
JSONObject slotData = new JSONObject();
JSONArray name = new JSONArray().put("王云").put("流利");
JSONArray app = new JSONArray().put("百度糯米").put("360卫士");
try {
slotData.put("name", name);
slotData.put("appname", app);
} catch (JSONException e) {
}
intent.putExtra("slot-data", slotData.toString());
Log.d(TAG, "---" + intent.getExtras().toString());
}
@Override
public void onReadyForSpeech(Bundle params) {
// 准备就绪
Log.v(TAG, "onReadyForSpeech--" + params);
}
@Override
public void onBeginningOfSpeech() {
// 开始说话处理
Log.v(TAG, "onBeginningOfSpeech--");
}
@Override
public void onRmsChanged(float rmsdB) {
//音量变化处理
final int VTAG = 0xFF00AA01;
Integer rawHeight = (Integer) speechWave.getTag(VTAG);
if (rawHeight == null) {
rawHeight = speechWave.getLayoutParams().height;
speechWave.setTag(VTAG, rawHeight);
}
RelativeLayout.LayoutParams params = (RelativeLayout.LayoutParams) speechWave.getLayoutParams();
params.height = (int) (rawHeight * rmsdB * 0.01);
params.height = Math.max(params.height, speechWave.getMeasuredWidth());
speechWave.setLayoutParams(params);
}
@Override
public void onBufferReceived(byte[] buffer) {
// 录音数据传出处理
Log.v(TAG, "onBufferReceived--");
}
@Override
public void onEndOfSpeech() {
//说话结束处理
Log.v(TAG, "onEndOfSpeech--");
}
@Override
public void onError(int error) {
//出错
Log.e(TAG, "onError--" + error);
StringBuilder sb = new StringBuilder();
switch (error) {
case SpeechRecognizer.ERROR_AUDIO:
sb.append("音频错误");
break;
case SpeechRecognizer.ERROR_CLIENT:
sb.append("其他客户端错误");
break;
case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
sb.append("权限不足");
break;
case SpeechRecognizer.ERROR_NETWORK:
sb.append("网络连接错误");
break;
case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
sb.append("网络连接超时");
break;
case SpeechRecognizer.ERROR_NO_MATCH:
sb.append("没有匹配的识别结果");
break;
case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
sb.append("引擎忙");
break;
case SpeechRecognizer.ERROR_SERVER:
sb.append("服务器端错误");
break;
case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
sb.append("超时");
break;
}
tvError.setText(sb);
}
@Override
public void onResults(Bundle results) {
//最终结果
Log.v(TAG, "onResults--" + results);
tvResult.setText("");
ArrayList resultsRecognition = results.getStringArrayList("results_recognition");
for (String s : resultsRecognition) {
tvResult.append(s + " ");
}
}
@Override
public void onPartialResults(Bundle partialResults) {
// 临时结果处理
Log.v(TAG, "onPartialResults--");
}
@Override
public void onEvent(int eventType, Bundle params) {
// 处理事件回调
Log.e(TAG, "onEvent--" + eventType + "--" + params);
}
}
识别返回结果:
onResults--Bundle[{results_recognition=[开始], error=0, origin_result={"content":{"item":["开始"]},"result":{"sn":"d42c1a78-0f44-40e9-8ea6-80e640110a6f","idx":-11,"res_type":3,"err_no":0,"corpus_no":6369803402677818267}}}]
参考的资料:百度开发文档地址,grammar文件生成地址,源代码下载