安装了 Google Voice Search 的系统可以直接免费使用 Google 的语音识别接口,实时识别语音。(Google Cloud API、百度、科大讯飞的都是初期免费,量大收费)。没有太高深的技术细节,下面直接上代码。可以实现识别文字跟随语音实时输出(前提要准备好访问外网,你懂的)。
Activity
的代码结构仿照百度语音的 demo 写的,有多余或不对的地方还望指正~~
layout
Activity
package com.magican.xy.rttransdemo.recognize;
import android.content.Intent;
import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.support.v7.app.AppCompatActivity;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.ScrollView;
import android.widget.TextView;
import com.magican.xy.rttransdemo.R;
import java.util.ArrayList;
import java.util.Locale;
/**
* Created by xy on 2017/2/6.
*/
public class AndroidSpeechActivity extends AppCompatActivity implements View.OnClickListener, RecognitionListener {
private static final String TAG = "AndroidSpeech";
private Button mStartBtn;
private TextView mLogTv;
private SpeechRecognizer mRecognizer;
private long mStartTime;
public static final int STATUS_None = 0;
public static final int STATUS_WaitingReady = 2;
public static final int STATUS_Ready = 3;
public static final int STATUS_Speaking = 4;
public static final int STATUS_Recognition = 5;
private int status = STATUS_None;
@Override
public void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.android_speech_activity);
mStartBtn = (Button) findViewById(R.id.and_speech_btn);
mLogTv = (TextView) findViewById(R.id.and_speech_tv);
if (mStartBtn != null) {
mStartBtn.setOnClickListener(this);
}
mRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
mRecognizer.setRecognitionListener(this);
}
@Override
public void onClick(View v) {
if (v.equals(mStartBtn)) {
switch (status) {
case STATUS_None:
start();
mStartBtn.setText("取消");
status = STATUS_WaitingReady;
break;
case STATUS_WaitingReady:
cancel();
status = STATUS_None;
mStartBtn.setText("开始");
break;
case STATUS_Ready:
cancel();
status = STATUS_None;
mStartBtn.setText("开始");
break;
case STATUS_Speaking:
stop();
status = STATUS_Recognition;
mStartBtn.setText("识别中");
break;
case STATUS_Recognition:
cancel();
status = STATUS_None;
mStartBtn.setText("开始");
break;
}
}
}
private void start() {
promptSpeechInput();
}
private void stop() {
mRecognizer.stopListening();
}
private void cancel() {
mRecognizer.cancel();
status = STATUS_None;
}
@Override
protected void onDestroy() {
super.onDestroy();
mRecognizer.destroy();
}
private void promptSpeechInput() {
mStartTime = System.currentTimeMillis();
Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault());
intent.putExtra(RecognizerIntent.EXTRA_PROMPT,
getString(R.string.speech_prompt));
intent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true);
mRecognizer.startListening(intent);
}
@Override
public void onReadyForSpeech(Bundle params) {
Log.d(TAG, "onReadyForSpeech");
status = STATUS_Ready;
print("准备完毕");
}
@Override
public void onBeginningOfSpeech() {
Log.d(TAG, "onBeginningOfSpeech");
mStartBtn.setText("说完了");
print("开始录音");
status = STATUS_Speaking;
}
@Override
public void onRmsChanged(float rmsdB) {
Log.d(TAG, "onRmsChanged: " + rmsdB);
}
@Override
public void onBufferReceived(byte[] buffer) {
}
@Override
public void onEndOfSpeech() {
Log.d(TAG, "onEndOfSpeech");
mStartBtn.setText("识别中");
print("开始识别");
status = STATUS_Recognition;
}
@Override
public void onError(int error) {
Log.e(TAG, "error code: " + error + " msg: " + getErrorMsg(error));
mStartBtn.setText("开始");
status = STATUS_None;
}
@Override
public void onResults(Bundle results) {
Log.d(TAG, "onResults: " + results.toString());
dump(results);
mStartBtn.setText("开始");
status = STATUS_None;
stop();
ArrayList nbest = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
if (nbest.size() > 0) {
print("翻译最终结果: " + nbest.get(0));
}
}
@Override
public void onPartialResults(Bundle partialResults) {
Log.d(TAG, "partialResults: " + partialResults.toString());
dump(partialResults);
ArrayList nbest = partialResults.getStringArrayList("android.speech.extra.UNSTABLE_TEXT");
if (nbest.size() > 0) {
print("翻译部分结果: " + nbest.get(0));
}
}
@Override
public void onEvent(int eventType, Bundle params) {
Log.d(TAG, "type: " + eventType + "params: " + params.toString());
dump(params);
}
private void print(final String msg) {
AndroidSpeechActivity.this.runOnUiThread(new Runnable() {
@Override
public void run() {
long t = System.currentTimeMillis() - mStartTime;
mLogTv.append(t + "ms ---- " + msg + "\n");
ScrollView sv = (ScrollView) mLogTv.getParent();
sv.smoothScrollTo(0, 1000000);
Log.d(TAG, "---- " + t + "ms ---- " + msg);
}
});
}
private void dump(Bundle bundle) {
if (bundle != null) {
Log.d(TAG, "--- dumping " + bundle.toString());
for (String key : bundle.keySet()) {
Object value = bundle.get(key);
Log.d(TAG, String.format("%s %s (%s)", key,
value.toString(), value.getClass().getName()));
}
}
}
public static String getErrorMsg(int error) {
switch (error) {
case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
return "Network operation timed out.";
case SpeechRecognizer.ERROR_NETWORK:
return "Other network related errors.";
case SpeechRecognizer.ERROR_AUDIO:
return "Audio recording error.";
case SpeechRecognizer.ERROR_SERVER:
return "Server sends error status.";
case SpeechRecognizer.ERROR_CLIENT:
return "Other client side errors.";
case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
return "No speech input.";
case SpeechRecognizer.ERROR_NO_MATCH:
return "No recognition result matched.";
case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
return "RecognitionService busy.";
case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
return "Insufficient permissions.";
default:
return "Unknown error.";
}
}
}