避开Google Voice Search利用Google Speech API实现Android语音识别

最近自己写一个小东西,突发奇想要做个语音识别出来,网上查了很多资料,发现大部分是要装google voice search,或者使用第三方的SDK,如讯飞等!

自己感觉不爽,毕竟无论是装google voice search还是申请讯飞的key都很麻烦,后来发现了http://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&maxresults=1&lang=zh-CN 这个地址后就产生了想法,于是就有了下面的东西。

首先是录音的代码:

privatevoidstartRecording(){if (mRecorder == null|| mRecorder.getState() != AudioRecord.STATE_INITIALIZED){        Message msg = mHandler.obtainMessage(MSG_ERROR,ERR_ILLEGAL_STATE,0);        mHandler.sendMessage(msg);return;    }     mRecorder.startRecording();if (mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING){        textView.setText(R.string.recording);new Thread(){@Overridepublicvoidrun(){byte[] tmpBuffer = newbyte[mBufferSize/2];while (mRecorder != null&& mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING){int numOfRead = mRecorder.read(tmpBuffer,0,tmpBuffer.length);if (numOfRead < 0){                        Message msg = mHandler.obtainMessage(MSG_ERROR,ERR_RECORDING,0);                        mHandler.sendMessage(msg);break;                    } float sum = 0;for (int i=0; i < tmpBuffer.length; i+=2){short t = (short)(tmpBuffer[i] | (tmpBuffer[i+1] <<8 ));                        sum += Math.abs(t);                    }float rms = sum/(tmpBuffer.length * 2);                    Message msg = mHandler.obtainMessage(MSG_RECORD_RECORDING,(int)rms,0);                    mHandler.sendMessage(msg);if (mRecordedData.length > mRecordedLength + numOfRead){                        System.arraycopy(tmpBuffer,0,mRecordedData,mRecordedLength,numOfRead);                        mRecordedLength += numOfRead;                    }else {break;                    }                }                mHandler.sendEmptyMessage(MSG_RECORD_STOPPED);            }        }.start();     }else {        Message msg = mHandler.obtainMessage(MSG_ERROR,ERR_ILLEGAL_STATE,0);        mHandler.sendMessage(msg);    }}
   
   

因为Google的那个网址能识别的格式有限,而PCM又非常容易转化为wav格式的文件,所以下一步就是将录音的数据转换成wav格式。

从上面可以看到录音的数据我是存放到mRecordedData里面,而mRecordedLength是录音长度,下面是转化为wav格式的代码:

privatevoidcreateWavHeaderIfNeed(boolean forceCreate){if (!forceCreate && wavHeader != null){return;    }// sample rate * number of channel * bit per sample / bit per bytesint avgBytesPerSec = mSampleRate * mChannels * DEFAULT_PER_SAMPLE_IN_BIT / 8;    wavHeader = newbyte[]{            'R','I','F','F',           //id = RIFF , fixed chars0, 0, 0, 0,                // RIFF WAVE chunk size = 36 + data length'W','A','V','E',           //  Type/* Format chunk */'f','m','t',' ',          // id = 'fmt '16, 0, 0, 0,              // format chunk size = 16, if 18, means existing extension message1, 0,                     // format tag, 0x0001 = 16 pcm(byte)mChannels, 0, // number of channels (MONO = 1, STEREO =2)/* 4 bytes , sample rate */(byte)(mSampleRate & 0xff),            (byte)((mSampleRate >>8) & 0xff),            (byte)((mSampleRate >>16) & 0xff),            (byte)((mSampleRate >>24) & 0xff),/* 4 bytes average bytes per seconds */(byte)(avgBytesPerSec & 0xff),            (byte)((avgBytesPerSec >>8) & 0xff),            (byte)((avgBytesPerSec >>16) & 0xff),            (byte)((avgBytesPerSec >>24) & 0xff),/* 2 bytes, block align *//******************************             *              sample 1             ******************************             * channel 0 least| channel 0 most|             * ******************************/(byte)(DEFAULT_PER_SAMPLE_IN_BIT * mChannels / 8), // per sample in bytes0,/* 2 bytes, Bits per sample */16, 0,/* data chunk */'d','a','t','a', /// Id = 'data'0, 0, 0, 0// data size, set 0 due to unknown yet};} privatevoidsetWavHeaderInt(int offset,int value){if (offset < 0 || offset >40){//total length = 44, int length = 4,//44 - 4 = 40thrownew IllegalArgumentException("offset out of range");    }createWavHeaderIfNeed(false);     wavHeader[offset++] = (byte)(value & 0xff);    wavHeader[offset++] = (byte)((value >>8) & 0xff);    wavHeader[offset++] = (byte)((value >>16) & 0xff);    wavHeader[offset] = (byte)((value >>24) & 0xff);} privatebyte[] 
getWavData(){setWavHeaderInt(4,36+mRecordedLength);setWavHeaderInt(40,mRecordedLength);byte[] wavData = newbyte[44+mRecordedLength];    System.arraycopy(wavHeader,0,wavData,0,wavHeader.length);    System.arraycopy(mRecordedData,0,wavData,wavHeader.length,mRecordedLength);return wavData;}

  通过上面的getWavData()就可以获得wav格式的录音数据了。那么接下来就是提交到前面提到的网址上去等待返回的数据了。这一步很简单,就是做一个post的工作,代码如下:

private HttpURLConnection getConnection(){    HttpURLConnection connection = null;try{        URL httpUrl = new URL(GOOGLE_VOICE_API_URL + mLang);        connection = (HttpURLConnection)httpUrl.openConnection();        connection.setConnectTimeout(DEFAULT_CONNECT_TIMEOUT);        connection.setReadTimeout(DEFAULT_READ_TIMEOUT);        connection.setRequestMethod("POST");        connection.setDoInput(true);        connection.setDoOutput(true);        connection.setUseCaches(false);        connection.setRequestProperty("User-Agent",USER_AGENT);        connection.setRequestProperty("Content-Type",CONTENT_TYPE_WAV);    }catch (MalformedURLException ex){        JLog.e(TAG,"getConnection();Invalid url format",ex);    }catch (ProtocolException ex){        JLog.e(TAG, "getConnection();Un support protocol",ex);    }catch (IOException ex){        JLog.e(TAG,"getConnection();IO error while open connection",ex);    }return connection;} privatevoidstartWebRecognizer(finalbyte[] wavData){    textView.setText(R.string.analyzing);final HttpURLConnection connection = getConnection();if (connection == null){        Message msg = mHandler.obtainMessage(MSG_ERROR,ERR_NETWORK,0);        mHandler.sendMessage(msg);    }else {new Thread(){@Overridepublicvoidrun(){try {                    DataOutputStream dos = new DataOutputStream(connection.getOutputStream());                    dos.write(wavData);                    dos.flush();                    dos.close();                     InputStreamReader inputStreamReader = new InputStreamReader(connection.getInputStream(),                            Charset.forName("utf-8"));                    BufferedReader bufferedReader = new BufferedReader(inputStreamReader);                    StringBuilder sb = new StringBuilder();                    String tmpStr = null;while ((tmpStr = bufferedReader.readLine()) != null){                        sb.append(tmpStr);                    }                    Message msg = 
mHandler.obtainMessage(MSG_DECODE_DATA,sb.toString());                    mHandler.sendMessage(msg);                }catch (IOException ex){                    Message msg = mHandler.obtainMessage(MSG_ERROR,ERR_NETWORK,0);                    mHandler.sendMessage(msg);                }            }        }.start();    }}

  OK,现在我们获得了返回的数据,那么接着就是解析返回的数据了。首先说明下google返回的数据格式,是如下的json数据:

{  "status":0,    /* 结果代码,0是成功,4是no speech, 5是no match */"id":"c421dee91abe31d9b8457f2a80ebca91-1",    /* 识别编号 */"hypotheses":    /* 假设,即结果 */     [          {  "utterance":"下午好",    /* 话语 */"confidence":0.2507637/* 信心,即准确度 */         }      ]  }

  这里说明下,返回的结果条数是根据前面的maxresults=1来确定的,如果是2就会返回两条,而这些结果是按照准确度从高到低排列的,理论最高值为1.

下面不废话,开始解析结果:

privatevoidstartParseJson(String jsonString){try{        JSONObject jsonObject = newJSONObject(jsonString);int status = jsonObject.getInt("status");if (status == 0){            JSONArray hypotheses = jsonObject.getJSONArray("hypotheses");if (hypotheses!= null && hypotheses.length() >0){                JSONObject hypot = hypotheses.optJSONObject(0);                String speechText = hypot.getString("utterance");                Message msg = mHandler.obtainMessage(MSG_ERROR,ERR_NONE,0,speechText);                mHandler.sendMessage(msg);            }        }elseif (status == 4){            Message msg = mHandler.obtainMessage(MSG_ERROR,ERR_NO_SPEECH,0);            mHandler.sendMessage(msg);        }elseif (status == 5){            Message msg = mHandler.obtainMessage(MSG_ERROR,ERR_NO_MATCH,0);            mHandler.sendMessage(msg);        }    }catch (JSONException ex){        JLog.e(TAG,"Decode JSON error",ex);        Message msg = mHandler.obtainMessage(MSG_ERROR,ERR_DECODING,0);        mHandler.sendMessage(msg);    }}

  这样我们就完成了speech to text的过程就是通常所说的语音识别。下面贴上这个activity的完整代码:

package com.jecofang.catebutler.activities;

import android.content.Intent;
import android.graphics.drawable.AnimationDrawable;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.view.View;
import android.widget.ImageView;
import android.widget.TextView;

import com.jecofang.catebutler.R;
import com.jecofang.catebutler.base.BaseActivity;
import com.jecofang.catebutler.common.JLog;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.nio.charset.Charset;

/**
 * ****************************************
 * File Name : SpeechRecognitionActivity
 * Author : Jeco Fang
 * Email : [email protected]
 * Create on : 13-7-19
 * All rights reserved 2013 - 2013
 * ****************************************
 *
 * Records microphone audio, wraps the PCM in a WAV header, POSTs it to the
 * (unofficial) Google Speech API, and returns the recognized text to the
 * caller via {@code setResult} with the {@code SPEECH_RESULT_*} extras
 * declared in {@link BaseActivity}.
 *
 * Fixes vs. the original:
 * - network-error message now carries the code in arg1 (the handler reads arg1);
 * - fallback buffer size no longer uses integer division (120 / 1000 == 0);
 * - volume loop only scans bytes actually read and masks the low byte;
 * - HTTP streams/connection are closed on all paths;
 * - a status-0 response with no hypotheses (or an unknown status) still finishes;
 * - stopAnimation() actually stops the frame animation.
 */
public class SpeechRecognitionActivity extends BaseActivity {
    private static final String TAG = "SpeechRecognitionActivity";

    /* Recording params (overridable via Intent extras) */
    public static final String AUDIO_SOURCE = "AudioSource";
    private static final int DEFAULT_AUDIO_SOURCE = MediaRecorder.AudioSource.VOICE_RECOGNITION;
    public static final String SAMPLE_RATE = "SampleRate";
    private static final int DEFAULT_SAMPLE_RATE = 16000;
    private static final int DEFAULT_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT;
    private static final short DEFAULT_PER_SAMPLE_IN_BYTES = 2;
    private static final short DEFAULT_PER_SAMPLE_IN_BIT = 16;
    public static final String CHANNELS = "Channels";
    private static final short DEFAULT_CHANNELS = 1; // MONO = 1, STEREO = 2

    /* Web API params */
    public static final String LANGUAGE = "Language";
    private static final String DEFAULT_LANGUAGE = "zh-CN";
    private static final String GOOGLE_VOICE_API_URL =
            "http://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&maxresults=1&lang=";
    private static final String USER_AGENT = "Mozilla/5.0";
    private static final int DEFAULT_CONNECT_TIMEOUT = 10 * 1000; // 10 sec
    private static final int DEFAULT_READ_TIMEOUT = 20 * 1000;    // 20 sec
    private static final String CONTENT_TYPE_WAV = "audio/L16;rate=16000";

    /* Message types */
    private static final int MSG_PREPARE_RECORDER = 1;
    private static final int MSG_START_RECORDING = 2;
    private static final int MSG_RECORD_RECORDING = 3;
    private static final int MSG_STOP_RECORDING = 4;
    private static final int MSG_RECORD_STOPPED = 5;
    private static final int MSG_DECODE_DATA = 6;
    private static final int MSG_ERROR = 7;

    /* Error codes returned in SPEECH_RESULT_STATUS */
    public static final int ERR_NONE = 0;
    public static final int ERR_UNKNOWN = -1;
    public static final int ERR_UN_SUPPORT_PARAMS = -2;
    public static final int ERR_ILLEGAL_STATE = -3;
    public static final int ERR_RECORDING = -4;
    public static final int ERR_NETWORK = -5;
    public static final int ERR_NO_SPEECH = -6;
    public static final int ERR_NO_MATCH = -7;
    public static final int ERR_DECODING = -8;

    // Volume level (msg.arg1 of MSG_RECORD_RECORDING) at/above which we
    // consider the user to be talking.
    private static final int SPEECH_VOLUME_THRESHOLD = 30;

    private int mSampleRate;
    private short mChannels;
    private int mAudioSource;

    private AudioRecord mRecorder;
    private int mBufferSize;          // AudioRecord internal buffer, bytes
    private int mRecordedLength;      // bytes of valid data in mRecordedData
    private byte[] mRecordedData;     // raw PCM capture buffer
    private byte[] wavHeader;         // cached 44-byte WAV header

    private enum State {
        IDLE,
        BUSY
    }

    private String mLang;

    private Handler mHandler = new InternalHandler();
    private State mState;

    private ImageView imageView;
    private TextView textView;

    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_speech_recognition);

        imageView = (ImageView) findViewById(R.id.iv_speaking);
        textView = (TextView) findViewById(R.id.tv_result);
        mState = State.IDLE;
    }

    @Override
    public void onStart() {
        super.onStart();
        JLog.d("onStart");
        if (mState == State.IDLE) {
            Intent intent = getIntent();
            mAudioSource = intent.getIntExtra(AUDIO_SOURCE, DEFAULT_AUDIO_SOURCE);
            mSampleRate = intent.getIntExtra(SAMPLE_RATE, DEFAULT_SAMPLE_RATE);
            mChannels = intent.getShortExtra(CHANNELS, DEFAULT_CHANNELS);
            mLang = intent.getStringExtra(LANGUAGE);
            if (mLang == null || mLang.trim().length() == 0) {
                mLang = DEFAULT_LANGUAGE;
            }
            if (!isNetworkAvailable()) {
                // Use the 3-arg overload: the MSG_ERROR handler reads the code
                // from msg.arg1, not msg.obj.
                Message message = mHandler.obtainMessage(MSG_ERROR, ERR_NETWORK, 0);
                mHandler.sendMessage(message);
            } else {
                mHandler.sendEmptyMessageDelayed(MSG_PREPARE_RECORDER, 500);
            }
        }
    }

    @Override
    public void onStop() {
        super.onStop();
        JLog.d("onStop");
    }

    @Override
    public void onPause() {
        super.onPause();
        JLog.d("onPause");
    }

    @Override
    public void onResume() {
        super.onResume();
        JLog.d("onResume");
    }

    /**
     * Drives the whole recognize pipeline:
     * prepare -> record -> (silence detected) stop -> upload -> parse -> finish.
     */
    private class InternalHandler extends Handler {
        private long lastTalkTime;   // 0 until speech is first detected
        private long startTime;      // when recording started
        AnimationDrawable animationDrawable;

        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
                case MSG_PREPARE_RECORDER:
                    mState = State.BUSY;
                    JLog.d("Prepare recorder");
                    prepareRecorder();
                    break;

                case MSG_START_RECORDING:
                    startTime = System.currentTimeMillis();
                    lastTalkTime = 0;
                    JLog.d("Start recording");
                    startRecording();
                    textView.setText(R.string.speech);
                    break;

                case MSG_RECORD_RECORDING:
                    // Silence detection: stop 5 s after start if the user never
                    // spoke, or 3 s after the last detected speech.
                    long now = System.currentTimeMillis();
                    int volume = msg.arg1;
                    JLog.d(TAG, "Record recording.Volume = %d", volume);
                    if (volume >= SPEECH_VOLUME_THRESHOLD) {
                        lastTalkTime = now;
                        startAnimationIfNeed(animationDrawable);
                    } else {
                        stopAnimation(animationDrawable);
                        long reference = (lastTalkTime == 0) ? startTime : lastTalkTime;
                        long silenceLimit = (lastTalkTime == 0) ? 5 * 1000 : 3 * 1000;
                        if (now - reference >= silenceLimit) {
                            mHandler.sendEmptyMessage(MSG_STOP_RECORDING);
                        }
                    }
                    break;

                case MSG_STOP_RECORDING:
                    JLog.d("Stop recording");
                    stopAnimation(animationDrawable);
                    stopRecording();
                    break;

                case MSG_RECORD_STOPPED:
                    JLog.d("Recorder stopped, try to get remote data");
                    byte[] wavData = getWavData();
                    startWebRecognizer(wavData);
                    if (mRecorder != null) {
                        mRecorder.release();
                        mRecorder = null;
                    }
                    break;

                case MSG_DECODE_DATA:
                    String data = "";
                    if (msg.obj != null) {
                        data = msg.obj.toString();
                    }
                    JLog.d("Try to parse data :" + data);
                    if (data.trim().length() > 0) {
                        startParseJson(data.trim());
                    } else {
                        Message message = mHandler.obtainMessage(MSG_ERROR, ERR_UNKNOWN, 0);
                        mHandler.sendMessage(message);
                    }
                    break;

                case MSG_ERROR:
                    // Terminal path for both success (ERR_NONE + text in obj)
                    // and failure: deliver the result and finish.
                    mState = State.IDLE;
                    if (mRecorder != null) {
                        mRecorder.release();
                        mRecorder = null;
                    }
                    Intent intent = new Intent();
                    intent.putExtra(SPEECH_RESULT_STATUS, msg.arg1);
                    if (msg.obj != null) {
                        JLog.d("Error:" + msg.arg1 + ";value" + msg.obj);
                        intent.putExtra(SPEECH_RESULT_VALUE, msg.obj.toString());
                    }
                    JLog.d("Error:" + msg.arg1);
                    setResult(RESULT_OK, intent);
                    finish();
                    break;

                default:
                    break;
            }
        }
    }

    /**
     * Queries the minimum buffer size, creates the {@link AudioRecord}, and
     * allocates a capture buffer for up to 35 seconds of audio. Posts
     * {@code MSG_START_RECORDING} on success, {@code MSG_ERROR} otherwise.
     */
    private void prepareRecorder() {
        int minBufferSize = AudioRecord.getMinBufferSize(mSampleRate,
                AudioFormat.CHANNEL_IN_MONO, DEFAULT_AUDIO_ENCODING);
        if (minBufferSize == AudioRecord.ERROR_BAD_VALUE) {
            JLog.e(TAG, "Params are not support by hardware.\\n"
                            + "sample rate: %d; channel: %2x; encoding: %2x",
                    mSampleRate,
                    AudioFormat.CHANNEL_IN_MONO,
                    DEFAULT_AUDIO_ENCODING);
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_UN_SUPPORT_PARAMS, 0);
            mHandler.sendMessage(msg);
            return;
        } else if (minBufferSize == AudioRecord.ERROR) {
            JLog.w(TAG, "Unable to query hardware for output property");
            // ~120 ms of audio. NOTE: the original wrote (120 / 1000) which is
            // integer division == 0 and produced a zero-byte buffer.
            minBufferSize = mSampleRate * 120 / 1000 * DEFAULT_PER_SAMPLE_IN_BYTES * mChannels;
        }
        mBufferSize = minBufferSize * 2;

        mRecorder = new AudioRecord(mAudioSource, mSampleRate,
                AudioFormat.CHANNEL_IN_MONO, DEFAULT_AUDIO_ENCODING, mBufferSize);
        if (mRecorder.getState() != AudioRecord.STATE_INITIALIZED) {
            JLog.e(TAG, "AudioRecord initialize failed");
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_ILLEGAL_STATE, 0);
            mHandler.sendMessage(msg);
            return;
        }

        mRecordedLength = 0;
        // capture buffer: up to 35 seconds of PCM
        int maxRecordLength = mSampleRate * mChannels * DEFAULT_PER_SAMPLE_IN_BYTES * 35;
        mRecordedData = new byte[maxRecordLength];
        Message msg = mHandler.obtainMessage(MSG_START_RECORDING);
        mHandler.sendMessage(msg);
    }

    /**
     * Starts pulling PCM from the recorder on a background thread; appends
     * chunks to {@code mRecordedData} and posts a volume level per chunk.
     */
    private void startRecording() {
        if (mRecorder == null || mRecorder.getState() != AudioRecord.STATE_INITIALIZED) {
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_ILLEGAL_STATE, 0);
            mHandler.sendMessage(msg);
            return;
        }

        mRecorder.startRecording();
        if (mRecorder.getRecordingState() != AudioRecord.RECORDSTATE_RECORDING) {
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_ILLEGAL_STATE, 0);
            mHandler.sendMessage(msg);
            return;
        }

        textView.setText(R.string.recording);
        new Thread() {
            @Override
            public void run() {
                byte[] tmpBuffer = new byte[mBufferSize / 2];
                while (mRecorder != null
                        && mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {
                    int numOfRead = mRecorder.read(tmpBuffer, 0, tmpBuffer.length);
                    if (numOfRead < 0) {
                        Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_RECORDING, 0);
                        mHandler.sendMessage(msg);
                        break;
                    }

                    // Mean absolute amplitude over the bytes actually read;
                    // the low byte is masked to avoid sign extension. Scale
                    // (sum / (bytes * 2)) matches the original threshold.
                    float sum = 0;
                    for (int i = 0; i + 1 < numOfRead; i += 2) {
                        short sample = (short) ((tmpBuffer[i] & 0xff) | (tmpBuffer[i + 1] << 8));
                        sum += Math.abs(sample);
                    }
                    float level = numOfRead > 0 ? sum / (numOfRead * 2f) : 0f;
                    Message msg = mHandler.obtainMessage(MSG_RECORD_RECORDING, (int) level, 0);
                    mHandler.sendMessage(msg);

                    if (mRecordedData.length > mRecordedLength + numOfRead) {
                        System.arraycopy(tmpBuffer, 0, mRecordedData, mRecordedLength, numOfRead);
                        mRecordedLength += numOfRead;
                    } else {
                        break; // capture buffer full
                    }
                }
                mHandler.sendEmptyMessage(MSG_RECORD_STOPPED);
            }
        }.start();
    }

    /** Stops the recorder if it is currently recording. */
    private void stopRecording() {
        if (mRecorder != null
                && mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {
            mRecorder.stop();
        }
    }

    /**
     * Lazily builds the 44-byte canonical WAV (RIFF/PCM) header. The two size
     * fields (offsets 4 and 40) stay zero until {@link #setWavHeaderInt}.
     */
    private void createWavHeaderIfNeed(boolean forceCreate) {
        if (!forceCreate && wavHeader != null) {
            return;
        }
        // byte rate = sampleRate * channels * bitsPerSample / 8
        int byteRate = mSampleRate * mChannels * DEFAULT_PER_SAMPLE_IN_BIT / 8;
        wavHeader = new byte[]{
                'R', 'I', 'F', 'F',
                0, 0, 0, 0,                 // RIFF chunk size = 36 + data length (patched later)
                'W', 'A', 'V', 'E',
                'f', 'm', 't', ' ',
                16, 0, 0, 0,                // fmt chunk size (16 = plain PCM)
                1, 0,                       // audio format tag, 1 = PCM
                (byte) mChannels, 0,        // channel count
                /* sample rate, little-endian */
                (byte) (mSampleRate & 0xff),
                (byte) ((mSampleRate >> 8) & 0xff),
                (byte) ((mSampleRate >> 16) & 0xff),
                (byte) ((mSampleRate >> 24) & 0xff),
                /* average bytes per second, little-endian */
                (byte) (byteRate & 0xff),
                (byte) ((byteRate >> 8) & 0xff),
                (byte) ((byteRate >> 16) & 0xff),
                (byte) ((byteRate >> 24) & 0xff),
                /* block align = bytes per sample frame (all channels) */
                (byte) (DEFAULT_PER_SAMPLE_IN_BIT * mChannels / 8), 0,
                16, 0,                      // bits per sample
                'd', 'a', 't', 'a',
                0, 0, 0, 0                  // data size (patched later)
        };
    }

    /**
     * Writes {@code value} as a 4-byte little-endian int at {@code offset}
     * inside the cached header.
     *
     * @throws IllegalArgumentException if offset is outside 0..40
     */
    private void setWavHeaderInt(int offset, int value) {
        if (offset < 0 || offset > 40) {
            // header is 44 bytes; an int needs 4 => last valid start is 40
            throw new IllegalArgumentException("offset out of range");
        }
        createWavHeaderIfNeed(false);
        for (int i = 0; i < 4; i++) {
            wavHeader[offset + i] = (byte) ((value >> (8 * i)) & 0xff);
        }
    }

    /** Returns header (with sizes patched) + recorded PCM as one WAV image. */
    private byte[] getWavData() {
        setWavHeaderInt(4, 36 + mRecordedLength);   // RIFF chunk size
        setWavHeaderInt(40, mRecordedLength);       // data chunk size
        byte[] wavData = new byte[44 + mRecordedLength];
        System.arraycopy(wavHeader, 0, wavData, 0, wavHeader.length);
        System.arraycopy(mRecordedData, 0, wavData, wavHeader.length, mRecordedLength);
        return wavData;
    }

    /**
     * Opens a POST connection to the speech endpoint for the current language.
     *
     * @return the connection, or {@code null} on failure (logged)
     */
    private HttpURLConnection getConnection() {
        HttpURLConnection connection = null;
        try {
            URL httpUrl = new URL(GOOGLE_VOICE_API_URL + mLang);
            connection = (HttpURLConnection) httpUrl.openConnection();
            connection.setConnectTimeout(DEFAULT_CONNECT_TIMEOUT);
            connection.setReadTimeout(DEFAULT_READ_TIMEOUT);
            connection.setRequestMethod("POST");
            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setUseCaches(false);
            connection.setRequestProperty("User-Agent", USER_AGENT);
            connection.setRequestProperty("Content-Type", CONTENT_TYPE_WAV);
        } catch (MalformedURLException ex) {
            JLog.e(TAG, "getConnection();Invalid url format", ex);
        } catch (ProtocolException ex) {
            JLog.e(TAG, "getConnection();Un support protocol", ex);
        } catch (IOException ex) {
            JLog.e(TAG, "getConnection();IO error while open connection", ex);
        }
        return connection;
    }

    /**
     * Uploads the WAV bytes on a background thread; posts the response body as
     * {@code MSG_DECODE_DATA} or {@code ERR_NETWORK} on failure. Streams and
     * the connection are always closed.
     */
    private void startWebRecognizer(final byte[] wavData) {
        textView.setText(R.string.analyzing);
        final HttpURLConnection connection = getConnection();
        if (connection == null) {
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NETWORK, 0);
            mHandler.sendMessage(msg);
            return;
        }
        new Thread() {
            @Override
            public void run() {
                try {
                    DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
                    try {
                        dos.write(wavData);
                        dos.flush();
                    } finally {
                        dos.close();
                    }

                    StringBuilder sb = new StringBuilder();
                    BufferedReader reader = new BufferedReader(new InputStreamReader(
                            connection.getInputStream(), Charset.forName("utf-8")));
                    try {
                        String line;
                        while ((line = reader.readLine()) != null) {
                            sb.append(line);
                        }
                    } finally {
                        reader.close();
                    }

                    Message msg = mHandler.obtainMessage(MSG_DECODE_DATA, sb.toString());
                    mHandler.sendMessage(msg);
                } catch (IOException ex) {
                    Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NETWORK, 0);
                    mHandler.sendMessage(msg);
                } finally {
                    connection.disconnect();
                }
            }
        }.start();
    }

    /**
     * Parses the server JSON and forwards the outcome to the handler. The
     * recognized text rides on MSG_ERROR with ERR_NONE (the single terminal
     * path). Empty hypotheses and unknown status codes now also terminate.
     */
    private void startParseJson(String jsonString) {
        try {
            JSONObject jsonObject = new JSONObject(jsonString);
            int status = jsonObject.getInt("status");
            if (status == 0) {
                JSONArray hypotheses = jsonObject.optJSONArray("hypotheses");
                if (hypotheses != null && hypotheses.length() > 0) {
                    JSONObject hypot = hypotheses.optJSONObject(0);
                    String speechText = hypot.getString("utterance");
                    Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NONE, 0, speechText);
                    mHandler.sendMessage(msg);
                } else {
                    Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NO_MATCH, 0);
                    mHandler.sendMessage(msg);
                }
            } else if (status == 4) {   // server: no speech
                Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NO_SPEECH, 0);
                mHandler.sendMessage(msg);
            } else if (status == 5) {   // server: no match
                Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NO_MATCH, 0);
                mHandler.sendMessage(msg);
            } else {
                Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_UNKNOWN, 0);
                mHandler.sendMessage(msg);
            }
        } catch (JSONException ex) {
            JLog.e(TAG, "Decode JSON error", ex);
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_DECODING, 0);
            mHandler.sendMessage(msg);
        }
    }

    /**
     * Shows the speaking indicator and starts the frame animation. Falls back
     * to the drawable currently set on the ImageView when the caller passes
     * null (the handler's field is never assigned, so this is the normal path).
     */
    private void startAnimationIfNeed(AnimationDrawable animationDrawable) {
        imageView.setVisibility(View.VISIBLE);
        if (animationDrawable == null) {
            if (!(imageView.getBackground() instanceof AnimationDrawable)) {
                imageView.setBackgroundResource(R.anim.speak_view);
            }
            animationDrawable = (AnimationDrawable) imageView.getBackground();
        }
        if (animationDrawable != null && !animationDrawable.isRunning()) {
            animationDrawable.start();
        }
    }

    /**
     * Hides the speaking indicator and stops the frame animation. The original
     * only hid the view, leaving the animation running in the background.
     */
    private void stopAnimation(AnimationDrawable animationDrawable) {
        if (animationDrawable == null && imageView.getBackground() instanceof AnimationDrawable) {
            animationDrawable = (AnimationDrawable) imageView.getBackground();
        }
        if (animationDrawable != null && animationDrawable.isRunning()) {
            animationDrawable.stop();
        }
        imageView.setVisibility(View.INVISIBLE);
    }
}
   
   

必须说一句的就是里面的JLog.x是自己简单封装了下Log的类,主要是统一控制log level。BaseActivity是activity的一些常用方法的封装以及自定义的一些常量,这里用的只有几个常量:

protectedstaticfinalint GET_SPEECH_RESULT = 1;protectedstaticfinal String SPEECH_RESULT_STATUS = "speechResultStatus";protectedstaticfinal String SPEECH_RESULT_VALUE = "speechResultValue";

layout文件代码:

  "1.0" encoding="utf-8"?>
  
   "http://schemas.android.com/apk/res/android"android:layout_width=
   "fill_parent"android:layout_height=
   "fill_parent"android:background=
   "#90000000">
   
    "fill_parent"android:layout_height=
    "wrap_content"android:layout_centerInParent=
    "true">
    
     "240dp"android:layout_height=
     "wrap_content"android:orientation=
     "vertical"android:layout_centerHorizontal=
     "true">
     
     
     
      "@+id/image_layout"android:layout_height=
      "230dp"android:layout_width=
      "230dp"android:layout_centerInParent=
      "true">
      
       "@+id/iv_speaking"android:layout_height=
       "wrap_content"android:layout_width=
       "wrap_content"android:layout_centerInParent=
       "true">
       
       
        "wrap_content"android:layout_width=
        "wrap_content"android:layout_centerInParent=
        "true"android:background=
        "@drawable/ic_speech">
        
        
         "@+id/tv_result"android:layout_height=
         "wrap_content"android:layout_width=
         "wrap_content"android:textColor=
         "#FFFFFFFF"android:textSize=
         "14sp"android:singleLine=
         "true"android:ellipsize=
         "marquee"android:marqueeRepeatLimit=
         "marquee_forever"android:layout_marginTop=
         "40dip"android:layout_centerInParent=
         "true">
         
         
         
        
       
      
     
    
   
  

 整个layout的背景是设置的#90000000,就是黑色的半透明。

speak animation的代码:

  "1.0" encoding="utf-8"?>
  
   "false"xmlns:android=
   "http://schemas.android.com/apk/res/android">
   
    "150" android:drawable=
    "@drawable/mic_1" />
    
     "150" android:drawable=
     "@drawable/mic_2" />
     
      "150" android:drawable=
      "@drawable/mic_3" />
      
       "150" android:drawable=
       "@drawable/mic_4" />
       
      
     
    
   
  

  其实就是几张半透明的从小到大的圆圈。

至于调用就很简单了:

ib_Speak = (ImageButton)findViewById(R.id.main_bottom_bar_ib_speak);       ib_Speak.setOnClickListener(new View.OnClickListener() {           @OverridepublicvoidonClick(View view) {               Intent intent = newIntent(MainActivity.this,SpeechRecognitionActivity.class);               startActivityForResult(intent, GET_SPEECH_RESULT);               //Intent intent = new Intent(MainActivity.this,Record.class);//startActivity(intent);   }       });

获取结果:

@OverrideprotectedvoidonActivityResult(int requestCode, int resultCode, Intent data){if (requestCode == GET_SPEECH_RESULT){if (resultCode == RESULT_CANCELED){//do nothing for now}elseif (resultCode == RESULT_OK){            JLog.i("status;"+ data.getIntExtra(SPEECH_RESULT_STATUS,0));switch (data.getIntExtra(SPEECH_RESULT_STATUS,0)){case SpeechRecognitionActivity.ERR_NONE:                    String text = data.getStringExtra(SPEECH_RESULT_VALUE);if (text != null && text.trim().length() >0){submitText(text);                    }break;default:                    Toast.makeText(this,R.string.error,Toast.LENGTH_SHORT).show();break;            }        }    }}

你可能感兴趣的:(android,Google,语音识别)