避开Google Voice Search利用Google Speech API实现Android语音识别

<p>最近自己写一个小东西,突发奇想要做个语音识别出来,网上查了很多资料,发现大部分是要装google voice search,或者使用第三方的SDK,如讯飞等!</p>

<p>自己感觉不爽,毕竟无论是装google voice search还是申请讯飞的key都很麻烦,后来发现了http://www.google.com/speech-api/v1/recognize?xjerr=1&amp;client=chromium&amp;maxresults=1&amp;lang=zh-CN 这个地址后就产生了想法,于是就有了下面的东西。</p>

<p>首先是录音的代码:</p>

<pre class="sourceCode java"><code class="sourceCode java"><span class="co">/**</span>
<span class="co"> * Starts the recorder and spawns a worker thread that copies PCM data into</span>
<span class="co"> * mRecordedData, posting a volume estimate to mHandler after every read.</span>
<span class="co"> * Posts MSG_ERROR with ERR_ILLEGAL_STATE if the recorder is missing, not</span>
<span class="co"> * initialized, or does not enter the recording state. The worker posts</span>
<span class="co"> * MSG_RECORD_STOPPED when its loop ends.</span>
<span class="co"> */</span>
<span class="kw">private</span> <span class="dt">void</span> <span class="fu">startRecording</span>(){
   <span class="kw">if</span> (mRecorder == <span class="kw">null</span>
           || mRecorder.<span class="fu">getState</span>() != AudioRecord.<span class="fu">STATE_INITIALIZED</span>){
       Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_ILLEGAL_STATE,<span class="dv">0</span>);
       mHandler.<span class="fu">sendMessage</span>(msg);
       <span class="kw">return</span>;
   }

   mRecorder.<span class="fu">startRecording</span>();
   <span class="kw">if</span> (mRecorder.<span class="fu">getRecordingState</span>() == AudioRecord.<span class="fu">RECORDSTATE_RECORDING</span>){
       textView.<span class="fu">setText</span>(R.<span class="fu">string</span>.<span class="fu">recording</span>);
       <span class="kw">new</span> Thread(){
           <span class="fu">@Override</span>
           <span class="kw">public</span> <span class="dt">void</span> <span class="fu">run</span>(){
               <span class="dt">byte</span>[] tmpBuffer = <span class="kw">new</span> <span class="dt">byte</span>[mBufferSize/<span class="dv">2</span>];
               <span class="kw">while</span> (mRecorder != <span class="kw">null</span>
                       &amp;&amp; mRecorder.<span class="fu">getRecordingState</span>() == AudioRecord.<span class="fu">RECORDSTATE_RECORDING</span>){
                   <span class="dt">int</span> numOfRead = mRecorder.<span class="fu">read</span>(tmpBuffer,<span class="dv">0</span>,tmpBuffer.<span class="fu">length</span>);
                   <span class="kw">if</span> (numOfRead &lt; <span class="dv">0</span>){
                       Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_RECORDING,<span class="dv">0</span>);
                       mHandler.<span class="fu">sendMessage</span>(msg);
                       <span class="kw">break</span>;
                   }

                   <span class="co">// Only the first numOfRead bytes were filled by this read; anything</span>
                   <span class="co">// beyond that is left over from an earlier iteration and must be ignored.</span>
                   <span class="dt">float</span> sum = <span class="dv">0</span>;
                   <span class="kw">for</span> (<span class="dt">int</span> i=<span class="dv">0</span>; i + <span class="dv">1</span> &lt; numOfRead; i+=<span class="dv">2</span>){
                       <span class="co">// Little-endian 16-bit sample. The low byte must be masked: a Java</span>
                       <span class="co">// byte is signed, so without the mask a low byte &gt;= 0x80 sign-extends</span>
                       <span class="co">// and overwrites the high byte.</span>
                       <span class="dt">short</span> t = (<span class="dt">short</span>)((tmpBuffer[i] &amp; <span class="bn">0xff</span>) | (tmpBuffer[i + <span class="dv">1</span>] &lt;&lt; <span class="dv">8</span>));
                       sum += Math.<span class="fu">abs</span>(t);
                   }
                   <span class="co">// Scaled mean absolute amplitude (not a true RMS). The divisor keeps the</span>
                   <span class="co">// original scale (bytes * 2) because the message consumer compares this</span>
                   <span class="co">// value against a fixed threshold.</span>
                   <span class="dt">float</span> rms = numOfRead &gt; <span class="dv">0</span> ? sum/(numOfRead * <span class="dv">2</span>) : <span class="dv">0</span>;
                   Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_RECORD_RECORDING,(<span class="dt">int</span>)rms,<span class="dv">0</span>);
                   mHandler.<span class="fu">sendMessage</span>(msg);
                   <span class="kw">if</span> (mRecordedData.<span class="fu">length</span> &gt; mRecordedLength + numOfRead){
                       System.<span class="fu">arraycopy</span>(tmpBuffer,<span class="dv">0</span>,mRecordedData,mRecordedLength,numOfRead);
                       mRecordedLength += numOfRead;
                   }<span class="kw">else</span> {
                       <span class="co">// Recording buffer is full: stop capturing.</span>
                       <span class="kw">break</span>;
                   }
               }
               mHandler.<span class="fu">sendEmptyMessage</span>(MSG_RECORD_STOPPED);
           }
       }.<span class="fu">start</span>();

   }<span class="kw">else</span> {
       Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_ILLEGAL_STATE,<span class="dv">0</span>);
       mHandler.<span class="fu">sendMessage</span>(msg);
   }
}</code></pre>

<p>因为Google的那个网址能识别的格式有限,而PCM又非常容易转化为wav格式的文件,所以下一步就是将录音的数据转换成wav格式。</p>

<p>从上面可以看到录音的数据我是存放到mRecordedData里面,而mRecordedLength是录音长度,下面是转化为wav格式的代码:</p>

<pre class="sourceCode java"><code class="sourceCode java"><span class="co">/**</span>
<span class="co"> * Builds the 44-byte WAV (RIFF, PCM) header template and stores it in wavHeader.</span>
<span class="co"> * Both size fields (offsets 4 and 40) are written as zero here.</span>
<span class="co"> *</span>
<span class="co"> * @param forceCreate when true the header is rebuilt even if one already exists</span>
<span class="co"> */</span>
<span class="kw">private</span> <span class="dt">void</span> <span class="fu">createWavHeaderIfNeed</span>(<span class="dt">boolean</span> forceCreate){
   <span class="kw">if</span> (wavHeader != <span class="kw">null</span> &amp;&amp; !forceCreate){
       <span class="kw">return</span>;
   }
   <span class="co">// Bytes per second = sample rate * channels * bits per sample / 8.</span>
   <span class="dt">final</span> <span class="dt">int</span> byteRate = mSampleRate * mChannels * DEFAULT_PER_SAMPLE_IN_BIT / <span class="dv">8</span>;
   <span class="co">// Bytes per frame (one sample for every channel).</span>
   <span class="dt">final</span> <span class="dt">int</span> blockAlign = DEFAULT_PER_SAMPLE_IN_BIT * mChannels / <span class="dv">8</span>;
   wavHeader = <span class="kw">new</span> <span class="dt">byte</span>[]{
           <span class="co">/* RIFF chunk */</span>
           &#39;R&#39;,&#39;I&#39;,&#39;F&#39;,&#39;F&#39;,
           <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>,                <span class="co">// chunk size = 36 + data length, unknown yet</span>
           &#39;W&#39;,&#39;A&#39;,&#39;V&#39;,&#39;E&#39;,
           <span class="co">/* format chunk */</span>
           &#39;f&#39;,&#39;m&#39;,&#39;t&#39;,&#39; &#39;,
           <span class="dv">16</span>, <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>,              <span class="co">// format chunk size: 16 means no extension block</span>
           <span class="dv">1</span>, <span class="dv">0</span>,                     <span class="co">// format tag 0x0001 = PCM</span>
           (<span class="dt">byte</span>)mChannels, <span class="dv">0</span>,       <span class="co">// channel count (mono = 1, stereo = 2)</span>
           <span class="co">/* sample rate, 4 bytes little-endian */</span>
           (<span class="dt">byte</span>)mSampleRate,
           (<span class="dt">byte</span>)(mSampleRate &gt;&gt; <span class="dv">8</span>),
           (<span class="dt">byte</span>)(mSampleRate &gt;&gt; <span class="dv">16</span>),
           (<span class="dt">byte</span>)(mSampleRate &gt;&gt; <span class="dv">24</span>),
           <span class="co">/* average bytes per second, 4 bytes little-endian */</span>
           (<span class="dt">byte</span>)byteRate,
           (<span class="dt">byte</span>)(byteRate &gt;&gt; <span class="dv">8</span>),
           (<span class="dt">byte</span>)(byteRate &gt;&gt; <span class="dv">16</span>),
           (<span class="dt">byte</span>)(byteRate &gt;&gt; <span class="dv">24</span>),
           <span class="co">/* block align, 2 bytes */</span>
           (<span class="dt">byte</span>)blockAlign,
           <span class="dv">0</span>,
           <span class="co">/* bits per sample, 2 bytes */</span>
           <span class="dv">16</span>, <span class="dv">0</span>,
           <span class="co">/* data chunk */</span>
           &#39;d&#39;,&#39;a&#39;,&#39;t&#39;,&#39;a&#39;,
           <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>                 <span class="co">// data size, unknown yet</span>
   };
}


<span class="kw">private</span> <span class="dt">void</span> <span class="fu">setWavHeaderInt</span>(<span class="dt">int</span> offset,<span class="dt">int</span> value){

   <span class="kw">if</span> (offset &lt; <span class="dv">0</span> || offset &gt; <span class="dv">40</span>){

       <span class="co">//total length = 44, int length = 4,</span>

       <span class="co">//44 - 4 = 40</span>

       <span class="kw">throw</span> <span class="kw">new</span> IllegalArgumentException(<span class="st">&quot;offset out of range&quot;</span>);

   }

   <span class="fu">createWavHeaderIfNeed</span>(<span class="kw">false</span>);


   wavHeader[offset++] = (<span class="dt">byte</span>)(value &amp; <span class="bn">0xff</span>);

   wavHeader[offset++] = (<span class="dt">byte</span>)((value &gt;&gt; <span class="dv">8</span>) &amp; <span class="bn">0xff</span>);

   wavHeader[offset++] = (<span class="dt">byte</span>)((value &gt;&gt; <span class="dv">16</span>) &amp; <span class="bn">0xff</span>);

   wavHeader[offset] = (<span class="dt">byte</span>)((value &gt;&gt; <span class="dv">24</span>) &amp; <span class="bn">0xff</span>);

}


<span class="kw">private</span> <span class="dt">byte</span>[] <span class="fu">getWavData</span>(){

   <span class="fu">setWavHeaderInt</span>(<span class="dv">4</span>,<span class="dv">36</span>+mRecordedLength);

   <span class="fu">setWavHeaderInt</span>(<span class="dv">40</span>,mRecordedLength);

   <span class="dt">byte</span>[] wavData = <span class="kw">new</span> <span class="dt">byte</span>[<span class="dv">44</span>+mRecordedLength];

   System.<span class="fu">arraycopy</span>(wavHeader,<span class="dv">0</span>,wavData,<span class="dv">0</span>,wavHeader.<span class="fu">length</span>);

   System.<span class="fu">arraycopy</span>(mRecordedData,<span class="dv">0</span>,wavData,wavHeader.<span class="fu">length</span>,mRecordedLength);

   <span class="kw">return</span> wavData;

}</code></pre>

<p>  通过上面的getWavData()就可以获得wav格式的录音数据了。那么接下来就是提交到前面提到的网址上去,等待返回的数据了。这一步很简单,就是做一个post的工作,代码如下:</p>

<pre class="sourceCode java"><code class="sourceCode java"><span class="co">/**</span>
<span class="co"> * Creates and configures (but does not connect) a POST connection to the</span>
<span class="co"> * speech API URL for the current language.</span>
<span class="co"> *</span>
<span class="co"> * @return the configured connection, or null if any configuration step failed</span>
<span class="co"> */</span>
<span class="kw">private</span> HttpURLConnection <span class="fu">getConnection</span>(){
   HttpURLConnection connection = <span class="kw">null</span>;
   <span class="kw">try</span>{
       URL httpUrl = <span class="kw">new</span> URL(GOOGLE_VOICE_API_URL + mLang);
       connection = (HttpURLConnection)httpUrl.<span class="fu">openConnection</span>();
       connection.<span class="fu">setConnectTimeout</span>(DEFAULT_CONNECT_TIMEOUT);
       connection.<span class="fu">setReadTimeout</span>(DEFAULT_READ_TIMEOUT);
       connection.<span class="fu">setRequestMethod</span>(<span class="st">&quot;POST&quot;</span>);
       connection.<span class="fu">setDoInput</span>(<span class="kw">true</span>);
       connection.<span class="fu">setDoOutput</span>(<span class="kw">true</span>);
       connection.<span class="fu">setUseCaches</span>(<span class="kw">false</span>);
       connection.<span class="fu">setRequestProperty</span>(<span class="st">&quot;User-Agent&quot;</span>,USER_AGENT);
       <span class="co">// Declare the rate the audio was actually recorded at; a fixed 16000 would</span>
       <span class="co">// mislabel the data whenever mSampleRate has a different value.</span>
       connection.<span class="fu">setRequestProperty</span>(<span class="st">&quot;Content-Type&quot;</span>,<span class="st">&quot;audio/L16;rate=&quot;</span> + mSampleRate);
       <span class="kw">return</span> connection;
   }<span class="kw">catch</span> (MalformedURLException ex){
       JLog.<span class="fu">e</span>(TAG,<span class="st">&quot;getConnection();Invalid url format&quot;</span>,ex);
   }<span class="kw">catch</span> (ProtocolException ex){
       JLog.<span class="fu">e</span>(TAG, <span class="st">&quot;getConnection();Un support protocol&quot;</span>,ex);
   }<span class="kw">catch</span> (IOException ex){
       JLog.<span class="fu">e</span>(TAG,<span class="st">&quot;getConnection();IO error while open connection&quot;</span>,ex);
   }
   <span class="co">// Failure path: never hand back a half-configured connection, because the</span>
   <span class="co">// caller treats any non-null result as usable.</span>
   <span class="kw">if</span> (connection != <span class="kw">null</span>){
       connection.<span class="fu">disconnect</span>();
   }
   <span class="kw">return</span> <span class="kw">null</span>;
}


<span class="co">/**</span>
<span class="co"> * Posts the WAV data to the speech API on a background thread and forwards the</span>
<span class="co"> * response body to mHandler as MSG_DECODE_DATA. Any connection or I/O failure</span>
<span class="co"> * is reported as MSG_ERROR with ERR_NETWORK.</span>
<span class="co"> *</span>
<span class="co"> * @param wavData the complete request body (header plus PCM samples)</span>
<span class="co"> */</span>
<span class="kw">private</span> <span class="dt">void</span> <span class="fu">startWebRecognizer</span>(<span class="dt">final</span> <span class="dt">byte</span>[] wavData){
   textView.<span class="fu">setText</span>(R.<span class="fu">string</span>.<span class="fu">analyzing</span>);
   <span class="dt">final</span> HttpURLConnection connection = <span class="fu">getConnection</span>();
   <span class="kw">if</span> (connection == <span class="kw">null</span>){
       Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NETWORK,<span class="dv">0</span>);
       mHandler.<span class="fu">sendMessage</span>(msg);
   }<span class="kw">else</span> {
       <span class="kw">new</span> Thread(){
           <span class="fu">@Override</span>
           <span class="kw">public</span> <span class="dt">void</span> <span class="fu">run</span>(){
               DataOutputStream dos = <span class="kw">null</span>;
               BufferedReader bufferedReader = <span class="kw">null</span>;
               <span class="kw">try</span> {
                   dos = <span class="kw">new</span> DataOutputStream(connection.<span class="fu">getOutputStream</span>());
                   dos.<span class="fu">write</span>(wavData);
                   dos.<span class="fu">flush</span>();
                   dos.<span class="fu">close</span>();
                   <span class="co">// Already closed; keep the finally block from closing it twice.</span>
                   dos = <span class="kw">null</span>;

                   bufferedReader = <span class="kw">new</span> BufferedReader(<span class="kw">new</span> InputStreamReader(
                           connection.<span class="fu">getInputStream</span>(), Charset.<span class="fu">forName</span>(<span class="st">&quot;utf-8&quot;</span>)));
                   StringBuilder sb = <span class="kw">new</span> StringBuilder();
                   String tmpStr;
                   <span class="kw">while</span> ((tmpStr = bufferedReader.<span class="fu">readLine</span>()) != <span class="kw">null</span>){
                       sb.<span class="fu">append</span>(tmpStr);
                   }
                   Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_DECODE_DATA,sb.<span class="fu">toString</span>());
                   mHandler.<span class="fu">sendMessage</span>(msg);
               }<span class="kw">catch</span> (IOException ex){
                   JLog.<span class="fu">e</span>(TAG,<span class="st">&quot;startWebRecognizer();IO error while talking to server&quot;</span>,ex);
                   Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NETWORK,<span class="dv">0</span>);
                   mHandler.<span class="fu">sendMessage</span>(msg);
               }<span class="kw">finally</span> {
                   <span class="co">// Release both streams and the connection on success and failure alike.</span>
                   <span class="kw">if</span> (dos != <span class="kw">null</span>){
                       <span class="kw">try</span> {
                           dos.<span class="fu">close</span>();
                       }<span class="kw">catch</span> (IOException ignored){
                           <span class="co">// Best effort: the request has already failed.</span>
                       }
                   }
                   <span class="kw">if</span> (bufferedReader != <span class="kw">null</span>){
                       <span class="kw">try</span> {
                           bufferedReader.<span class="fu">close</span>();
                       }<span class="kw">catch</span> (IOException ignored){
                           <span class="co">// Best effort: the result or error was already posted.</span>
                       }
                   }
                   connection.<span class="fu">disconnect</span>();
               }
           }
       }.<span class="fu">start</span>();
   }
}</code></pre>

<p>  OK,现在我们获得了返回的数据,那么接着就是解析返回的数据了。首先说明下google返回的数据格式,是如下的json数据:</p>

<pre class="sourceCode java"><code class="sourceCode java">{  

   <span class="st">&quot;status&quot;</span>:<span class="dv">0</span>,    <span class="co">/* 结果代码,0是成功,4是no speech, 5是no match */</span>

   <span class="st">&quot;id&quot;</span>:<span class="st">&quot;c421dee91abe31d9b8457f2a80ebca91-1&quot;</span>,    <span class="co">/* 识别编号 */</span>

   <span class="st">&quot;hypotheses&quot;</span>:    <span class="co">/* 假设,即结果 */</span>

   [  

       {  

           <span class="st">&quot;utterance&quot;</span>:<span class="st">&quot;下午好&quot;</span>,    <span class="co">/* 话语 */</span>

           <span class="st">&quot;confidence&quot;</span>:<span class="fl">0.2507637</span>    <span class="co">/* 信心,即准确度 */</span>

       }  

   ]  

}</code></pre>

<p>  这里说明下,返回的结果条数是根据前面的maxresults=1来确定的,如果是2就会返回两条,而这些结果是按照准确度从高到低排列的,理论最高值为1.</p>

<p>下面不废话,开始解析结果:</p>

<pre class="sourceCode java"><code class="sourceCode java"><span class="kw">private</span> <span class="dt">void</span> <span class="fu">startParseJson</span>(String jsonString){

   <span class="kw">try</span>{

       JSONObject jsonObject = <span class="kw">new</span> <span class="fu">JSONObject</span>(jsonString);

       <span class="dt">int</span> status = jsonObject.<span class="fu">getInt</span>(<span class="st">&quot;status&quot;</span>);

       <span class="kw">if</span> (status == <span class="dv">0</span>){

           JSONArray hypotheses = jsonObject.<span class="fu">getJSONArray</span>(<span class="st">&quot;hypotheses&quot;</span>);

           <span class="kw">if</span> (hypotheses!= <span class="kw">null</span> &amp;&amp; hypotheses.<span class="fu">length</span>() &gt; <span class="dv">0</span>){

               JSONObject hypot = hypotheses.<span class="fu">optJSONObject</span>(<span class="dv">0</span>);

               String speechText = hypot.<span class="fu">getString</span>(<span class="st">&quot;utterance&quot;</span>);

               Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NONE,<span class="dv">0</span>,speechText);

               mHandler.<span class="fu">sendMessage</span>(msg);

           }

       }<span class="kw">else</span> <span class="kw">if</span> (status == <span class="dv">4</span>){

           Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NO_SPEECH,<span class="dv">0</span>);

           mHandler.<span class="fu">sendMessage</span>(msg);

       }<span class="kw">else</span> <span class="kw">if</span> (status == <span class="dv">5</span>){

           Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NO_MATCH,<span class="dv">0</span>);

           mHandler.<span class="fu">sendMessage</span>(msg);

       }

   }<span class="kw">catch</span> (JSONException ex){

       JLog.<span class="fu">e</span>(TAG,<span class="st">&quot;Decode JSON error&quot;</span>,ex);

       Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_DECODING,<span class="dv">0</span>);

       mHandler.<span class="fu">sendMessage</span>(msg);

   }

}</code></pre>

<p>  这样我们就完成了speech to text的过程,也就是通常所说的语音识别。下面贴上这个activity的完整代码:</p>

<pre class="sourceCode java"><code class="sourceCode java"><span class="kw">package com.jecofang.catebutler.activities;</span>


<span class="kw">import android.content.Intent;</span>

<span class="kw">import android.graphics.drawable.AnimationDrawable;</span>

<span class="kw">import android.media.AudioFormat;</span>

<span class="kw">import android.media.AudioRecord;</span>

<span class="kw">import android.media.MediaRecorder;</span>

<span class="kw">import android.os.Bundle;</span>

<span class="kw">import android.os.Handler;</span>

<span class="kw">import android.os.Message;</span>

<span class="kw">import android.view.View;</span>

<span class="kw">import android.widget.ImageView;</span>

<span class="kw">import android.widget.TextView;</span>

<span class="kw">import com.jecofang.catebutler.R;</span>

<span class="kw">import com.jecofang.catebutler.base.BaseActivity;</span>

<span class="kw">import com.jecofang.catebutler.common.JLog;</span>

<span class="kw">import org.json.JSONArray;</span>

<span class="kw">import org.json.JSONException;</span>

<span class="kw">import org.json.JSONObject;</span>


<span class="kw">import java.io.BufferedReader;</span>

<span class="kw">import java.io.DataOutputStream;</span>

<span class="kw">import java.io.IOException;</span>

<span class="kw">import java.io.InputStreamReader;</span>

<span class="kw">import java.net.HttpURLConnection;</span>

<span class="kw">import java.net.MalformedURLException;</span>

<span class="kw">import java.net.ProtocolException;</span>

<span class="kw">import java.net.URL;</span>

<span class="kw">import java.nio.charset.Charset;</span>


<span class="co">/**</span>

<span class="co"> * ***************************************</span>

<span class="co"> * File Name : SpeechRecognitionActivity</span>

<span class="co"> * Author : Jeco Fang</span>

<span class="co"> * Email : [email protected]</span>

<span class="co"> * Create on : 13-7-19</span>

<span class="co"> * All rights reserved 2013 - 2013</span>

<span class="co"> * ****************************************</span>

<span class="co"> */</span>

<span class="kw">public</span> <span class="kw">class</span> SpeechRecognitionActivity <span class="kw">extends</span> BaseActivity {

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> String TAG = <span class="st">&quot;SpeechRecognitionActivity&quot;</span>;

   <span class="co">/* Recording params */</span>

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> String AUDIO_SOURCE = <span class="st">&quot;AudioSource&quot;</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> DEFAULT_AUDIO_SOURCE = MediaRecorder.<span class="fu">AudioSource</span>.<span class="fu">VOICE_RECOGNITION</span>;

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> String SAMPLE_RATE = <span class="st">&quot;SampleRate&quot;</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> DEFAULT_SAMPLE_RATE = <span class="dv">16000</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> DEFAULT_AUDIO_ENCODING = AudioFormat.<span class="fu">ENCODING_PCM_16BIT</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">short</span> DEFAULT_PER_SAMPLE_IN_BYTES = <span class="dv">2</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">short</span> DEFAULT_PER_SAMPLE_IN_BIT = <span class="dv">16</span>;

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> String CHANNELS = <span class="st">&quot;Channels&quot;</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">short</span> DEFAULT_CHANNELS = <span class="dv">1</span>; <span class="co">//Number of channels (MONO = 1, STEREO = 2)</span>


   <span class="co">/* Web API params */</span>

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> String LANGUAGE = <span class="st">&quot;Language&quot;</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> String DEFAULT_LANGUAGE = <span class="st">&quot;zh-CN&quot;</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> String GOOGLE_VOICE_API_URL =

           <span class="st">&quot;http://www.google.com/speech-api/v1/recognize?xjerr=1&amp;client=chromium&amp;maxresults=1&amp;lang=&quot;</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> String USER_AGENT = <span class="st">&quot;Mozilla/5.0&quot;</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> DEFAULT_CONNECT_TIMEOUT = <span class="dv">10</span> * <span class="dv">1000</span>; <span class="co">//10 sec;</span>

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> DEFAULT_READ_TIMEOUT = <span class="dv">20</span> * <span class="dv">1000</span>; <span class="co">//20 sec;</span>

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> String CONTENT_TYPE_WAV = <span class="st">&quot;audio/L16;rate=16000&quot;</span>;


   <span class="co">/* Message Types */</span>

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> MSG_PREPARE_RECORDER = <span class="dv">1</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> MSG_START_RECORDING = <span class="dv">2</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> MSG_RECORD_RECORDING = <span class="dv">3</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> MSG_STOP_RECORDING = <span class="dv">4</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> MSG_RECORD_STOPPED = <span class="dv">5</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> MSG_DECODE_DATA = <span class="dv">6</span>;

   <span class="kw">private</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> MSG_ERROR = <span class="dv">7</span>;


   <span class="co">/* Errors */</span>

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> ERR_NONE = <span class="dv">0</span>;

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> ERR_UNKNOWN = -<span class="dv">1</span>;

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> ERR_UN_SUPPORT_PARAMS = -<span class="dv">2</span>;

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> ERR_ILLEGAL_STATE = -<span class="dv">3</span>;

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> ERR_RECORDING = -<span class="dv">4</span>;

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> ERR_NETWORK = -<span class="dv">5</span>;

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> ERR_NO_SPEECH = -<span class="dv">6</span>;

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> ERR_NO_MATCH = -<span class="dv">7</span>;

   <span class="kw">public</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> ERR_DECODING = -<span class="dv">8</span>;


   <span class="kw">private</span> <span class="dt">int</span> mSampleRate;

   <span class="kw">private</span> <span class="dt">short</span> mChannels;

   <span class="kw">private</span> <span class="dt">int</span> mAudioSource;


   <span class="kw">private</span> AudioRecord mRecorder;

   <span class="kw">private</span> <span class="dt">int</span> mBufferSize;

   <span class="kw">private</span> <span class="dt">int</span> mRecordedLength;

   <span class="kw">private</span> <span class="dt">byte</span>[] mRecordedData;

   <span class="kw">private</span> <span class="dt">byte</span>[] wavHeader;


   <span class="kw">private</span> <span class="kw">enum</span>  State{

       IDLE,

       BUSY

   }


   <span class="kw">private</span> String mLang;


   <span class="kw">private</span> Handler mHandler = <span class="kw">new</span> <span class="fu">InternalHandler</span>();

   <span class="kw">private</span> State mState;


   <span class="kw">private</span> ImageView imageView;

   <span class="kw">private</span> TextView textView;


   <span class="co">/** Sets the speech-recognition layout, looks up the two views, and marks the state as IDLE. */</span>
   <span class="kw">public</span> <span class="dt">void</span> <span class="fu">onCreate</span>(Bundle savedInstanceState) {

       <span class="kw">super</span>.<span class="fu">onCreate</span>(savedInstanceState);

       <span class="fu">setContentView</span>(R.<span class="fu">layout</span>.<span class="fu">activity_speech_recognition</span>);


       imageView = (ImageView)<span class="fu">findViewById</span>(R.<span class="fu">id</span>.<span class="fu">iv_speaking</span>);

       textView = (TextView)<span class="fu">findViewById</span>(R.<span class="fu">id</span>.<span class="fu">tv_result</span>);

       mState = State.<span class="fu">IDLE</span>;

   }


   <span class="fu">@Override</span>

   <span class="kw">public</span> <span class="dt">void</span> <span class="fu">onStart</span>(){

       <span class="kw">super</span>.<span class="fu">onStart</span>();

       JLog.<span class="fu">d</span>(<span class="st">&quot;onStart&quot;</span>);

       <span class="kw">if</span> (mState == State.<span class="fu">IDLE</span>){

           Intent intent = <span class="fu">getIntent</span>();

           mAudioSource = intent.<span class="fu">getIntExtra</span>(AUDIO_SOURCE,DEFAULT_AUDIO_SOURCE);

           mSampleRate = intent.<span class="fu">getIntExtra</span>(SAMPLE_RATE,DEFAULT_SAMPLE_RATE);

           mChannels = intent.<span class="fu">getShortExtra</span>(CHANNELS,DEFAULT_CHANNELS);

           mLang = intent.<span class="fu">getStringExtra</span>(LANGUAGE);

           <span class="kw">if</span> (mLang == <span class="kw">null</span> || mLang.<span class="fu">trim</span>().<span class="fu">length</span>() == <span class="dv">0</span>){

               mLang = DEFAULT_LANGUAGE;

           }

           <span class="kw">if</span> (!<span class="fu">isNetworkAvailable</span>()){

               Message message = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NETWORK);

               mHandler.<span class="fu">sendMessage</span>(message);

           }<span class="kw">else</span> {

               mHandler.<span class="fu">sendEmptyMessageDelayed</span>(MSG_PREPARE_RECORDER,<span class="dv">500</span>);

           }

       }

   }


   <span class="co">/** Forwards to the superclass and logs the lifecycle transition; nothing else happens here. */</span>
   <span class="fu">@Override</span>

   <span class="kw">public</span> <span class="dt">void</span> <span class="fu">onStop</span>(){

       <span class="kw">super</span>.<span class="fu">onStop</span>();

       JLog.<span class="fu">d</span>(<span class="st">&quot;onStop&quot;</span>);

   }


   <span class="co">/** Forwards to the superclass and logs the lifecycle transition; nothing else happens here. */</span>
   <span class="fu">@Override</span>

   <span class="kw">public</span> <span class="dt">void</span> <span class="fu">onPause</span>(){

       <span class="kw">super</span>.<span class="fu">onPause</span>();

       JLog.<span class="fu">d</span>(<span class="st">&quot;onPause&quot;</span>);

   }


   <span class="co">/** Forwards to the superclass and logs the lifecycle transition; nothing else happens here. */</span>
   <span class="fu">@Override</span>

   <span class="kw">public</span> <span class="dt">void</span> <span class="fu">onResume</span>(){

       <span class="kw">super</span>.<span class="fu">onResume</span>();

       JLog.<span class="fu">d</span>(<span class="st">&quot;onResume&quot;</span>);

   }


   <span class="kw">private</span> <span class="kw">class</span> InternalHandler <span class="kw">extends</span> Handler{

       <span class="kw">private</span> <span class="dt">long</span> lastTalkTime;

       <span class="kw">private</span> <span class="dt">long</span> startTime;

       AnimationDrawable animationDrawable;


       <span class="fu">@Override</span>

       <span class="kw">public</span> <span class="dt">void</span> <span class="fu">handleMessage</span>(Message msg){

           <span class="kw">switch</span> (msg.<span class="fu">what</span>){

               <span class="kw">case</span> MSG_PREPARE_RECORDER:

                   mState = State.<span class="fu">BUSY</span>;

                   JLog.<span class="fu">d</span>(<span class="st">&quot;Prepare recorder&quot;</span>);

                   <span class="fu">prepareRecorder</span>();

                   <span class="kw">break</span>;

               <span class="kw">case</span> MSG_START_RECORDING:

                   startTime = System.<span class="fu">currentTimeMillis</span>();

                   lastTalkTime = <span class="dv">0</span>;

                   JLog.<span class="fu">d</span>(<span class="st">&quot;Start recording&quot;</span>);

                   <span class="fu">startRecording</span>();

                   textView.<span class="fu">setText</span>(R.<span class="fu">string</span>.<span class="fu">speech</span>);

                   <span class="kw">break</span>;

               <span class="kw">case</span> MSG_RECORD_RECORDING:

                   <span class="co">//After 5 seconds started recording, if there is no speech, send stop message.</span>

                   <span class="co">//In recording if no speech time exclude 3 seconds, send stop message</span>

                   <span class="dt">long</span> currentTime = System.<span class="fu">currentTimeMillis</span>();

                   <span class="dt">int</span> volume = msg.<span class="fu">arg1</span>;

                   JLog.<span class="fu">d</span>(TAG,<span class="st">&quot;Record recording.Volume = %d&quot;</span>,volume );

                   <span class="kw">if</span> (lastTalkTime == <span class="dv">0</span>){

                       <span class="kw">if</span> (volume &gt;= <span class="dv">30</span>){

                           lastTalkTime = currentTime;

                           <span class="fu">startAnimationIfNeed</span>(animationDrawable);

                       }<span class="kw">else</span> {

                           <span class="fu">stopAnimation</span>(animationDrawable);

                           <span class="kw">if</span> (currentTime - startTime &gt;= <span class="dv">5</span> * <span class="dv">1000</span>){

                               mHandler.<span class="fu">sendEmptyMessage</span>(MSG_STOP_RECORDING);

                           }

                       }

                   }<span class="kw">else</span> {

                       <span class="kw">if</span> (volume &gt;= <span class="dv">30</span>){

                           lastTalkTime = currentTime;

                           <span class="fu">startAnimationIfNeed</span>(animationDrawable);

                       }<span class="kw">else</span> {

                           <span class="fu">stopAnimation</span>(animationDrawable);

                           <span class="kw">if</span> (currentTime - lastTalkTime &gt;= <span class="dv">3</span> * <span class="dv">1000</span>){

                               mHandler.<span class="fu">sendEmptyMessage</span>(MSG_STOP_RECORDING);

                           }

                       }

                   }

                   <span class="kw">break</span>;

               <span class="kw">case</span> MSG_STOP_RECORDING:

                   JLog.<span class="fu">d</span>(<span class="st">&quot;Stop recording&quot;</span>);

                   <span class="fu">stopAnimation</span>(animationDrawable);

                   <span class="fu">stopRecording</span>();

                   <span class="kw">break</span>;

               <span class="kw">case</span> MSG_RECORD_STOPPED:

                   JLog.<span class="fu">d</span>(<span class="st">&quot;Recorder stopped, try to get remote data&quot;</span>);

                   <span class="dt">byte</span>[] wavData = <span class="fu">getWavData</span>();

                   <span class="fu">startWebRecognizer</span>(wavData);


                   <span class="kw">if</span> (mRecorder != <span class="kw">null</span>){

                       mRecorder.<span class="fu">release</span>();

                       mRecorder = <span class="kw">null</span>;

                   }

                   <span class="kw">break</span>;

               <span class="kw">case</span> MSG_DECODE_DATA:

                   String data = <span class="st">&quot;&quot;</span>;

                   <span class="kw">if</span> (msg.<span class="fu">obj</span> != <span class="kw">null</span>){

                       data = msg.<span class="fu">obj</span>.<span class="fu">toString</span>();

                   }

                   JLog.<span class="fu">d</span>(<span class="st">&quot;Try to parse data :&quot;</span> + data);

                   <span class="kw">if</span> (data.<span class="fu">trim</span>().<span class="fu">length</span>()&gt; <span class="dv">0</span>){

                       <span class="fu">startParseJson</span>(data.<span class="fu">trim</span>());

                   }<span class="kw">else</span> {

                       Message message = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_UNKNOWN,<span class="dv">0</span>);

                       mHandler.<span class="fu">sendMessage</span>(message);

                   }

                   <span class="kw">break</span>;

               <span class="kw">case</span> MSG_ERROR:

                   mState = State.<span class="fu">IDLE</span>;

                   <span class="kw">if</span> (mRecorder != <span class="kw">null</span>){

                       mRecorder.<span class="fu">release</span>();

                       mRecorder = <span class="kw">null</span>;

                   }

                   Intent intent = <span class="kw">new</span> <span class="fu">Intent</span>();

                   intent.<span class="fu">putExtra</span>(SPEECH_RESULT_STATUS,msg.<span class="fu">arg1</span>);

                   <span class="kw">if</span> (msg.<span class="fu">obj</span> != <span class="kw">null</span>){

                       JLog.<span class="fu">d</span>(<span class="st">&quot;Error:&quot;</span>+msg.<span class="fu">arg1</span>+<span class="st">&quot;;value&quot;</span>+msg.<span class="fu">obj</span>);

                       intent.<span class="fu">putExtra</span>(SPEECH_RESULT_VALUE,msg.<span class="fu">obj</span>.<span class="fu">toString</span>());

                   }

                   JLog.<span class="fu">d</span>(<span class="st">&quot;Error:&quot;</span>+msg.<span class="fu">arg1</span>);

                   <span class="fu">setResult</span>(RESULT_OK,intent);

                   <span class="fu">finish</span>();

                   <span class="kw">break</span>;

               <span class="kw">default</span>:

                   <span class="kw">break</span>;

           }

       }

   }


   <span class="kw">private</span> <span class="dt">void</span> <span class="fu">prepareRecorder</span>(){

       <span class="dt">int</span> minBufferSize = AudioRecord.<span class="fu">getMinBufferSize</span>(mSampleRate,

               AudioFormat.<span class="fu">CHANNEL_IN_MONO</span>,DEFAULT_AUDIO_ENCODING);

       <span class="kw">if</span> (minBufferSize == AudioRecord.<span class="fu">ERROR_BAD_VALUE</span>){

           JLog.<span class="fu">e</span>(TAG, <span class="st">&quot;Params are not support by hardware.</span><span class="ch">\n</span><span class="st">&quot;</span>

                   + <span class="st">&quot;sample rate: %d; channel: %2x; encoding: %2x&quot;</span>,

                   mSampleRate,

                   AudioFormat.<span class="fu">CHANNEL_IN_MONO</span>,

                   DEFAULT_AUDIO_ENCODING);

           Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_UN_SUPPORT_PARAMS,<span class="dv">0</span>);

           mHandler.<span class="fu">sendMessage</span>(msg);

           <span class="kw">return</span>;

       }<span class="kw">else</span> <span class="kw">if</span> (minBufferSize == AudioRecord.<span class="fu">ERROR</span>){

           JLog.<span class="fu">w</span>(TAG,<span class="st">&quot;Unable to query hardware for output property&quot;</span>);

           minBufferSize = mSampleRate * (<span class="dv">120</span> / <span class="dv">1000</span>) * DEFAULT_PER_SAMPLE_IN_BYTES * mChannels;

       }

       mBufferSize = minBufferSize * <span class="dv">2</span>;


       mRecorder = <span class="kw">new</span> <span class="fu">AudioRecord</span>(mAudioSource,mSampleRate,

               AudioFormat.<span class="fu">CHANNEL_IN_MONO</span>,DEFAULT_AUDIO_ENCODING,mBufferSize);

       <span class="kw">if</span> (mRecorder.<span class="fu">getState</span>() != AudioRecord.<span class="fu">STATE_INITIALIZED</span>){

           JLog.<span class="fu">e</span>(TAG,<span class="st">&quot;AudioRecord initialize failed&quot;</span>);

           Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_ILLEGAL_STATE,<span class="dv">0</span>);

           mHandler.<span class="fu">sendMessage</span>(msg);

           <span class="kw">return</span>;

       }


       mRecordedLength = <span class="dv">0</span>;

       <span class="dt">int</span> maxRecordLength = mSampleRate * mChannels * DEFAULT_PER_SAMPLE_IN_BYTES * <span class="dv">35</span>;

       mRecordedData = <span class="kw">new</span> <span class="dt">byte</span>[maxRecordLength];

       Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_START_RECORDING);

       mHandler.<span class="fu">sendMessage</span>(msg);

   }


   <span class="kw">private</span> <span class="dt">void</span> <span class="fu">startRecording</span>(){

       <span class="kw">if</span> (mRecorder == <span class="kw">null</span>

               || mRecorder.<span class="fu">getState</span>() != AudioRecord.<span class="fu">STATE_INITIALIZED</span>){

           Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_ILLEGAL_STATE,<span class="dv">0</span>);

           mHandler.<span class="fu">sendMessage</span>(msg);

           <span class="kw">return</span>;

       }


       mRecorder.<span class="fu">startRecording</span>();

       <span class="kw">if</span> (mRecorder.<span class="fu">getRecordingState</span>() == AudioRecord.<span class="fu">RECORDSTATE_RECORDING</span>){

           textView.<span class="fu">setText</span>(R.<span class="fu">string</span>.<span class="fu">recording</span>);

           <span class="kw">new</span> Thread(){

               <span class="fu">@Override</span>

               <span class="kw">public</span> <span class="dt">void</span> <span class="fu">run</span>(){

                   <span class="dt">byte</span>[] tmpBuffer = <span class="kw">new</span> <span class="dt">byte</span>[mBufferSize/<span class="dv">2</span>];

                   <span class="kw">while</span> (mRecorder != <span class="kw">null</span>

                           &amp;&amp; mRecorder.<span class="fu">getRecordingState</span>() == AudioRecord.<span class="fu">RECORDSTATE_RECORDING</span>){

                       <span class="dt">int</span> numOfRead = mRecorder.<span class="fu">read</span>(tmpBuffer,<span class="dv">0</span>,tmpBuffer.<span class="fu">length</span>);

                       <span class="kw">if</span> (numOfRead &lt; <span class="dv">0</span>){

                           Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_RECORDING,<span class="dv">0</span>);

                           mHandler.<span class="fu">sendMessage</span>(msg);

                           <span class="kw">break</span>;

                       }


                       <span class="dt">float</span> sum = <span class="dv">0</span>;

                       <span class="kw">for</span> (<span class="dt">int</span> i=<span class="dv">0</span>; i &lt; tmpBuffer.<span class="fu">length</span>; i+=<span class="dv">2</span>){

                           <span class="dt">short</span> t = (<span class="dt">short</span>)(tmpBuffer[i] | (tmpBuffer[i<span class="dv">+1</span>] &lt;&lt;<span class="dv">8</span> ));

                           sum += Math.<span class="fu">abs</span>(t);

                       }

                       <span class="dt">float</span> rms = sum/(tmpBuffer.<span class="fu">length</span> * <span class="dv">2</span>);

                       Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_RECORD_RECORDING,(<span class="dt">int</span>)rms,<span class="dv">0</span>);

                       mHandler.<span class="fu">sendMessage</span>(msg);

                       <span class="kw">if</span> (mRecordedData.<span class="fu">length</span> &gt; mRecordedLength + numOfRead){

                           System.<span class="fu">arraycopy</span>(tmpBuffer,<span class="dv">0</span>,mRecordedData,mRecordedLength,numOfRead);

                           mRecordedLength += numOfRead;

                       }<span class="kw">else</span> {

                           <span class="kw">break</span>;

                       }

                   }

                   mHandler.<span class="fu">sendEmptyMessage</span>(MSG_RECORD_STOPPED);

               }

           }.<span class="fu">start</span>();


       }<span class="kw">else</span> {

           Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_ILLEGAL_STATE,<span class="dv">0</span>);

           mHandler.<span class="fu">sendMessage</span>(msg);

       }

   }


   <span class="kw">private</span> <span class="dt">void</span> <span class="fu">stopRecording</span>(){

       <span class="kw">if</span> (mRecorder != <span class="kw">null</span>

               &amp;&amp; mRecorder.<span class="fu">getRecordingState</span>() == AudioRecord.<span class="fu">RECORDSTATE_RECORDING</span>){

           mRecorder.<span class="fu">stop</span>();

       }

   }


   <span class="kw">private</span> <span class="dt">void</span> <span class="fu">createWavHeaderIfNeed</span>(<span class="dt">boolean</span> forceCreate){

       <span class="kw">if</span> (!forceCreate &amp;&amp; wavHeader != <span class="kw">null</span>){

           <span class="kw">return</span>;

       }

       <span class="co">// sample rate * number of channel * bit per sample / bit per bytes</span>

       <span class="dt">int</span> avgBytesPerSec = mSampleRate * mChannels * DEFAULT_PER_SAMPLE_IN_BIT / <span class="dv">8</span>;

       wavHeader = <span class="kw">new</span> <span class="dt">byte</span>[]{

               &#39;R&#39;,&#39;I&#39;,&#39;F&#39;,&#39;F&#39;,           <span class="co">//id = RIFF , fixed chars</span>

               <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>,                <span class="co">// RIFF WAVE chunk size = 36 + data length</span>

               &#39;W&#39;,&#39;A&#39;,&#39;V&#39;,&#39;E&#39;,           <span class="co">//  Type</span>

               <span class="co">/* Format chunk */</span>

               &#39;f&#39;,&#39;m&#39;,&#39;t&#39;,&#39; &#39;,          <span class="co">// id = &#39;fmt &#39;</span>

               <span class="dv">16</span>, <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>,              <span class="co">// format chunk size = 16, if 18, means existing extension message</span>

               <span class="dv">1</span>, <span class="dv">0</span>,                     <span class="co">// format tag, 0x0001 = 16 pcm</span>

               (<span class="dt">byte</span>)mChannels, <span class="dv">0</span>, <span class="co">// number of channels (MONO = 1, STEREO =2)</span>

               <span class="co">/* 4 bytes , sample rate */</span>

               (<span class="dt">byte</span>)(mSampleRate &amp; <span class="bn">0xff</span>),

               (<span class="dt">byte</span>)((mSampleRate &gt;&gt; <span class="dv">8</span>) &amp; <span class="bn">0xff</span>),

               (<span class="dt">byte</span>)((mSampleRate &gt;&gt; <span class="dv">16</span>) &amp; <span class="bn">0xff</span>),

               (<span class="dt">byte</span>)((mSampleRate &gt;&gt; <span class="dv">24</span>) &amp; <span class="bn">0xff</span>),

               <span class="co">/* 4 bytes average bytes per seconds */</span>

               (<span class="dt">byte</span>)(avgBytesPerSec &amp; <span class="bn">0xff</span>),

               (<span class="dt">byte</span>)((avgBytesPerSec &gt;&gt; <span class="dv">8</span>) &amp; <span class="bn">0xff</span>),

               (<span class="dt">byte</span>)((avgBytesPerSec &gt;&gt; <span class="dv">16</span>) &amp; <span class="bn">0xff</span>),

               (<span class="dt">byte</span>)((avgBytesPerSec &gt;&gt; <span class="dv">24</span>) &amp; <span class="bn">0xff</span>),

               <span class="co">/* 2 bytes, block align */</span>

               <span class="co">/******************************</span>

<span class="co">                 *              sample 1</span>

<span class="co">                 ******************************</span>

<span class="co">                 * channel 0 least| channel 0 most|</span>

<span class="co">                 * ******************************/</span>

               (<span class="dt">byte</span>)(DEFAULT_PER_SAMPLE_IN_BIT * mChannels / <span class="dv">8</span>), <span class="co">// per sample in bytes</span>

               <span class="dv">0</span>,

               <span class="co">/* 2 bytes, Bits per sample */</span>

               <span class="dv">16</span>, <span class="dv">0</span>,

               <span class="co">/* data chunk */</span>

               &#39;d&#39;,&#39;a&#39;,&#39;t&#39;,&#39;a&#39;, <span class="co">/// Id = &#39;data&#39;</span>

               <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>   <span class="co">// data size, set 0 due to unknown yet</span>

       };

   }


   <span class="kw">private</span> <span class="dt">void</span> <span class="fu">setWavHeaderInt</span>(<span class="dt">int</span> offset,<span class="dt">int</span> value){

       <span class="kw">if</span> (offset &lt; <span class="dv">0</span> || offset &gt; <span class="dv">40</span>){

           <span class="co">//total length = 44, int length = 4,</span>

           <span class="co">//44 - 4 = 40</span>

           <span class="kw">throw</span> <span class="kw">new</span> IllegalArgumentException(<span class="st">&quot;offset out of range&quot;</span>);

       }

       <span class="fu">createWavHeaderIfNeed</span>(<span class="kw">false</span>);


       wavHeader[offset++] = (<span class="dt">byte</span>)(value &amp; <span class="bn">0xff</span>);

       wavHeader[offset++] = (<span class="dt">byte</span>)((value &gt;&gt; <span class="dv">8</span>) &amp; <span class="bn">0xff</span>);

       wavHeader[offset++] = (<span class="dt">byte</span>)((value &gt;&gt; <span class="dv">16</span>) &amp; <span class="bn">0xff</span>);

       wavHeader[offset] = (<span class="dt">byte</span>)((value &gt;&gt; <span class="dv">24</span>) &amp; <span class="bn">0xff</span>);

   }


   <span class="kw">private</span> <span class="dt">byte</span>[] <span class="fu">getWavData</span>(){

       <span class="fu">setWavHeaderInt</span>(<span class="dv">4</span>,<span class="dv">36</span>+mRecordedLength);

       <span class="fu">setWavHeaderInt</span>(<span class="dv">40</span>,mRecordedLength);

       <span class="dt">byte</span>[] wavData = <span class="kw">new</span> <span class="dt">byte</span>[<span class="dv">44</span>+mRecordedLength];

       System.<span class="fu">arraycopy</span>(wavHeader,<span class="dv">0</span>,wavData,<span class="dv">0</span>,wavHeader.<span class="fu">length</span>);

       System.<span class="fu">arraycopy</span>(mRecordedData,<span class="dv">0</span>,wavData,wavHeader.<span class="fu">length</span>,mRecordedLength);

       <span class="kw">return</span> wavData;

   }


   <span class="kw">private</span> HttpURLConnection <span class="fu">getConnection</span>(){

       HttpURLConnection connection = <span class="kw">null</span>;

       <span class="kw">try</span>{

           URL httpUrl = <span class="kw">new</span> URL(GOOGLE_VOICE_API_URL + mLang);

           connection = (HttpURLConnection)httpUrl.<span class="fu">openConnection</span>();

           connection.<span class="fu">setConnectTimeout</span>(DEFAULT_CONNECT_TIMEOUT);

           connection.<span class="fu">setReadTimeout</span>(DEFAULT_READ_TIMEOUT);

           connection.<span class="fu">setRequestMethod</span>(<span class="st">&quot;POST&quot;</span>);

           connection.<span class="fu">setDoInput</span>(<span class="kw">true</span>);

           connection.<span class="fu">setDoOutput</span>(<span class="kw">true</span>);

           connection.<span class="fu">setUseCaches</span>(<span class="kw">false</span>);

           connection.<span class="fu">setRequestProperty</span>(<span class="st">&quot;User-Agent&quot;</span>,USER_AGENT);

           connection.<span class="fu">setRequestProperty</span>(<span class="st">&quot;Content-Type&quot;</span>,CONTENT_TYPE_WAV);

       }<span class="kw">catch</span> (MalformedURLException ex){

           JLog.<span class="fu">e</span>(TAG,<span class="st">&quot;getConnection();Invalid url format&quot;</span>,ex);

       }<span class="kw">catch</span> (ProtocolException ex){

           JLog.<span class="fu">e</span>(TAG, <span class="st">&quot;getConnection();Un support protocol&quot;</span>,ex);

       }<span class="kw">catch</span> (IOException ex){

           JLog.<span class="fu">e</span>(TAG,<span class="st">&quot;getConnection();IO error while open connection&quot;</span>,ex);

       }

       <span class="kw">return</span> connection;

   }


   <span class="kw">private</span> <span class="dt">void</span> <span class="fu">startWebRecognizer</span>(<span class="dt">final</span> <span class="dt">byte</span>[] wavData){

       textView.<span class="fu">setText</span>(R.<span class="fu">string</span>.<span class="fu">analyzing</span>);

       <span class="dt">final</span> HttpURLConnection connection = <span class="fu">getConnection</span>();

       <span class="kw">if</span> (connection == <span class="kw">null</span>){

           Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NETWORK,<span class="dv">0</span>);

           mHandler.<span class="fu">sendMessage</span>(msg);

       }<span class="kw">else</span> {

           <span class="kw">new</span> Thread(){

               <span class="fu">@Override</span>

               <span class="kw">public</span> <span class="dt">void</span> <span class="fu">run</span>(){

                   <span class="kw">try</span> {

                       DataOutputStream dos = <span class="kw">new</span> DataOutputStream(connection.<span class="fu">getOutputStream</span>());

                       dos.<span class="fu">write</span>(wavData);

                       dos.<span class="fu">flush</span>();

                       dos.<span class="fu">close</span>();


                       InputStreamReader inputStreamReader = <span class="kw">new</span> InputStreamReader(connection.<span class="fu">getInputStream</span>(),

                               Charset.<span class="fu">forName</span>(<span class="st">&quot;utf-8&quot;</span>));

                       BufferedReader bufferedReader = <span class="kw">new</span> BufferedReader(inputStreamReader);

                       StringBuilder sb = <span class="kw">new</span> StringBuilder();

                       String tmpStr = <span class="kw">null</span>;

                       <span class="kw">while</span> ((tmpStr = bufferedReader.<span class="fu">readLine</span>()) != <span class="kw">null</span>){

                           sb.<span class="fu">append</span>(tmpStr);

                       }

                       Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_DECODE_DATA,sb.<span class="fu">toString</span>());

                       mHandler.<span class="fu">sendMessage</span>(msg);

                   }<span class="kw">catch</span> (IOException ex){

                       Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NETWORK,<span class="dv">0</span>);

                       mHandler.<span class="fu">sendMessage</span>(msg);

                   }

               }

           }.<span class="fu">start</span>();

       }

   }


   <span class="kw">private</span> <span class="dt">void</span> <span class="fu">startParseJson</span>(String jsonString){

       <span class="kw">try</span>{

           JSONObject jsonObject = <span class="kw">new</span> <span class="fu">JSONObject</span>(jsonString);

           <span class="dt">int</span> status = jsonObject.<span class="fu">getInt</span>(<span class="st">&quot;status&quot;</span>);

           <span class="kw">if</span> (status == <span class="dv">0</span>){

               JSONArray hypotheses = jsonObject.<span class="fu">getJSONArray</span>(<span class="st">&quot;hypotheses&quot;</span>);

               <span class="kw">if</span> (hypotheses!= <span class="kw">null</span> &amp;&amp; hypotheses.<span class="fu">length</span>() &gt; <span class="dv">0</span>){

                   JSONObject hypot = hypotheses.<span class="fu">optJSONObject</span>(<span class="dv">0</span>);

                   String speechText = hypot.<span class="fu">getString</span>(<span class="st">&quot;utterance&quot;</span>);

                   Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NONE,<span class="dv">0</span>,speechText);

                   mHandler.<span class="fu">sendMessage</span>(msg);

               }

           }<span class="kw">else</span> <span class="kw">if</span> (status == <span class="dv">4</span>){

               Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NO_SPEECH,<span class="dv">0</span>);

               mHandler.<span class="fu">sendMessage</span>(msg);

           }<span class="kw">else</span> <span class="kw">if</span> (status == <span class="dv">5</span>){

               Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_NO_MATCH,<span class="dv">0</span>);

               mHandler.<span class="fu">sendMessage</span>(msg);

           }

       }<span class="kw">catch</span> (JSONException ex){

           JLog.<span class="fu">e</span>(TAG,<span class="st">&quot;Decode JSON error&quot;</span>,ex);

           Message msg = mHandler.<span class="fu">obtainMessage</span>(MSG_ERROR,ERR_DECODING,<span class="dv">0</span>);

           mHandler.<span class="fu">sendMessage</span>(msg);

       }

   }


   <span class="kw">private</span> <span class="dt">void</span> <span class="fu">startAnimationIfNeed</span>(AnimationDrawable animationDrawable){

       imageView.<span class="fu">setVisibility</span>(View.<span class="fu">VISIBLE</span>);

       <span class="kw">if</span> (animationDrawable == <span class="kw">null</span>){

           imageView.<span class="fu">setBackgroundResource</span>(R.<span class="fu">anim</span>.<span class="fu">speak_view</span>);

           animationDrawable = (AnimationDrawable)imageView.<span class="fu">getBackground</span>();

       }


       <span class="kw">if</span> (animationDrawable != <span class="kw">null</span> &amp;&amp; !animationDrawable.<span class="fu">isRunning</span>()){

           animationDrawable.<span class="fu">start</span>();

       }

   }


   <span class="co">// Hides the speaking indicator by making imageView invisible. The animationDrawable argument is not used.</span>
   <span class="kw">private</span> <span class="dt">void</span> <span class="fu">stopAnimation</span>(AnimationDrawable animationDrawable){

       imageView.<span class="fu">setVisibility</span>(View.<span class="fu">INVISIBLE</span>);

   }

}</code></pre>

<p>需要说明的是,代码里的JLog.x是我对Log类做的简单封装,主要用于统一控制log level。BaseActivity封装了Activity的一些常用方法,并定义了一些常量,这里只用到了其中几个常量:</p>

<pre class="sourceCode java"><code class="sourceCode java"><span class="co">// Request code used when starting the speech activity for a result.</span>
<span class="kw">protected</span> <span class="dt">static</span> <span class="dt">final</span> <span class="dt">int</span> GET_SPEECH_RESULT = <span class="dv">1</span>;

<span class="co">// Key of the int extra in the result Intent that carries the status code.</span>
<span class="kw">protected</span> <span class="dt">static</span> <span class="dt">final</span> String SPEECH_RESULT_STATUS = <span class="st">&quot;speechResultStatus&quot;</span>;

<span class="co">// Key of the String extra in the result Intent that carries the text, when there is one.</span>
<span class="kw">protected</span> <span class="dt">static</span> <span class="dt">final</span> String SPEECH_RESULT_VALUE = <span class="st">&quot;speechResultValue&quot;</span>;</code></pre>

<p>layout文件代码:</p>

<pre class="sourceCode xml"><code class="sourceCode xml">&lt;?xml version=<span class="st">&quot;1.0&quot;</span> encoding=<span class="st">&quot;utf-8&quot;</span>?&gt;


&lt;RelativeLayout xmlns:android=<span class="st">&quot;http://schemas.android.com/apk/res/android&quot;</span>

               android:layout_width=<span class="st">&quot;fill_parent&quot;</span>

               android:layout_height=<span class="st">&quot;fill_parent&quot;</span>

       android:background=<span class="st">&quot;#90000000&quot;</span>&gt;

   &lt;RelativeLayout

           android:layout_width=<span class="st">&quot;fill_parent&quot;</span>

           android:layout_height=<span class="st">&quot;wrap_content&quot;</span>

           android:layout_centerInParent=<span class="st">&quot;true&quot;</span>&gt;

       &lt;LinearLayout

               android:layout_width=<span class="st">&quot;240dp&quot;</span>

               android:layout_height=<span class="st">&quot;wrap_content&quot;</span>

               android:orientation=<span class="st">&quot;vertical&quot;</span>

               android:layout_centerHorizontal=<span class="st">&quot;true&quot;</span>&gt;

       &lt;/LinearLayout&gt;

   &lt;/RelativeLayout&gt;

   &lt;RelativeLayout

           android:id=<span class="st">&quot;@+id/image_layout&quot;</span>

           android:layout_height=<span class="st">&quot;230dp&quot;</span>

           android:layout_width=<span class="st">&quot;230dp&quot;</span>

           android:layout_centerInParent=<span class="st">&quot;true&quot;</span>&gt;

       &lt;ImageView

               android:id=<span class="st">&quot;@+id/iv_speaking&quot;</span>

               android:layout_height=<span class="st">&quot;wrap_content&quot;</span>

               android:layout_width=<span class="st">&quot;wrap_content&quot;</span>

               android:layout_centerInParent=<span class="st">&quot;true&quot;</span>&gt;

       &lt;/ImageView&gt;

       &lt;ImageView

               android:layout_height=<span class="st">&quot;wrap_content&quot;</span>

               android:layout_width=<span class="st">&quot;wrap_content&quot;</span>

               android:layout_centerInParent=<span class="st">&quot;true&quot;</span>

               android:background=<span class="st">&quot;@drawable/ic_speech&quot;</span>&gt;

       &lt;/ImageView&gt;

       &lt;TextView

               android:id=<span class="st">&quot;@+id/tv_result&quot;</span>

               android:layout_height=<span class="st">&quot;wrap_content&quot;</span>

               android:layout_width=<span class="st">&quot;wrap_content&quot;</span>

               android:textColor=<span class="st">&quot;#FFFFFFFF&quot;</span>

               android:textSize=<span class="st">&quot;14sp&quot;</span>

               android:singleLine=<span class="st">&quot;true&quot;</span>

               android:ellipsize=<span class="st">&quot;marquee&quot;</span>

               android:marqueeRepeatLimit=<span class="st">&quot;marquee_forever&quot;</span>

               android:layout_marginTop=<span class="st">&quot;40dip&quot;</span>

               android:layout_centerInParent=<span class="st">&quot;true&quot;</span>&gt;

       &lt;/TextView&gt;

   &lt;/RelativeLayout&gt;

&lt;/RelativeLayout&gt;</code></pre>

<p>整个layout的背景设置为#90000000,也就是半透明的黑色。</p>

<p>speak animation的代码:</p>

<pre class="sourceCode xml"><code class="sourceCode xml">&lt;?xml version=<span class="st">&quot;1.0&quot;</span> encoding=<span class="st">&quot;utf-8&quot;</span>?&gt;


&lt;animation-list android:oneshot=<span class="st">&quot;false&quot;</span>

               xmlns:android=<span class="st">&quot;http://schemas.android.com/apk/res/android&quot;</span>&gt;

   &lt;item android:duration=<span class="st">&quot;150&quot;</span> android:drawable=<span class="st">&quot;@drawable/mic_1&quot;</span> /&gt;

   &lt;item android:duration=<span class="st">&quot;150&quot;</span> android:drawable=<span class="st">&quot;@drawable/mic_2&quot;</span> /&gt;

   &lt;item android:duration=<span class="st">&quot;150&quot;</span> android:drawable=<span class="st">&quot;@drawable/mic_3&quot;</span> /&gt;

   &lt;item android:duration=<span class="st">&quot;150&quot;</span> android:drawable=<span class="st">&quot;@drawable/mic_4&quot;</span> /&gt;

&lt;/animation-list&gt;</code></pre>

<p>其实就是几张由小到大的半透明圆圈图片,依次播放就形成了说话时的动画效果。</p>

<p>至于调用就很简单了:</p>

<pre class="sourceCode java"><code class="sourceCode java"><span class="co">// Look up the microphone button in the bottom bar.</span>

ib_Speak = (ImageButton)<span class="fu">findViewById</span>(R.<span class="fu">id</span>.<span class="fu">main_bottom_bar_ib_speak</span>);

<span class="co">// Tapping the button opens the speech recognition screen.</span>

ib_Speak.<span class="fu">setOnClickListener</span>(<span class="kw">new</span> View.<span class="fu">OnClickListener</span>() {

    <span class="fu">@Override</span>

    <span class="kw">public</span> <span class="dt">void</span> <span class="fu">onClick</span>(View v) {

        <span class="co">// Started for a result (request code GET_SPEECH_RESULT) so that</span>

        <span class="co">// the outcome comes back through onActivityResult.</span>

        <span class="fu">startActivityForResult</span>(

                <span class="kw">new</span> <span class="fu">Intent</span>(MainActivity.<span class="fu">this</span>,SpeechRecognitionActivity.<span class="fu">class</span>),

                GET_SPEECH_RESULT);

    }

});</code></pre>

<p>获取结果:</p>

<pre class="sourceCode java"><code class="sourceCode java"><span class="fu">@Override</span>

<span class="kw">protected</span> <span class="dt">void</span> <span class="fu">onActivityResult</span>(<span class="dt">int</span> requestCode, <span class="dt">int</span> resultCode, Intent data){

   <span class="kw">if</span> (requestCode == GET_SPEECH_RESULT){

       <span class="kw">if</span> (resultCode == RESULT_CANCELED){

           <span class="co">//do nothing for now</span>

       }<span class="kw">else</span> <span class="kw">if</span> (resultCode == RESULT_OK){

           JLog.<span class="fu">i</span>(<span class="st">&quot;status;&quot;</span>+ data.<span class="fu">getIntExtra</span>(SPEECH_RESULT_STATUS,<span class="dv">0</span>));

           <span class="kw">switch</span> (data.<span class="fu">getIntExtra</span>(SPEECH_RESULT_STATUS,<span class="dv">0</span>)){

               <span class="kw">case</span> SpeechRecognitionActivity.<span class="fu">ERR_NONE</span>:

                   String text = data.<span class="fu">getStringExtra</span>(SPEECH_RESULT_VALUE);

                   <span class="kw">if</span> (text != <span class="kw">null</span> &amp;&amp; text.<span class="fu">trim</span>().<span class="fu">length</span>() &gt; <span class="dv">0</span>){

                       <span class="fu">submitText</span>(text);

                   }

                   <span class="kw">break</span>;

               <span class="kw">default</span>:

                   Toast.<span class="fu">makeText</span>(<span class="kw">this</span>,R.<span class="fu">string</span>.<span class="fu">error</span>,Toast.<span class="fu">LENGTH_SHORT</span>).<span class="fu">show</span>();

                   <span class="kw">break</span>;

           }

       }

   }

}</code></pre>


你可能感兴趣的:(android,Google,语音识别)