使用Java的讯飞语音识别示例

简单介绍

最近看到了语音识别和合成的博客,很多平台都提供了语音识别/合成的接口,比如百度和讯飞。感觉讯飞提供的接口更标准更丰富,详细内容可以看官网:讯飞开放平台。结合了另一篇博客《用Java跟图灵机器人对话,如查询天气》,可以做成对话机器人。

这里简单贴一下代码,其他参考资源见文末的链接。

代码

MSC.java  这个类主要识别指定的test.pcm文件,返回识别结果,并可将结果合成为语音文件;辅助类见后面。

package com.sap.ward.XunfeiVoice;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;

import com.iflytek.cloud.speech.RecognizerListener;
import com.iflytek.cloud.speech.RecognizerResult;
import com.iflytek.cloud.speech.SpeechConstant;
import com.iflytek.cloud.speech.SpeechError;
import com.iflytek.cloud.speech.SpeechEvent;
import com.iflytek.cloud.speech.SpeechRecognizer;
import com.iflytek.cloud.speech.SpeechSynthesizer;
import com.iflytek.cloud.speech.SpeechUtility;
import com.iflytek.cloud.speech.SynthesizeToUriListener;

/**
 * Small wrapper around the iFlytek MSC cloud SDK that recognizes speech from a
 * PCM file and synthesizes text into a PCM file.
 *
 * The SDK reports progress through listener callbacks. The blocking entry
 * points ({@link #RecognizeFromCloud} and {@link #SynthesizeFromCloud}) wait on
 * this object's monitor until the matching listener signals completion,
 * whether the operation succeeded or failed.
 */
public class MSC
{
	private static final String APPID = "*********";// Register on the iFlytek site and put your own app id here
	private static MSC mObject;

	// Accumulates partial recognition results until the last one arrives.
	private final StringBuffer mResult = new StringBuffer();
	// Written by SDK callbacks and read by the waiting caller, hence volatile.
	private volatile String sRecognizeResults;
	// True once recognition has produced its final result or failed.
	private volatile boolean bFinishedRecognize = false;
	// True once synthesis has completed, successfully or not.
	private volatile boolean bFinishedSynthesize = false;
	// True only when the last synthesis completed without an error.
	private volatile boolean bSynthesizeSucceeded = false;
	private String sVoiceName = "xiaofeng";// Default voice name

	/**
	 * Set when the SDK reports end of speech, a final result or an error; the
	 * audio feeding loop stops writing once this is true.
	 */
	public volatile boolean mIsEndOfSpeech = false;

	/**
	 * Demo: recognize test.pcm, synthesize the recognized text and play it.
	 */
	public static void main(String[] args)
	{
		DebugLog.SetLogVisible(true);// Show debug output

		// getMscObj() initialises the SDK with APPID, so no separate
		// SpeechUtility.createUtility call is needed here.
		MSC mscObj = getMscObj();
		String recognized = mscObj.RecognizeFromCloud("test.pcm", 16000);// File name and sample rate

		if (DebugLog.isEmpty(recognized)) {
			DebugLog.Log("nothing was recognized, skipping synthesis");
			return;
		}

		// Use one name for both writing and playing the synthesized audio.
		final String ttsFile = "tts_test.pcm";
		mscObj.SynthesizeFromCloud(recognized, ttsFile);

		if (mscObj.bSynthesizeSucceeded)
			Player.Play(ttsFile);// Play the synthesized file
		else
			DebugLog.Log("synthesis failed, nothing to play");
	}

	/**
	 * Creates a wrapper and initialises the SDK with the given app id.
	 *
	 * @param appid
	 *            application id issued by the iFlytek open platform
	 */
	public MSC(String appid)
	{
		DebugLog.SetLogVisible(true);// Debug output is on by default
		SpeechUtility.createUtility("appid=" + appid);
	}

	/**
	 * Recognizes the given PCM file and blocks until the final result, or an
	 * error, has been reported.
	 *
	 * @param pcmFileName
	 *            path of the PCM audio file to recognize
	 * @param sampleRate
	 *            sample rate of the audio, e.g. 16000
	 * @return the recognized text; empty when nothing was recognized or the
	 *         recognition failed
	 */
	public String RecognizeFromCloud(String pcmFileName, int sampleRate)
	{
		Recognize(pcmFileName, sampleRate);
		waitForRecognition();
		DebugLog.Log("最后的识别结果是:" + sRecognizeResults);
		return sRecognizeResults;
	}

	/**
	 * Synthesizes text with the current default voice and blocks until the
	 * synthesis has finished.
	 *
	 * @param strToSynthesize
	 *            text to synthesize
	 * @param filePath
	 *            where the synthesized audio is saved
	 */
	public void SynthesizeFromCloud(String strToSynthesize, String filePath)
	{
		SynthesizeFromCloud(strToSynthesize, filePath, sVoiceName);// Use the default voice
	}

	/**
	 * Synthesizes text and blocks until the synthesis has finished.
	 *
	 * @param strToSynthesize
	 *            text to synthesize
	 * @param filePath
	 *            where the synthesized audio is saved
	 * @param voiceName
	 *            voice to use; when null or empty the current voice is kept.
	 *            A non-empty value also becomes the default for later calls.
	 */
	public void SynthesizeFromCloud(String strToSynthesize, String filePath,
			String voiceName)
	{
		// Compare by content; a reference comparison against "" is unreliable.
		if (!DebugLog.isEmpty(voiceName))
			sVoiceName = voiceName;
		Synthesize(strToSynthesize, filePath);
		waitForSynthesis();
	}

	/**
	 * Returns the shared instance, creating it with {@code APPID} on first use.
	 */
	public static synchronized MSC getMscObj()
	{
		if (mObject == null)
			mObject = new MSC(APPID);
		return mObject;
	}

	/**
	 * Dictation: starts recognition of the given file.
	 */
	private void Recognize(String pcmFileName, int sampleRate)
	{
		RecognizePcmfileByte(pcmFileName, sampleRate);
	}

	/**
	 * Feeds the content of a PCM file to the recognizer as an audio stream.
	 * Results are delivered asynchronously through the recognizer listener.
	 *
	 * Note from the original author: when recognizing directly from a file the
	 * real speaking speed should be simulated to avoid clogging the audio
	 * queue. This method does not throttle the writes.
	 *
	 * @param pcmFileName
	 *            path of the PCM audio file
	 * @param sampleRate
	 *            sample rate of the audio
	 */
	public void RecognizePcmfileByte(String pcmFileName, int sampleRate)
	{
		// Reset per-session state so a previous run cannot leak into this one.
		sRecognizeResults = "";
		mResult.setLength(0);
		mIsEndOfSpeech = false;
		bFinishedRecognize = false;

		if (SpeechRecognizer.getRecognizer() == null)
			SpeechRecognizer.createRecognizer();
		SpeechRecognizer recognizer = SpeechRecognizer.getRecognizer();
		recognizer.setParameter(SpeechConstant.AUDIO_SOURCE, "-1");
		recognizer.setParameter(SpeechConstant.AUDIO_FORMAT, "pcm");

		// The audio already exists at application level, so the SDK does not
		// need to save it again (SpeechConstant.ASR_AUDIO_PATH is left unset).
		recognizer.setParameter(SpeechConstant.RESULT_TYPE, "plain");
		recognizer.setParameter(SpeechConstant.SAMPLE_RATE,
				String.valueOf(sampleRate));
		recognizer.startListening(recListener);

		FileInputStream fis = null;
		final byte[] buffer = new byte[64 * 1024];
		try {
			fis = new FileInputStream(new File(pcmFileName));
			if (0 == fis.available()) {
				DebugLog.Log("no audio avaible!");
				recognizer.cancel();
				// No callback is expected after cancel, so release waiters here.
				finishRecognition();
			} else {
				int lenRead;
				// Stop at end of file (read returns -1) or when the SDK has
				// signalled that it no longer accepts audio.
				while (!mIsEndOfSpeech && (lenRead = fis.read(buffer)) > 0) {
					recognizer.writeAudio(buffer, 0, lenRead);
				}

				recognizer.stopListening();
			}

		}
		catch (Exception e) {
			e.printStackTrace();
			// Abort the session and release waiters; otherwise a missing or
			// unreadable file would block RecognizeFromCloud forever.
			recognizer.cancel();
			finishRecognition();
		}
		finally {
			try {
				if (null != fis) {
					fis.close();
				}
			}
			catch (IOException e) {
				e.printStackTrace();
			}
		}

	}

	/**
	 * Dictation listener.
	 */
	private RecognizerListener recListener = new RecognizerListener()
	{

		public void onBeginOfSpeech()
		{
			DebugLog.Log("onBeginOfSpeech enter");
			DebugLog.Log("*************开始录音*************");
		}

		public void onEndOfSpeech()
		{
			DebugLog.Log("onEndOfSpeech enter");
			// Only stops the audio feeding loop. Completion is signalled by
			// onResult (last result) or onError, so the caller is not released
			// before the final text is available.
			mIsEndOfSpeech = true;
		}

		public void onVolumeChanged(int volume)
		{
			DebugLog.Log("onVolumeChanged enter");
			if (volume > 0)
				DebugLog.Log("*************音量值:" + volume + "*************");

		}

		public void onResult(RecognizerResult result, boolean islast)
		{
			DebugLog.Log("onResult enter");
			if (result != null)
				mResult.append(result.getResultString());

			if (islast) {
				sRecognizeResults = mResult.toString();
				mResult.setLength(0);
				finishRecognition();
			}
		}

		public void onError(SpeechError error)
		{
			DebugLog.Log(
					"*************" + error.getErrorCode() + "*************");
			// Drop partial text so it cannot leak into a later session.
			mResult.setLength(0);
			finishRecognition();
		}

		public void onEvent(int eventType, int arg1, int agr2, String msg)
		{
			DebugLog.Log("onEvent enter");
		}

	};
	// *************************************Silent synthesis*************************************

	/**
	 * Starts an asynchronous synthesis of the given text into a file.
	 *
	 * @param wordToSynthesize
	 *            text to synthesize
	 * @param filePath
	 *            file path the audio is written to
	 */
	private void Synthesize(String wordToSynthesize, String filePath)
	{
		bFinishedSynthesize = false; // Reset the completion flags
		bSynthesizeSucceeded = false;
		SpeechSynthesizer speechSynthesizer = SpeechSynthesizer
				.createSynthesizer();
		// Select the voice
		speechSynthesizer.setParameter(SpeechConstant.VOICE_NAME, sVoiceName);

		// Enable audio buffer events; not required when they are not needed
		speechSynthesizer.setParameter(SpeechConstant.TTS_BUFFER_EVENT, "1");
		// The target path can be chosen freely
		speechSynthesizer.synthesizeToUri(wordToSynthesize, filePath,
				synthesizeToUriListener);
	}

	/**
	 * Synthesis listener.
	 */
	SynthesizeToUriListener synthesizeToUriListener = new SynthesizeToUriListener()
	{

		public void onBufferProgress(int progress)
		{
			DebugLog.Log("*************合成进度*************" + progress);

		}

		public void onSynthesizeCompleted(String uri, SpeechError error)
		{
			if (error == null) {
				DebugLog.Log("*************合成成功*************");
				DebugLog.Log("合成音频生成路径:" + uri);
				bSynthesizeSucceeded = true;
			} else {
				DebugLog.Log("*************" + error.getErrorCode()
						+ "*************");
			}
			// Mark completion on failure too, otherwise the caller would wait
			// forever when the synthesis reports an error.
			bFinishedSynthesize = true;
			waitupLoop();

		}

		public void onEvent(int eventType, int arg1, int arg2, int arg3,
				Object obj1, Object obj2)
		{
			if (SpeechEvent.EVENT_TTS_BUFFER != eventType)
				return;

			// String concatenation handles any object type, so no cast that
			// could throw ClassCastException is needed for obj2.
			DebugLog.Log("onEvent: type=" + eventType + ", arg1=" + arg1
					+ ", arg2=" + arg2 + ", arg3=" + arg3 + ", obj2="
					+ obj2);

			if (!(obj1 instanceof ArrayList)) {
				DebugLog.Log("onEvent error obj1 is not ArrayList !");
				return;
			}

			final ArrayList<?> bufs = (ArrayList<?>) obj1;
			for (final Object obj : bufs) {
				if (obj instanceof byte[]) {
					final byte[] buf = (byte[]) obj;
					DebugLog.Log("onEvent buf length: " + buf.length);
				} else {
					DebugLog.Log(
							"onEvent error element is not byte[] !");
				}
			}
		}

	};

	/**
	 * Marks the current recognition session as finished and wakes up waiters.
	 */
	private void finishRecognition()
	{
		mIsEndOfSpeech = true;
		bFinishedRecognize = true;
		waitupLoop();
	}

	/**
	 * Blocks until the recognition session has finished or the thread is
	 * interrupted.
	 */
	private void waitForRecognition()
	{
		DebugLog.Log("waitting....");
		synchronized (this) {
			while (!bFinishedRecognize) {
				try {
					wait();
				}
				catch (InterruptedException e) {
					// Preserve the interrupt for the caller and stop waiting.
					Thread.currentThread().interrupt();
					return;
				}
			}
		}
	}

	/**
	 * Blocks until the synthesis has finished or the thread is interrupted.
	 */
	private void waitForSynthesis()
	{
		DebugLog.Log("等待合成完成……");
		synchronized (this) {
			while (!bFinishedSynthesize) {
				try {
					wait();
				}
				catch (InterruptedException e) {
					// Preserve the interrupt for the caller and stop waiting.
					Thread.currentThread().interrupt();
					return;
				}
			}
		}
	}

	/**
	 * Wakes up every thread waiting on this object for a completion flag.
	 */
	private void waitupLoop()
	{
		synchronized (this) {
			notifyAll();
		}
	}

}

DebugLog.java 辅助类,输出调试日志,可以选择关闭

package com.sap.ward.XunfeiVoice;

import java.text.SimpleDateFormat;

/**
 * Minimal console logger used by the demo. Output goes to standard output and
 * can be switched off globally with {@link #SetLogVisible(boolean)}.
 */
public class DebugLog
{
	// Read by SDK callback threads as well as the main thread, hence volatile.
	private static volatile boolean bBlogVisible = true;

	/**
	 * Turns log output on or off.
	 *
	 * @param visible
	 *            true to print log lines, false to suppress them
	 */
	public static void SetLogVisible(boolean visible)
	{
		bBlogVisible = visible;
	}

	/**
	 * Prints a timestamped log line prefixed with the given tag.
	 *
	 * @param tag
	 *            short label identifying the source of the message; when null
	 *            or empty the line is printed without a tag
	 * @param log
	 *            message text
	 */
	public static void Log(String tag, String log)
	{
		if (isEmpty(tag))
			Log(log);
		else
			Log("[" + tag + "] " + log);
	}

	/**
	 * Prints a log line prefixed with the current time in
	 * {@code yyyy-MM-dd HH:mm:ss} format. Does nothing when logging is off.
	 *
	 * @param log
	 *            message text
	 */
	public static void Log(String log)
	{
		// Skip the date formatting entirely when nothing will be printed.
		if (!bBlogVisible)
			return;

		// SimpleDateFormat is not thread-safe, so use a fresh one per call.
		SimpleDateFormat dateFormat = new SimpleDateFormat(
				"yyyy-MM-dd HH:mm:ss");
		String date = dateFormat.format(new java.util.Date());
		System.out.println("<" + date + ">" + log);
	}

	/**
	 * Tells whether a string is null or has zero length.
	 *
	 * @param string
	 *            value to check, may be null
	 * @return true when the value is null or empty
	 */
	public static boolean isEmpty(String string)
	{
		return string == null || string.isEmpty();
	}
}

参考链接:

“讯飞语音+”语音识别开放功能使用方法介绍
科大讯飞语音云java版语音识别sdk的简单使用示例

科大讯飞语音识别(获取音频流文件中文字)



你可能感兴趣的:(JAVA)