最近看到了语音识别和合成的博客,很多平台都提供了语音识别/合成的接口,比如百度和讯飞。感觉讯飞提供的接口更标准更丰富,详细内容可以看官网:讯飞开放平台。结合了另一篇博客《用Java跟图灵机器人对话,如查询天气》,可以做成对话机器人。
这里简单贴一下代码,其他参考资源见文末的链接。
MSC.java:这个类主要识别指定的 test.pcm 文件并返回识别结果,辅助类见后面。
package com.sap.ward.XunfeiVoice;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import com.iflytek.cloud.speech.RecognizerListener;
import com.iflytek.cloud.speech.RecognizerResult;
import com.iflytek.cloud.speech.SpeechConstant;
import com.iflytek.cloud.speech.SpeechError;
import com.iflytek.cloud.speech.SpeechEvent;
import com.iflytek.cloud.speech.SpeechRecognizer;
import com.iflytek.cloud.speech.SpeechSynthesizer;
import com.iflytek.cloud.speech.SpeechUtility;
import com.iflytek.cloud.speech.SynthesizeToUriListener;
public class MSC
{
private static final String APPID = "*********";// 去官网注册,换成自己的就行
private static MSC mObject;
private static StringBuffer mResult = new StringBuffer();
private String sRecognizeResults;
private boolean bFinishedSynthesize = false;// 合成完成标志位
private String sVoiceName = "xiaofeng";// Default voice name
public static void main(String[] args)
{
DebugLog.SetLogVisible(true);// 显示调试信息
SpeechUtility.createUtility("appid=" + APPID);
MSC mscObj = getMscObj();
mscObj.Recognize("test.pcm", 16000);// 指定文件名和采样率
while (!mscObj.mIsEndOfSpeech) {
DebugLog.Log("waitting....");
}
// getMscObj().waitupLoop();
DebugLog.Log("最后的识别结果是:" + mscObj.sRecognizeResults);
// 合成语音
mscObj.Synthesize(mscObj.sRecognizeResults, "tts.test.pcm");
while (!mscObj.bFinishedSynthesize)// 等待合成完成,则播放合成的pcm文件
{
DebugLog.Log("等待合成完成……");
}
Player.Play("tts_test.pcm");// 播放合成后的那文件
}
public MSC(String appid)
{
DebugLog.SetLogVisible(true);// 默认显示调试信息
SpeechUtility.createUtility("appid=" + appid);
}
/**
*
* @param sAppid
* @param pcmFileName
* @param sampleRate
* @return
*/
public String RecognizeFromCloud(String pcmFileName, int sampleRate)
{
Recognize(pcmFileName, sampleRate);
while (!mIsEndOfSpeech) {
DebugLog.Log("waitting....");
}
// getMscObj().waitupLoop();
DebugLog.Log("最后的识别结果是:" + sRecognizeResults);
return sRecognizeResults;
}
/**
*
* @param strToSynthesize
* 需要翻译的文字
* @param filePath
* 翻译的文件保存路径
*/
public void SynthesizeFromCloud(String strToSynthesize, String filePath)
{
SynthesizeFromCloud(strToSynthesize, filePath, sVoiceName);// 使用默认发音人
}
/**
*
* @param strToSynthesize
* 需要翻译的文字
* @param filePath
* 翻译的文件保存路径
* @param voiceName
* 发音人
*/
public void SynthesizeFromCloud(String strToSynthesize, String filePath,
String voiceName)
{
// 合成语音
if ("" != voiceName)
sVoiceName = voiceName;
Synthesize(strToSynthesize, filePath);
while (!bFinishedSynthesize)// 等待合成完成,则播放合成的pcm文件
{
DebugLog.Log("等待合成完成……");
}
}
public static MSC getMscObj()
{
if (mObject == null)
mObject = new MSC("591932d2");
return mObject;
}
/**
* 听写
*/
public boolean mIsEndOfSpeech = false;
private void Recognize(String pcmFileName, int sampleRate)
{
sRecognizeResults = "";// 清空识别结果
if (SpeechRecognizer.getRecognizer() == null)
SpeechRecognizer.createRecognizer();
mIsEndOfSpeech = false;
RecognizePcmfileByte(pcmFileName, sampleRate);
}
/**
* 自动化测试注意要点 如果直接从音频文件识别,需要模拟真实的音速,防止音频队列的堵塞
*/
public void RecognizePcmfileByte(String pcmFileName, int sampleRate)
{
SpeechRecognizer recognizer = SpeechRecognizer.getRecognizer();
recognizer.setParameter(SpeechConstant.AUDIO_SOURCE, "-1");
recognizer.setParameter(SpeechConstant.AUDIO_FORMAT, "pcm");
// 写音频流时,文件是应用层已有的,不必再保存
// recognizer.setParameter(SpeechConstant.ASR_AUDIO_PATH,
// "./iat_test.pcm");
recognizer.setParameter(SpeechConstant.RESULT_TYPE, "plain");
recognizer.setParameter(SpeechConstant.SAMPLE_RATE,
(String) "" + sampleRate);
recognizer.startListening(recListener);
FileInputStream fis = null;
final byte[] buffer = new byte[64 * 1024];
try {
fis = new FileInputStream(new File(pcmFileName));
if (0 == fis.available()) {
mResult.append("no audio avaible!");
recognizer.cancel();
} else {
int lenRead = buffer.length;
while (buffer.length == lenRead && !mIsEndOfSpeech) {
lenRead = fis.read(buffer);
recognizer.writeAudio(buffer, 0, lenRead);
} // end of while
recognizer.stopListening();
}
}
catch (Exception e) {
e.printStackTrace();
}
finally {
try {
if (null != fis) {
fis.close();
fis = null;
}
}
catch (IOException e) {
e.printStackTrace();
}
} // end of try-catch-finally
}
/**
* 听写监听器
*/
private RecognizerListener recListener = new RecognizerListener()
{
public void onBeginOfSpeech()
{
DebugLog.Log("onBeginOfSpeech enter");
DebugLog.Log("*************开始录音*************");
}
public void onEndOfSpeech()
{
DebugLog.Log("onEndOfSpeech enter");
mIsEndOfSpeech = true;
}
public void onVolumeChanged(int volume)
{
DebugLog.Log("onVolumeChanged enter");
if (volume > 0)
DebugLog.Log("*************音量值:" + volume + "*************");
}
public void onResult(RecognizerResult result, boolean islast)
{
DebugLog.Log("onResult enter");
mResult.append(result.getResultString());
if (islast) {
// DebugLog.Log("识别结果为:" + mResult.toString());
sRecognizeResults = mResult.toString();
mIsEndOfSpeech = true;
mResult.delete(0, mResult.length());
waitupLoop();
}
}
public void onError(SpeechError error)
{
mIsEndOfSpeech = true;
DebugLog.Log(
"*************" + error.getErrorCode() + "*************");
waitupLoop();
}
public void onEvent(int eventType, int arg1, int agr2, String msg)
{
DebugLog.Log("onEvent enter");
}
};
// *************************************无声合成*************************************
/**
*
* @param wordToSynthesize
* @param filePath
* 文件路径
*/
private void Synthesize(String wordToSynthesize, String filePath)
{
bFinishedSynthesize = false; // 完成标志设置为false
SpeechSynthesizer speechSynthesizer = SpeechSynthesizer
.createSynthesizer();
// 设置发音人
speechSynthesizer.setParameter(SpeechConstant.VOICE_NAME, sVoiceName);
// 启用合成音频流事件,不需要时,不用设置此参数
speechSynthesizer.setParameter(SpeechConstant.TTS_BUFFER_EVENT, "1");
// 设置合成音频保存位置(可自定义保存位置),默认不保存
speechSynthesizer.synthesizeToUri(wordToSynthesize, filePath,
synthesizeToUriListener);
}
/**
* 合成监听器
*/
SynthesizeToUriListener synthesizeToUriListener = new SynthesizeToUriListener()
{
public void onBufferProgress(int progress)
{
DebugLog.Log("*************合成进度*************" + progress);
}
public void onSynthesizeCompleted(String uri, SpeechError error)
{
if (error == null) {
DebugLog.Log("*************合成成功*************");
DebugLog.Log("合成音频生成路径:" + uri);
bFinishedSynthesize = true;// 合成完成
} else
DebugLog.Log("*************" + error.getErrorCode()
+ "*************");
waitupLoop();
}
public void onEvent(int eventType, int arg1, int arg2, int arg3,
Object obj1, Object obj2)
{
if (SpeechEvent.EVENT_TTS_BUFFER == eventType) {
DebugLog.Log("onEvent: type=" + eventType + ", arg1=" + arg1
+ ", arg2=" + arg2 + ", arg3=" + arg3 + ", obj2="
+ (String) obj2);
ArrayList> bufs = null;
if (obj1 instanceof ArrayList>) {
bufs = (ArrayList>) obj1;
} else {
DebugLog.Log("onEvent error obj1 is not ArrayList !");
} // end
// of
// if-else
// instance
// of
// ArrayList
if (null != bufs) {
for (final Object obj : bufs) {
if (obj instanceof byte[]) {
final byte[] buf = (byte[]) obj;
DebugLog.Log("onEvent buf length: " + buf.length);
} else {
DebugLog.Log(
"onEvent error element is not byte[] !");
}
} // end
// of
// for
} // end of if bufs not null
} // end of if tts buffer event
// if (SpeechEvent. == eventType) {
//
// }
}
};
private void waitupLoop()
{
synchronized (this) {
MSC.this.notify();
}
}
}
package com.sap.ward.XunfeiVoice;
import java.text.SimpleDateFormat;
/**
 * Minimal console logger whose output can be switched on and off globally.
 */
public class DebugLog
{
    /** When false, both {@code Log} overloads print nothing. */
    private static boolean logVisible = true;

    /**
     * Turns log output on or off.
     *
     * @param visible {@code true} to print log messages, {@code false} to
     *                suppress them
     */
    public static void SetLogVisible(boolean visible)
    {
        logVisible = visible;
    }

    /**
     * Prints the message as-is to standard output when logging is visible.
     *
     * @param tag accepted but not included in the output
     * @param log message to print
     */
    public static void Log(String tag, String log)
    {
        if (!logVisible) {
            return;
        }
        System.out.println(log);
    }

    /**
     * Prints the message to standard output, prefixed with the current time
     * as {@code <yyyy-MM-dd HH:mm:ss>}, when logging is visible.
     *
     * @param log message to print
     */
    public static void Log(String log)
    {
        if (!logVisible) {
            return;
        }
        String stamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
                .format(new java.util.Date());
        System.out.println("<" + stamp + ">" + log);
    }

    /**
     * Tells whether a string is {@code null} or has length zero.
     *
     * @param string value to check
     * @return {@code true} for {@code null} or {@code ""}, otherwise
     *         {@code false}
     */
    public static boolean isEmpty(String string)
    {
        return string == null || string.isEmpty();
    }
}
“讯飞语音+”语音识别开放功能使用方法介绍
科大讯飞语音云java版语音识别sdk的简单使用示例
科大讯飞语音识别(获取音频流文件中文字)