因为公司业务的需要,对三家(阿里、百度、讯飞)的语音识别(简短语句识别,Java 版)进行了调用和对比,把自己的测试成果贴出来供需要的人参考使用,并贴出主要代码块.
package com.alibaba.idst.nls.demo;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import com.alibaba.idst.nls.NlsClient;
import com.alibaba.idst.nls.NlsFuture;
import com.alibaba.idst.nls.event.NlsEvent;
import com.alibaba.idst.nls.event.NlsListener;
import com.alibaba.idst.nls.protocol.NlsRequest;
import com.alibaba.idst.nls.protocol.NlsResponse;
/**
 * Demo client for the Alibaba NLS speech recognition service.
 *
 * <p>Reads a raw PCM file, sends it to the service in chunks and prints whatever the
 * service reports back through the {@link NlsListener} callbacks implemented here.
 */
public class AsrDemo implements NlsListener {
    /** Audio file recognized by the no-argument {@link #startAsr()}. */
    private static final String DEFAULT_AUDIO_PATH = "E:\\1.pcm";

    /** Number of bytes read from the audio file and sent per {@code sendVoice} call. */
    private static final int CHUNK_SIZE = 8000;

    private static final NlsClient client = new NlsClient();

    private final String akId;
    private final String akSecret;

    /**
     * Stores the credentials and initializes the shared {@link NlsClient}.
     *
     * @param akId     Access Key ID used to authorize recognition requests
     * @param akSecret Access Key Secret used to authorize recognition requests
     */
    public AsrDemo(String akId, String akSecret) {
        System.out.println("init Nls client...");
        this.akId = akId;
        this.akSecret = akSecret;
        // Initialize the NlsClient.
        client.init();
    }

    /** Recognizes the default audio file; see {@link #startAsr(String)}. */
    public void startAsr() {
        startAsr(DEFAULT_AUDIO_PATH);
    }

    /**
     * Streams the given PCM file to the recognition service and waits for the result.
     *
     * <p>Failures are printed to stderr rather than propagated, matching the demo's
     * best-effort behavior.
     *
     * <p>Note: the file is opened with {@link FileInputStream} rather than
     * {@code ClassLoader.getResourceAsStream}, because the latter resolves names against the
     * classpath and returns {@code null} for an absolute filesystem path.
     *
     * @param audioPath filesystem path of the PCM file to recognize
     */
    public void startAsr(String audioPath) {
        System.out.println("open audio file...");
        // try-with-resources guarantees the audio stream is closed on every path.
        try (InputStream fis = new FileInputStream(new File(audioPath))) {
            System.out.println(fis);
            System.out.println("create NLS future");
            try {
                recognize(fis);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can still observe the interruption.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
            System.out.println("calling NLS service end");
        } catch (Exception e) {
            // Reached only when the file cannot be opened or closed.
            e.printStackTrace();
        }
    }

    /** Builds the request, uploads the audio chunk by chunk and waits for the server result. */
    private void recognize(InputStream audio) throws Exception {
        NlsRequest req = new NlsRequest();
        // The appkey comes from the appkey list on the service's "quick start" help page.
        req.setAppKey("nls-service");
        // Audio format is pcm; the original author notes the service accepts headerless 16k/16bit PCM.
        req.setAsrFormat("pcm");
        // Optional hot-word vocabulary: req.setAsrVocabularyId("<vocabulary id>");
        // Authorize with the credentials given to the constructor.
        req.authorize(akId, akSecret);
        // Create the request, registering this object as the result listener.
        NlsFuture future = client.createNlsFuture(req, this);
        System.out.println("call NLS service");

        byte[] chunk = new byte[CHUNK_SIZE];
        int len;
        while ((len = audio.read(chunk)) > 0) {
            future.sendVoice(chunk, 0, len);
            // NOTE(review): presumably paces the upload to mimic live streaming - confirm.
            Thread.sleep(50);
        }
        // Tell the service that no more audio follows.
        future.sendFinishSignal();
        System.out.println("main thread enter waiting for less than 10s.");
        // Wait up to 10 s for the server-side result.
        future.await(10000);
    }

    /** Closes the shared client and releases its resources. */
    public void shutDown() {
        System.out.println("close NLS client");
        client.close();
        System.out.println("demo done");
    }

    /**
     * Callback for recognition results. Prints the recognized text when present; otherwise
     * falls back to the raw JSON results of the response.
     */
    @Override
    public void onMessageReceived(NlsEvent e) {
        NlsResponse response = e.getResponse();
        int statusCode = response.getStatus_code();
        if (response.getAsr_ret() != null) {
            System.out.println("11111111111111111111111");
            String result = "\nget asr result: statusCode=[" + statusCode + "], " + response.getAsr_ret();
            System.out.println("2222222222222222222222");
            System.out.println(result);
            System.out.println("33333333333333333333333");
        } else if (response.jsonResults != null) {
            System.out.println(response.jsonResults.toString());
        } else {
            System.out.println("no asr result: statusCode=[" + statusCode + "]");
        }
    }

    /** Callback for failed recognition; prints the status code and error message. */
    @Override
    public void onOperationFailed(NlsEvent e) {
        String result = "on operation failed: statusCode=[" + e.getResponse().getStatus_code() + "], "
                + e.getErrorMessage();
        System.out.println(result);
    }

    /** Callback invoked when the socket connection is closed. */
    @Override
    public void onChannelClosed(NlsEvent e) {
        System.out.println("on websocket closed.");
    }

    public static void main(String[] args) {
        String akId = "ak"; // replace with your own Access Key ID
        String akSecret = "as"; // replace with your own Access Key Secret
        AsrDemo asrDemo = new AsrDemo(akId, akSecret);
        asrDemo.startAsr();
        asrDemo.shutDown();
    }
}
/**
*
*/
package baidu.ocr.aip;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import javax.xml.bind.DatatypeConverter;
import org.json.JSONObject;
/**
 * Demo client for the Baidu speech recognition REST API.
 *
 * <p>Fetches an OAuth access token and then uploads a PCM file either as base64 inside a JSON
 * body ({@code method1}) or as the raw request body ({@code method2}), printing the response.
 *
 * @author Administrator
 */
public class speech {
    private static final String serverURL = "http://vop.baidu.com/server_api";
    private static String token = "";
    private static final String testFileName = "E:\\1.pcm";
    // Put your own params here: apiKey and secretKey must be the values of the app you registered.
    private static final String apiKey = "";
    private static final String secretKey = "";
    // MAC (physical) address of this machine, used as the client id sent with each request.
    private static final String cuid = "84-EF-18-C7-1F-AF";
    /** Sample rate of the test file in Hz; both upload variants must report the same value. */
    private static final int SAMPLE_RATE = 16000;
    private static final String UTF_8 = "UTF-8";

    public static void main(String[] args) throws Exception {
        getToken();
        method1();
        // method2();
    }

    /**
     * Requests an access token with the client-credentials grant and stores it in {@code token}.
     *
     * @throws IOException if the token endpoint does not answer with HTTP 200
     */
    private static void getToken() throws Exception {
        String getTokenURL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials"
                + "&client_id=" + URLEncoder.encode(apiKey, UTF_8)
                + "&client_secret=" + URLEncoder.encode(secretKey, UTF_8);
        HttpURLConnection conn = (HttpURLConnection) new URL(getTokenURL).openConnection();
        String body = printResponse(conn);
        if (body.isEmpty()) {
            // printResponse returns "" on a non-200 status; fail clearly instead of on JSON parsing.
            throw new IOException("Failed to obtain access token: token endpoint did not return HTTP 200");
        }
        token = new JSONObject(body).getString("access_token");
    }

    /** Uploads the audio as base64 inside a JSON document and prints the response. */
    private static void method1() throws Exception {
        // Load once so that "len" and "speech" are guaranteed to describe the same bytes.
        byte[] audio = loadFile(new File(testFileName));
        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL).openConnection();

        JSONObject params = new JSONObject();
        params.put("format", "pcm"); // audio file format
        params.put("rate", SAMPLE_RATE); // sample rate of the audio
        params.put("channel", "1");
        params.put("token", token);
        params.put("lan", "zh");
        params.put("cuid", cuid);
        params.put("len", audio.length);
        params.put("speech", DatatypeConverter.printBase64Binary(audio));

        // Encode explicitly: DataOutputStream.writeBytes(String) would drop the high byte of each char.
        post(conn, "application/json; charset=utf-8", params.toString().getBytes(UTF_8));
        printResponse(conn);
    }

    /** Uploads the raw audio bytes as the request body, with cuid and token in the query string. */
    private static void method2() throws Exception {
        byte[] audio = loadFile(new File(testFileName));
        String url = serverURL + "?cuid=" + URLEncoder.encode(cuid, UTF_8)
                + "&token=" + URLEncoder.encode(token, UTF_8);
        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();

        // The declared rate must match the file; it previously said 8000 for the 16 kHz test file.
        post(conn, "audio/pcm; rate=" + SAMPLE_RATE, audio);
        // printResponse already decodes the body as UTF-8, so no charset repair is needed here.
        System.out.println(printResponse(conn));
    }

    /** Sends {@code body} as a POST request with the given Content-Type header. */
    private static void post(HttpURLConnection conn, String contentType, byte[] body) throws IOException {
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", contentType);
        conn.setDoInput(true);
        conn.setDoOutput(true);
        try (DataOutputStream wr = new DataOutputStream(conn.getOutputStream())) {
            wr.write(body);
            wr.flush();
        }
    }

    /**
     * Reads the response body, pretty-prints it as JSON and returns it.
     *
     * @return the response text with lines joined by {@code '\r'}, or {@code ""} when the
     *         status code is not 200
     */
    private static String printResponse(HttpURLConnection conn) throws Exception {
        int status = conn.getResponseCode();
        if (status != 200) {
            // request error
            System.out.println("conn.getResponseCode() = " + status);
            return "";
        }
        StringBuilder response = new StringBuilder();
        // Decode as UTF-8 explicitly instead of relying on the platform default charset.
        try (BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream(), UTF_8))) {
            String line;
            while ((line = rd.readLine()) != null) {
                response.append(line);
                response.append('\r');
            }
        }
        System.out.println(new JSONObject(response.toString()).toString(4));
        return response.toString();
    }

    /**
     * Reads the whole file into memory.
     *
     * @throws IOException if the file exceeds the maximum array size or ends before
     *                     {@code file.length()} bytes were read
     */
    private static byte[] loadFile(File file) throws IOException {
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            throw new IOException("File is too large to load into memory: " + file.getName());
        }
        byte[] bytes = new byte[(int) length];
        try (InputStream is = new FileInputStream(file)) {
            int offset = 0;
            while (offset < bytes.length) {
                int numRead = is.read(bytes, offset, bytes.length - offset);
                if (numRead < 0) {
                    throw new IOException("Could not completely read file " + file.getName());
                }
                offset += numRead;
            }
        }
        return bytes;
    }

    /**
     * Repairs text whose UTF-8 bytes were mistakenly decoded as GBK, by re-encoding it to GBK
     * bytes and decoding those bytes as UTF-8. Both charsets are named explicitly so the result
     * no longer depends on the platform default charset.
     */
    private static String getUtf8String(String s) throws UnsupportedEncodingException {
        return new String(s.getBytes("GBK"), UTF_8);
    }
}
.
测试结果:
1.采用了16k的pcm无压缩的文件.
2.
3
综合来看.
三个平台对基本的语音识别都有较好的辨识度,识别率可以接受并可以商用.
好像阿里和百度小批量均免费,讯飞有时长限制,企业版要收费;但是讯飞识别效果最佳,加入了标点和语气的分析,更人性化.识别效果综合感觉讯飞>百度>阿里,具体使用哪种还需要看公司的选择了.
如有不足,欢迎批评指正.