java版阿里云,百度ai,讯飞语音识别效果简单对比及demo

因为公司业务的需要,我对三家的语音识别(短语句识别,Java版)进行了调用和对比,现把自己的测试结果贴出来,供有需要的人参考,并附上主要代码块。

阿里云的一句话识别:

package com.alibaba.idst.nls.demo;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import com.alibaba.idst.nls.NlsClient;
import com.alibaba.idst.nls.NlsFuture;
import com.alibaba.idst.nls.event.NlsEvent;
import com.alibaba.idst.nls.event.NlsListener;
import com.alibaba.idst.nls.protocol.NlsRequest;
import com.alibaba.idst.nls.protocol.NlsResponse;

/**
 * Minimal demo of the Alibaba Cloud NLS one-sentence speech recognition client.
 *
 * <p>Streams a local PCM file to the service in small chunks and prints whatever the service
 * reports back through the {@link NlsListener} callbacks.
 */
public class AsrDemo implements NlsListener {
    /** Local audio file that is sent for recognition. */
    private static final String AUDIO_FILE_PATH = "E:\\1.pcm";

    private static final NlsClient client = new NlsClient();
    private final String akId;
    private final String akSecret;

    /**
     * Stores the credentials and initialises the shared NLS client.
     *
     * @param akId     Access Key ID used to authorise requests
     * @param akSecret Access Key Secret used to authorise requests
     */
    public AsrDemo(String akId, String akSecret) {
        System.out.println("init Nls client...");
        this.akId = akId;
        this.akSecret = akSecret;
        // Initialise the NlsClient.
        client.init();
    }

    /**
     * Opens the audio file, streams it to the service and waits up to 10 seconds for the result.
     * Failures are printed to stderr rather than propagated.
     */
    public void startAsr() {
        System.out.println("open audio file...");
        InputStream fis = null;
        try {
            // The audio is read straight from the file system. The official demo used
            // getClassLoader().getResourceAsStream(...), which resolves classpath resource names
            // and therefore cannot find an absolute path such as "E:\\1.pcm".
            fis = new FileInputStream(new File(AUDIO_FILE_PATH));
            System.out.println(fis);
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (fis == null) {
            return;
        }

        System.out.println("create NLS future");
        try {
            NlsRequest req = new NlsRequest();
            req.setAppKey("nls-service"); // take the appkey from the list on the "quick start" help page
            req.setAsrFormat("pcm"); // audio format: headerless 16 kHz / 16-bit PCM is supported

            /* Hot-word configuration (optional) */
            // req.setAsrVocabularyId("热词词表id"); // hot-word vocabulary id

            // Authorise with the credentials supplied to the constructor.
            req.authorize(akId, akSecret);
            // Create the request handle, registering this object as the listener.
            NlsFuture future = client.createNlsFuture(req, this);
            System.out.println("call NLS service");
            byte[] chunk = new byte[8000];
            int len;
            while ((len = fis.read(chunk)) > 0) {
                future.sendVoice(chunk, 0, len); // send one chunk of audio data
                Thread.sleep(50); // short pause between chunks — presumably paces the upload; TODO confirm
            }
            future.sendFinishSignal(); // tell the service that the audio is complete
            System.out.println("main thread enter waiting for less than 10s.");
            future.await(10000); // timeout for the server-side result
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always release the file handle, even when sending fails.
            try {
                fis.close();
            } catch (java.io.IOException closeError) {
                closeError.printStackTrace();
            }
        }
        System.out.println("calling NLS service end");
    }

    /** Closes the shared client and releases its resources. */
    public void shutDown() {
        System.out.println("close NLS client");
        client.close();
        System.out.println("demo done");
    }

    /**
     * Callback for recognition results. Prints the ASR result when the response carries one,
     * otherwise prints the raw JSON results of the response.
     */
    @Override
    public void onMessageReceived(NlsEvent e) {
        NlsResponse response = e.getResponse();
        int statusCode = response.getStatus_code();
        if (response.getAsr_ret() != null) {
            System.out.println("11111111111111111111111");
            String result = "\nget asr result: statusCode=[" + statusCode + "], " + response.getAsr_ret();
            System.out.println("2222222222222222222222");
            System.out.println(result);
            System.out.println("33333333333333333333333");
        } else {
            // No ASR text in this message: fall back to the raw results (prints "null" if absent).
            System.out.println(response.jsonResults);
        }
    }

    /** Callback for a failed recognition; prints the status code and error message. */
    @Override
    public void onOperationFailed(NlsEvent e) {
        String result = "on operation failed: statusCode=[" + e.getResponse().getStatus_code() + "], "
                + e.getErrorMessage();
        System.out.println(result);
    }

    /** Callback invoked when the socket connection is closed. */
    @Override
    public void onChannelClosed(NlsEvent e) {
        System.out.println("on websocket closed.");
    }

    /** Runs the demo once; replace the placeholder credentials with your own keys. */
    public static void main(String[] args) {
        String akId = "ak"; // your own Access Key ID
        String akSecret = "as"; // your own Access Key Secret
        AsrDemo asrDemo = new AsrDemo(akId, akSecret);
        try {
            asrDemo.startAsr();
        } finally {
            // Release the client even if recognition aborts unexpectedly.
            asrDemo.shutDown();
        }
    }
}

百度AI智能语音识别:

/**
 * 
 */
package baidu.ocr.aip;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;

import javax.xml.bind.DatatypeConverter;

import org.json.JSONObject;

/**
 * Demo client for the Baidu speech recognition REST endpoint.
 *
 * <p>Fetches an OAuth access token, then uploads a local PCM file either base64-encoded inside a
 * JSON body ({@code method1}) or as the raw request body ({@code method2}).
 *
 * @author Administrator
 */
public class speech {

    // NOTE(review): plain HTTP, so the access token is sent unencrypted — consider HTTPS if the
    // endpoint supports it.
    private static final String serverURL = "http://vop.baidu.com/server_api";
    private static String token = "";
    private static final String testFileName = "E:\\1.pcm"; // technical support provided by Baidu Speech
    /** Sample rate (Hz) declared for the test file; change it to match your recording. */
    private static final int SAMPLE_RATE = 16000;
    // put your own params here
    // The three values below must be filled in with those of your own registered app.
    private static final String apiKey = "";
    private static final String secretKey = "";
    // MAC (physical) address of this machine; it is listed when you display the IP
    // configuration on the command line.
    private static final String cuid = "84-EF-18-C7-1F-AF";

    public static void main(String[] args) throws Exception {
        getToken();
        method1();
        // method2(); // alternative: upload the raw audio as the request body
    }

    /**
     * Requests an access token with the client-credentials grant and stores it in {@code token}.
     *
     * @throws IllegalStateException if the token endpoint does not answer with HTTP 200
     */
    private static void getToken() throws Exception {
        String getTokenURL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials" + "&client_id="
                + apiKey + "&client_secret=" + secretKey;
        HttpURLConnection conn = (HttpURLConnection) new URL(getTokenURL).openConnection();
        String body = printResponse(conn);
        if (body.isEmpty()) {
            // printResponse returns "" on a non-200 status; fail clearly instead of with a
            // JSON parse error on an empty document.
            throw new IllegalStateException("Failed to obtain access token: HTTP " + conn.getResponseCode());
        }
        token = new JSONObject(body).getString("access_token");
    }

    /** Uploads the audio base64-encoded inside a JSON request body and prints the response. */
    private static void method1() throws Exception {
        File pcmFile = new File(testFileName);
        byte[] audio = loadFile(pcmFile);
        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL).openConnection();

        // construct params
        JSONObject params = new JSONObject();
        params.put("format", "pcm"); // audio file format
        params.put("rate", SAMPLE_RATE); // sample rate of the audio
        params.put("channel", "1");
        params.put("token", token);
        params.put("lan", "zh");
        params.put("cuid", cuid);
        params.put("len", audio.length); // byte length of the audio before base64 encoding
        params.put("speech", DatatypeConverter.printBase64Binary(audio));

        // add request header
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");

        conn.setDoInput(true);
        conn.setDoOutput(true);

        // send request
        DataOutputStream wr = new DataOutputStream(conn.getOutputStream());
        try {
            // Encode explicitly as UTF-8 to match the declared charset; writeBytes(String)
            // would discard the high byte of every char.
            wr.write(params.toString().getBytes("UTF-8"));
            wr.flush();
        } finally {
            wr.close();
        }

        printResponse(conn);
    }

    /** Uploads the raw audio bytes as the request body and prints the response. */
    private static void method2() throws Exception {
        File pcmFile = new File(testFileName);
        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL + "?cuid=" + cuid + "&token=" + token)
                .openConnection();

        // add request header
        conn.setRequestMethod("POST");
        // Declare the same sample rate as method1, since both upload the same file.
        conn.setRequestProperty("Content-Type", "audio/pcm; rate=" + SAMPLE_RATE);

        conn.setDoInput(true);
        conn.setDoOutput(true);

        // send request
        DataOutputStream wr = new DataOutputStream(conn.getOutputStream());
        try {
            wr.write(loadFile(pcmFile));
            wr.flush();
        } finally {
            wr.close();
        }

        // printResponse already decodes the body as UTF-8, so no charset round-trip is needed.
        System.out.println(printResponse(conn));
    }

    /**
     * Reads the response body, pretty-prints it as JSON and returns the raw text.
     *
     * @param conn connection whose request has already been configured/sent
     * @return the response body with lines joined by {@code '\r'}, or {@code ""} when the status
     *         code is not 200
     */
    private static String printResponse(HttpURLConnection conn) throws Exception {
        int status = conn.getResponseCode();
        if (status != 200) {
            // request error
            System.out.println("conn.getResponseCode() = " + status);
            return "";
        }
        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
        BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"));
        StringBuilder response = new StringBuilder();
        try {
            String line;
            while ((line = rd.readLine()) != null) {
                response.append(line);
                response.append('\r');
            }
        } finally {
            rd.close();
        }
        System.out.println(new JSONObject(response.toString()).toString(4));
        return response.toString();
    }

    /**
     * Reads the whole file into memory.
     *
     * @param file file to read
     * @return the complete file content
     * @throws IOException if the file is larger than a byte array can hold or cannot be read fully
     */
    private static byte[] loadFile(File file) throws IOException {
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            // The (int) cast below would otherwise silently truncate the size.
            throw new IOException("File is too large to load into memory: " + file.getName());
        }
        byte[] bytes = new byte[(int) length];

        InputStream is = new FileInputStream(file);
        try {
            int offset = 0;
            int numRead;
            while (offset < bytes.length && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
                offset += numRead;
            }
            if (offset < bytes.length) {
                throw new IOException("Could not completely read file " + file.getName());
            }
            return bytes;
        } finally {
            // Close on every path, including when read() throws.
            is.close();
        }
    }
}

讯飞SDK很完整,拿过来就能直接用。

.
测试结果:
1.采用了16k的pcm无压缩的文件.
java版阿里云,百度ai,讯飞语音识别效果简单对比及demo_第1张图片
2.java版阿里云,百度ai,讯飞语音识别效果简单对比及demo_第2张图片
3.java版阿里云,百度ai,讯飞语音识别效果简单对比及demo_第3张图片

综合来看.
三个平台对基本的语音识别都有较好的辨识度,识别率可以接受,并且可以商用。
阿里和百度小批量调用似乎均免费,讯飞有时长限制,企业版需要收费;但讯飞的识别效果最佳,加入了标点和语气的分析,更人性化。综合识别效果来看,讯飞>百度>阿里,具体使用哪家还需要看公司的选择。

如有不足,欢迎批评指正.

你可能感兴趣的:(语音识别)