JAVA对接阿里语音识别引擎

1、官网SDK地址

https://help.aliyun.com/document_detail/84430.html?spm=a2c4g.11186623.6.581.73f65edftbwk9R

阿里的SDK比腾讯的好一些,可以直接从Maven中央仓库下载,并且demo写得比较详细。在开始对接之前,我们需要仔细阅读接口说明文档,这样可以减少对接过程中踩的坑。下面开始对接:

2、 先搞jar包

    
    <dependency>
      <groupId>com.alibaba.nls</groupId>
      <artifactId>nls-sdk-transcriber</artifactId>
      <version>2.1.6</version>
    </dependency>

3、注意事项 

  • NlsClient使用了Netty框架,NlsClient对象的创建会消耗一定时间和资源,一经创建可以重复使用。建议调用程序将NlsClient的创建和关闭与程序本身的生命周期相结合。

  • SpeechTranscriber对象不可重复使用,一个识别任务对应一个SpeechTranscriber对象。例如,N个音频文件要进行N次识别任务,创建N个SpeechTranscriber对象。

  • SpeechTranscriberListener对象和SpeechTranscriber对象是一一对应的,不能在不同SpeechTranscriber对象使用同一个SpeechTranscriberListener对象,否则不能将各识别任务区分开。

4、对接流程

4.1 NlsClient类

从上面的注意事项可以看出,NlsClient需要我们单独创建并全局复用,所以我们在项目启动的时候直接把它创建出来:

package com.jack.chat.application.service;

import com.alibaba.nls.client.AccessToken;
import com.alibaba.nls.client.protocol.NlsClient;
import org.apache.commons.collections4.Get;

import java.io.IOException;

/**
 * Holder for the application-wide {@link NlsClient}.
 *
 * <p>Constructing an instance applies for an access token with the given credentials and
 * stores a freshly created client in a static field; {@link #getNlsClient()} hands that
 * client out. The client is meant to be created once and share the application's lifecycle.
 *
 * @author zhenghao
 * @date 2020/8/14 19:16
 */
public class NlsClientService {

    /** Shared client; stays {@code null} if token retrieval failed. */
    private static NlsClient client;

    /**
     * Requests an access token and creates the shared {@link NlsClient}.
     *
     * <p>NOTE: the token expires. This demo fetches it only once; real deployments must
     * obtain a new token before {@code accessToken.getExpireTime()} is reached.
     *
     * @param id     access key id used to apply for the token
     * @param secret access key secret used to apply for the token
     * @param url    service address; when {@code null} or empty the SDK's default address is used
     */
    public NlsClientService(String id, String secret, String url) {
        AccessToken accessToken = new AccessToken(id, secret);
        try {
            accessToken.apply();
            System.out.println("get token: " + ", expire time: " + accessToken.getExpireTime());
            // A null url is treated like an empty one instead of failing with an NPE.
            if (url == null || url.isEmpty()) {
                client = new NlsClient(accessToken.getToken());
            } else {
                // Caller supplied an explicit service address.
                client = new NlsClient(url, accessToken.getToken());
            }
        } catch (IOException e) {
            // Best effort, as before: the client is simply not created. Say so explicitly,
            // because getNlsClient() will return null from here on.
            System.err.println("Failed to obtain NLS access token; NlsClient was not created");
            e.printStackTrace();
        }
    }

    /**
     * Returns the shared client.
     *
     * @return the client created by the constructor, or {@code null} if none was created
     */
    public static NlsClient getNlsClient() {
        return client;
    }

}
package com.jack.chat.application.service;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationListener;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.stereotype.Service;

/**
 * Startup hook that creates the shared NLS client once the root application context
 * has been refreshed (i.e. it runs along with Tomcat startup).
 *
 * @author zhenghao
 * @date 2020/7/211:41
 */
@Service
public class ApplicationService implements ApplicationListener<ContextRefreshedEvent> {

    private static final Logger log = LoggerFactory.getLogger(ApplicationService.class);

    /** Set to this instance when the root context is refreshed. */
    public static ApplicationService application = null;
    String id = "";
    String secret = "";
    // Leave empty to use the default: wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1
    String url = "";

    /**
     * Initialises the NLS client when the root context (the one without a parent) is refreshed.
     *
     * <p>The listener is typed with {@code ContextRefreshedEvent}; with the raw
     * {@code ApplicationListener} type this method would not override the interface method.
     *
     * @param contextRefreshedEvent the refresh event published by Spring
     */
    @Override
    public void onApplicationEvent(ContextRefreshedEvent contextRefreshedEvent) {
        // Child contexts publish the same event; only react to the root context.
        if (contextRefreshedEvent.getApplicationContext().getParent() == null) {
            log.info("客户端启动-------------------------->");

            synchronized (this) {
                ApplicationService.application = this;
                // The constructor stores the created client in NlsClientService's static field.
                new NlsClientService(id, secret, url);
                System.out.println("阿里云 nls 初始化完毕");
            }
        }
    }
}

4.2 核心类

package com.jack.chat.asrali.service;

import com.alibaba.nls.client.protocol.InputFormatEnum;
import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriber;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
import com.jack.chat.asr.model.AsrResultModel;
import com.jack.chat.fs.service.FsService;
import com.jack.chat.socket.service.SocketServer;
import com.jack.chat.socket.service.WebSocketMapUtil;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.context.ContextLoader;
import org.springframework.web.context.WebApplicationContext;

import java.io.IOException;

/**
 * One real-time transcription session: wraps a single {@link SpeechTranscriber}, feeds it
 * audio via {@link #send(byte[])} and pushes every recognised text to the WebSocket that
 * belongs to the phone number given to {@link #init(NlsClient, String, String)}.
 *
 * <p>A SpeechTranscriber (and its listener) is not reusable, so create one instance of this
 * class per recognition task.
 *
 * @author zhenghao
 * @date 2020/8/14 18:47
 */
public class ALiAsrConnnection {
    private static final Logger logger = LoggerFactory.getLogger(ALiAsrConnnection.class);

    SpeechTranscriber transcriber = null;
    private FsService fsService;
    private String ptel;

    /**
     * Creates the transcriber, configures it and starts the session.
     *
     * <p>Failures are logged and not rethrown; the session is ended later with {@link #close()}.
     *
     * @param client shared NLS client
     * @param appKey application key set on the transcriber
     * @param tel    phone number used to look up the WebSocket channel for results
     */
    public void init(NlsClient client, String appKey, String tel) {
        try {
            ptel = tel;
            WebApplicationContext wac = ContextLoader.getCurrentWebApplicationContext();
            fsService = (FsService) wac.getBean("fsService");

            // Create the instance and establish the connection.
            transcriber = new SpeechTranscriber(client, getTranscriberListener());
            transcriber.setAppKey(appKey);
            // Input audio encoding.
            transcriber.setFormat(InputFormatEnum.PCM);
            // Input audio sample rate.
            transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_8K);
            // Return intermediate results.
            transcriber.setEnableIntermediateResult(true);
            // Generate and return punctuation.
            transcriber.setEnablePunctuation(true);
            // Inverse text normalisation (e.g. return "100" for "one hundred") is off.
            transcriber.setEnableITN(false);
            // Enable disfluency smoothing.
            transcriber.addCustomedParam("disfluency", true);
            // Do not ignore single-sentence timeouts.
            transcriber.addCustomedParam("enable_ignore_sentence_timeout", false);

            // Optional tuning parameters, left at their defaults:
            //   VAD sentence-break silence, default 800 ms, valid range [200, 2000]
            //transcriber.addCustomedParam("max_sentence_silence", 600);
            //   semantic sentence detection
            //transcriber.addCustomedParam("enable_semantic_sentence_detection",false);
            //   word mode
            //transcriber.addCustomedParam("enable_words",true);
            //   VAD model
            //transcriber.addCustomedParam("vad_model","farfield");
            //   VAD noise threshold in [-1, 1]: towards -1 more input is judged as speech
            //   (more noise may be misrecognised); towards +1 more input is judged as noise
            //   (more speech may be rejected). Advanced parameter, adjust with care and test.
            //transcriber.addCustomedParam("speech_noise_threshold",0.3);
            //   id of a trained custom language model
            //transcriber.addCustomedParam("customization_id","你的定制语言模型id");
            //   id of a trained custom hot-word list
            //transcriber.addCustomedParam("vocabulary_id","你的定制热词id");
            //   post-processing after VAD sentence break
            //transcriber.addCustomedParam("enable_vad_unify_post",false);

            // Serialises the settings above to JSON, sends them to the server and waits
            // for the server's confirmation.
            transcriber.start();
            // Audio arrives as a live stream through send(), so no sleep/pacing is needed
            // here, and the session is deliberately left open: stop/close happen in close().
        } catch (Exception e) {
            // Log the exception itself, not only getMessage(), which is null for an NPE.
            logger.error("ASR init failed, tel: " + tel, e);
        }
    }

    /**
     * Sends one chunk of audio to the running session.
     *
     * @param contentStream raw audio bytes
     * @throws IllegalStateException if no transcriber has been created by {@code init}
     */
    public void send(byte[] contentStream) {
        if (transcriber == null) {
            throw new IllegalStateException("transcriber is not initialised; call init() first");
        }
        transcriber.send(contentStream);
    }

    /**
     * Ends the session: tells the server that the audio is complete, then releases the
     * transcriber's connection. Does nothing if no transcriber was created.
     */
    public void close() {
        if (transcriber == null) {
            return;
        }
        try {
            transcriber.stop();
        } catch (Exception e) {
            logger.error("ASR stop failed, tel: " + ptel, e);
        } finally {
            // stop() alone does not free the connection; without close() it leaks.
            transcriber.close();
        }
    }

    /** Builds the listener bound to this session's transcriber. */
    private SpeechTranscriberListener getTranscriberListener() {
        return new SpeechTranscriberListener() {
            // Intermediate result: sent by the server whenever a character or word is
            // recognised; only delivered when setEnableIntermediateResult(true) is set.
            @Override
            public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
                forwardResult("index:", response);
            }

            @Override
            public void onTranscriberStart(SpeechTranscriberResponse response) {
                // The task_id identifies this session towards the server and is needed
                // when reporting problems; nothing is done with it here.
            }

            @Override
            public void onSentenceBegin(SpeechTranscriberResponse response) {
                // Nothing to do at sentence start.
            }

            // A complete sentence: sent when the server detects the end of a sentence.
            @Override
            public void onSentenceEnd(SpeechTranscriberResponse response) {
                forwardResult("整句index:", response);
            }

            // Recognition finished.
            @Override
            public void onTranscriptionComplete(SpeechTranscriberResponse response) {
                logger.info("task_id: " + response.getTaskId() + ", name: " + response.getName() + ", status: " + response.getStatus());
            }

            @Override
            public void onFail(SpeechTranscriberResponse response) {
                // Keep the task_id in the log: it is required to investigate failures.
                logger.error("task_id: " + response.getTaskId() + ", status: " + response.getStatus() + ", status_text: " + response.getStatusText());
            }
        };
    }

    /** Logs a recognition result with the given label and forwards non-empty text. */
    private void forwardResult(String label, SpeechTranscriberResponse response) {
        Integer sentenceIndex = response.getTransSentenceIndex();
        String sentenceText = response.getTransSentenceText();
        logger.info(label + sentenceIndex + ":" + sentenceText);
        if (StringUtils.isNotEmpty(sentenceText)) {
            sendMessage(sentenceIndex, sentenceText);
        }
    }

    /**
     * Wraps a recognition result and pushes it to the WebSocket of this session's channel.
     * Failures are logged and not rethrown.
     *
     * @param lineNo  sentence index reported by the recogniser
     * @param content recognised text
     */
    public void sendMessage(Integer lineNo, String content) {
        try {
            AsrResultModel asrResultModel = new AsrResultModel();
            asrResultModel.setLineNo(lineNo);
            asrResultModel.setResult(content);
            String telChannel = fsService.getTelChannel(ptel);
            String message = fsService.message(ptel, asrResultModel);
            logger.info("ptel:" + ptel + "通道:" + telChannel + "阿里消息:" + message);
            SocketServer socketServer = WebSocketMapUtil.getUserWs(telChannel);
            if (socketServer == null) {
                // No socket registered for this channel: report it clearly instead of
                // failing with a NullPointerException.
                logger.warn("No WebSocket found for channel " + telChannel + ", message dropped");
                return;
            }
            socketServer.sendMessage(message);
        } catch (Exception e) {
            logger.error("阿里发送消息失败:" + e.getMessage(), e);
        }
    }

}

4.3 调用类

// Create a fresh ALiAsrConnnection and initialise it with the shared NlsClient,
// the app key and the phone number.
aLiAsrConnnection = new ALiAsrConnnection();
                aLiAsrConnnection.init(NlsClientService.getNlsClient(), appkey, tel);

在WebSocket(ws)连接创建完成以后,再new出上面的核心类并调用init方法。

 

你可能感兴趣的:(@JAVA学习,ASR,语音识别)