1、官网SDK地址
https://help.aliyun.com/document_detail/84430.html?spm=a2c4g.11186623.6.581.73f65edftbwk9R
阿里的SDK比腾讯的好一些,可以直接从Maven中央仓库下载,并且demo写得比较详细。在开始对接之前,我们需要仔细阅读接口说明文档,这样可以减少对接过程中踩的坑。下面开始对接:
2、 先搞jar包
<dependency>
    <groupId>com.alibaba.nls</groupId>
    <artifactId>nls-sdk-transcriber</artifactId>
    <version>2.1.6</version>
</dependency>
3、注意事项
NlsClient使用了Netty框架,NlsClient对象的创建会消耗一定时间和资源,一经创建可以重复使用。建议调用程序将NlsClient的创建和关闭与程序本身的生命周期相结合。
SpeechTranscriber对象不可重复使用,一个识别任务对应一个SpeechTranscriber对象。例如,N个音频文件要进行N次识别任务,创建N个SpeechTranscriber对象。
SpeechTranscriberListener对象和SpeechTranscriber对象是一一对应的,不能在不同SpeechTranscriber对象使用同一个SpeechTranscriberListener对象,否则不能将各识别任务区分开。
4、对接流程
4.1 NlsClient类
从上面的注意事项可以看出,NlsClient需要单独创建,所以我们在项目启动的时候直接把它创建出来:
package com.jack.chat.application.service;
import com.alibaba.nls.client.AccessToken;
import com.alibaba.nls.client.protocol.NlsClient;
import org.apache.commons.collections4.Get;
import java.io.IOException;
/**
 * Creates the application-wide {@link NlsClient} and exposes it through a static accessor.
 *
 * <p>Constructing an instance applies for an access token and, on success, stores a new
 * client in a static field. If the token request fails with an {@link IOException} the
 * field is left untouched, so {@link #getNlsClient()} may return {@code null}.
 *
 * @author zhenghao
 * @date 2020/8/14 19:16
 */
public class NlsClientService {

    /** Shared client; assigned by the constructor, {@code null} until a token was obtained. */
    private static NlsClient client;

    /**
     * Applies for an access token and builds the shared client from it.
     *
     * @param id     access key id handed to {@link AccessToken}
     * @param secret access key secret handed to {@link AccessToken}
     * @param url    service address; when {@code null} or empty the SDK's default address is used
     */
    public NlsClientService(String id, String secret, String url) {
        // Important: create the NlsClient once for the whole application; its lifecycle can
        // match the application's. The default service address is the Alibaba Cloud online one.
        // This is only a simple demonstration of obtaining a token. The token expires, so in
        // real use obtain a new one before accessToken.getExpireTime() is reached.
        AccessToken accessToken = new AccessToken(id, secret);
        try {
            accessToken.apply();
            System.out.println("get token: " + ", expire time: " + accessToken.getExpireTime());
            // A null url is treated like an empty one instead of failing with a NullPointerException.
            if (url == null || url.isEmpty()) {
                client = new NlsClient(accessToken.getToken());
            } else {
                // Caller supplied an explicit service address.
                client = new NlsClient(url, accessToken.getToken());
            }
        } catch (IOException e) {
            // Best effort: startup continues, but no client is available afterwards.
            e.printStackTrace();
        }
    }

    /**
     * Returns the shared client.
     *
     * @return the client created by the constructor, or {@code null} if none was created
     */
    public static NlsClient getNlsClient() {
        return client;
    }
}
package com.jack.chat.application.service;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationListener;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.stereotype.Service;
/**
 * Spring listener that creates the shared NLS client when the root application context
 * has been refreshed (original note: starts together with Tomcat).
 *
 * @author zhenghao
 * @date 2020/7/211:41
 */
@Service
public class ApplicationService implements ApplicationListener<ContextRefreshedEvent> {

    private static final Logger log = LoggerFactory.getLogger(ApplicationService.class);

    /** Set to this bean once the root context refresh has been handled. */
    public static ApplicationService application = null;

    // Credentials are intentionally blank here; fill them in before use.
    String id = "";
    String secret = "";
    // Leave empty to use the default: wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1
    String url = "";

    /**
     * Initializes the NLS client for the root context only.
     *
     * @param contextRefreshedEvent the refresh event; events from a context that has a parent
     *                              are ignored
     */
    @Override
    public void onApplicationEvent(ContextRefreshedEvent contextRefreshedEvent) {
        // Only the root context has no parent, so child-context refreshes skip this body.
        if (contextRefreshedEvent.getApplicationContext().getParent() == null) {
            log.info("客户端启动-------------------------->");
            synchronized (this) {
                ApplicationService.application = this;
                // The constructor stores the client in NlsClientService's static field.
                new NlsClientService(id, secret, url);
                System.out.println("阿里云 nls 初始化完毕");
            }
        }
    }
}
4.2 核心类
package com.jack.chat.asrali.service;
import com.alibaba.nls.client.protocol.InputFormatEnum;
import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriber;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
import com.jack.chat.asr.model.AsrResultModel;
import com.jack.chat.fs.service.FsService;
import com.jack.chat.socket.service.SocketServer;
import com.jack.chat.socket.service.WebSocketMapUtil;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.context.ContextLoader;
import org.springframework.web.context.WebApplicationContext;
import java.io.IOException;
/**
 * One real-time speech-recognition task for a single phone number.
 *
 * <p>Usage: call {@link #init(NlsClient, String, String)} once, feed audio through
 * {@link #send(byte[])}, and finish with {@link #close()}. Recognition results arriving on
 * the listener are forwarded to the websocket associated with the phone number's channel.
 *
 * @author zhenghao
 * @date 2020/8/14 18:47
 */
public class ALiAsrConnnection {

    private static final Logger logger = LoggerFactory.getLogger(ALiAsrConnnection.class);

    /** The active transcriber; {@code null} before init, after a failed init, and after close. */
    SpeechTranscriber transcriber = null;
    private FsService fsService;
    private String ptel;

    /**
     * Creates, configures and starts the transcriber.
     *
     * <p>Failures are logged rather than thrown; in that case the transcriber is closed and
     * cleared, so a later {@link #send(byte[])} fails with a clear message.
     *
     * @param client shared NLS client
     * @param appKey application key set on the transcriber
     * @param tel    phone number used to look up the outgoing channel for results
     */
    public void init(NlsClient client, String appKey, String tel) {
        try {
            ptel = tel;
            WebApplicationContext wac = ContextLoader.getCurrentWebApplicationContext();
            fsService = (FsService) wac.getBean("fsService");

            // Create the instance and establish the connection.
            transcriber = new SpeechTranscriber(client, getTranscriberListener());
            transcriber.setAppKey(appKey);
            // Input audio encoding.
            transcriber.setFormat(InputFormatEnum.PCM);
            // Input audio sample rate.
            transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_8K);
            // Whether to return intermediate recognition results.
            transcriber.setEnableIntermediateResult(true);
            // Whether to generate and return punctuation.
            transcriber.setEnablePunctuation(true);
            // Whether to normalize the result, e.g. return spelled-out numbers as digits.
            transcriber.setEnableITN(false);
            // VAD sentence-break silence, default 800 ms, valid range [200, 2000]:
            //transcriber.addCustomedParam("max_sentence_silence", 600);
            // Semantic sentence detection:
            //transcriber.addCustomedParam("enable_semantic_sentence_detection",false);
            // Whether to enable disfluency smoothing.
            transcriber.addCustomedParam("disfluency", true);
            // Word mode:
            //transcriber.addCustomedParam("enable_words",true);
            // VAD model:
            //transcriber.addCustomedParam("vad_model","farfield");
            // VAD noise threshold in [-1, +1]. Towards -1 more audio is judged to be speech
            // (more noise may be misrecognized as speech); towards +1 more audio is judged to
            // be noise (more speech may be rejected). Advanced parameter: change with care and
            // test thoroughly.
            //transcriber.addCustomedParam("speech_noise_threshold",0.3);
            // Id of a trained custom language model:
            //transcriber.addCustomedParam("customization_id","你的定制语言模型id");
            // Id of a trained custom hot-word vocabulary:
            //transcriber.addCustomedParam("vocabulary_id","你的定制热词id");
            // Whether to ignore the single-sentence timeout.
            transcriber.addCustomedParam("enable_ignore_sentence_timeout", false);
            // Post-processing after VAD sentence breaking:
            //transcriber.addCustomedParam("enable_vad_unify_post",false);

            // Serializes the settings above to JSON, sends them to the server and waits for
            // the server's confirmation.
            transcriber.start();
            // The transcriber is deliberately left running: audio arrives later through
            // send(), and stop/close happen in close().
        } catch (Exception e) {
            logger.error("Failed to start ASR transcriber for tel " + tel, e);
            // Do not keep a half-initialized transcriber (and its connection) around.
            releaseTranscriber();
        }
    }

    /**
     * Sends one chunk of audio to the server.
     *
     * @param contentStream raw audio bytes
     * @throws IllegalStateException if no transcriber is active (init not called, init failed,
     *                               or already closed)
     */
    public void send(byte[] contentStream) {
        SpeechTranscriber current = transcriber;
        if (current == null) {
            throw new IllegalStateException("ASR transcriber is not initialized; call init() first");
        }
        current.send(contentStream);
    }

    /**
     * Ends the recognition task and releases the transcriber.
     *
     * <p>{@code stop()} tells the server that all audio has been sent and waits for it to finish;
     * the transcriber is then closed even if stopping failed. Calling this without an active
     * transcriber does nothing.
     */
    public void close() {
        SpeechTranscriber current = transcriber;
        if (current == null) {
            return;
        }
        transcriber = null;
        try {
            current.stop();
        } catch (Exception e) {
            logger.error("Failed to stop ASR transcriber", e);
        } finally {
            current.close();
        }
    }

    /** Closes and clears the current transcriber, if any. */
    private void releaseTranscriber() {
        SpeechTranscriber current = transcriber;
        transcriber = null;
        if (current != null) {
            current.close();
        }
    }

    /** Builds the listener that forwards recognition results for this task. */
    private SpeechTranscriberListener getTranscriberListener() {
        return new SpeechTranscriberListener() {
            // Intermediate result: the server sends this whenever it recognizes a character or
            // word. Only delivered when setEnableIntermediateResult(true) was set.
            // Also available on the response: task id, name, status (20000000 means normal
            // recognition), sentence index (starting at 1) and processed audio time in ms.
            @Override
            public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
                forwardResult("index:", response);
            }

            @Override
            public void onTranscriberStart(SpeechTranscriberResponse response) {
                // Important: the task_id is the unique id shared by caller and server; provide
                // it when asking for help with a problem. Nothing is done here.
            }

            @Override
            public void onSentenceBegin(SpeechTranscriberResponse response) {
                // Nothing to do at the start of a sentence.
            }

            // A complete sentence: the server segments speech and sends this when it detects
            // the end of a sentence. The response additionally carries confidence and the
            // sentence begin time.
            @Override
            public void onSentenceEnd(SpeechTranscriberResponse response) {
                forwardResult("整句index:", response);
            }

            // Recognition finished.
            @Override
            public void onTranscriptionComplete(SpeechTranscriberResponse response) {
                System.out.println("task_id: " + response.getTaskId() + ", name: " + response.getName() + ", status: " + response.getStatus());
            }

            @Override
            public void onFail(SpeechTranscriberResponse response) {
                // Important: keep the task_id, it is needed to investigate failures.
                System.out.println("task_id: " + response.getTaskId() + ", status: " + response.getStatus() + ", status_text: " + response.getStatusText());
            }
        };
    }

    /** Prints the sentence index and text, then forwards non-empty text via sendMessage. */
    private void forwardResult(String label, SpeechTranscriberResponse response) {
        Integer index = response.getTransSentenceIndex();
        String text = response.getTransSentenceText();
        System.out.println(label + index + ":" + text);
        if (StringUtils.isNotEmpty(text)) {
            sendMessage(index, text);
        }
    }

    /**
     * Wraps a recognition result and pushes it to the websocket registered for this phone
     * number's channel. Any failure is logged and otherwise ignored.
     *
     * @param lineNo  sentence index of the result
     * @param content recognized text
     */
    public void sendMessage(Integer lineNo, String content) {
        try {
            AsrResultModel asrResultModel = new AsrResultModel();
            asrResultModel.setLineNo(lineNo);
            asrResultModel.setResult(content);
            String telChannel = fsService.getTelChannel(ptel);
            String message = fsService.message(ptel, asrResultModel);
            System.out.println("ptel:" + ptel +"通道:" + telChannel + "阿里消息:" + message);
            SocketServer socketServer = WebSocketMapUtil.getUserWs(telChannel);
            socketServer.sendMessage(message);
        } catch (Exception e) {
            // Keep the stack trace: a bare message is "null" for a NullPointerException.
            logger.error("阿里发送消息失败:" + e.getMessage(), e);
        }
    }
}
4.3 调用类
// Create a fresh recognizer for this task; appkey and tel come from the surrounding code (not shown).
aLiAsrConnnection = new ALiAsrConnnection();
// Start it with the shared client held by NlsClientService.
aLiAsrConnnection.init(NlsClientService.getNlsClient(), appkey, tel);
在WebSocket(ws)连接创建完成以后,再创建(new)上面的核心类并调用init方法。