freeswitch对接FunASR实时语音听写

freeswitch对接FunASR实时语音听写_第1张图片

1、镜像启动

通过下述命令拉取并启动FunASR软件包的docker镜像:

# Image of the FunASR online (CPU) runtime SDK; defined once so pull and run cannot drift apart.
FUNASR_IMAGE=registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.7
sudo docker pull "$FUNASR_IMAGE"
# Host directory that is mounted into the container as /workspace/models.
mkdir -p ./funasr-runtime-resources/models
# Publish container port 10095 on host port 10096. The volume argument is quoted so that a
# working directory containing spaces is still passed to docker as a single argument.
sudo docker run -p 10096:10095 -it --privileged=true \
  -v "$PWD/funasr-runtime-resources/models:/workspace/models" \
  "$FUNASR_IMAGE"

服务端启动

docker启动之后,启动 funasr-wss-server-2pass服务程序:

# Start run_server_2pass.sh in the background (nohup ... &); its stdout and stderr are
# redirected to log.txt in FunASR/runtime.
cd FunASR/runtime
nohup bash run_server_2pass.sh \
  --download-model-dir /workspace/models \
  --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
  --model-dir damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx  \
  --online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx  \
  --punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx \
  --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
  --itn-dir thuduj12/fst_itn_zh --certfile 0 \
  --hotword /workspace/models/hotwords.txt > log.txt 2>&1 &

# To disable SSL, add the argument: --certfile 0 (SSL is not needed here, so the command above already includes it)
# To deploy with timestamps or the nn hotword model, set --model-dir to the corresponding model:
#   damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx (timestamps)
#   damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404-onnx (nn hotwords)
# To load hotwords on the server, configure them in the host file ./funasr-runtime-resources/models/hotwords.txt (mapped inside docker to /workspace/models/hotwords.txt):
#   One hotword per line, format (hotword weight): 阿里巴巴 20 (note: the number of hotwords is theoretically unlimited, but to balance performance and accuracy keep each hotword within 10 characters, the total under 1k, and weights between 1 and 100)

运行命令之后会自动下载社区模型;启动成功之后,日志中会显示服务监听的端口。

freeswitch对接FunASR实时语音听写_第2张图片

接下来就可以做语音识别(实时听写)了。

2、获取 freeswitch 的音频并发送到 FunASR,获取音频的方法可以参考之前的文章:

freeswitch智能外呼系统搭建流程_freeswitch 外联模式的流程-CSDN博客

3、Java 端用 WebSocket(ws)连接即可,下面是官方样例(运行时用 --host、--port 指向上面启动的服务,例如 docker 映射到宿主机的 10096 端口):

//
// Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
// Reserved. MIT License  (https://opensource.org/licenses/MIT)
//
/*
 * // 2022-2023 by [email protected]
 */
// java FunasrWsClient
// usage:  FunasrWsClient [-h] [--port PORT] [--host HOST] [--audio_in AUDIO_IN] [--num_threads NUM_THREADS]
//                 [--chunk_size CHUNK_SIZE] [--chunk_interval CHUNK_INTERVAL] [--mode MODE]
//                 [--hotwords HOTWORDS]
package websocket;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.*;
import java.util.Map;
import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.inf.ArgumentParser;
import net.sourceforge.argparse4j.inf.ArgumentParserException;
import net.sourceforge.argparse4j.inf.Namespace;
import org.java_websocket.client.WebSocketClient;
import org.java_websocket.drafts.Draft;
import org.java_websocket.handshake.ServerHandshake;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** This example demonstrates how to connect to websocket server. */
public class FunasrWsClient extends WebSocketClient {

  public class RecWavThread extends Thread {
    private FunasrWsClient funasrClient;

    public RecWavThread(FunasrWsClient funasrClient) {
      this.funasrClient = funasrClient;
    }

    public void run() {
      this.funasrClient.recWav();
    }
  }

  private static final Logger logger = LoggerFactory.getLogger(FunasrWsClient.class);

  public FunasrWsClient(URI serverUri, Draft draft) {
    super(serverUri, draft);
  }

  public FunasrWsClient(URI serverURI) {
    
    super(serverURI);
  }

  public FunasrWsClient(URI serverUri, Map httpHeaders) {
    super(serverUri, httpHeaders);
  }

  public void getSslContext(String keyfile, String certfile) {
    // TODO
    return;
  }

  // send json at first time
  public void sendJson(
      String mode, String strChunkSize, int chunkInterval, String wavName, boolean isSpeaking,String suffix) {
    try {

      JSONObject obj = new JSONObject();
      obj.put("mode", mode);
      JSONArray array = new JSONArray();
      String[] chunkList = strChunkSize.split(",");
      for (int i = 0; i < chunkList.length; i++) {
        array.add(Integer.valueOf(chunkList[i].trim()));
      }

      obj.put("chunk_size", array);
      obj.put("chunk_interval", new Integer(chunkInterval));
      obj.put("wav_name", wavName);

      if(FunasrWsClient.hotwords.trim().length()>0)
      {
          String regex = "\\d+";
          JSONObject jsonitems = new JSONObject();
          String[] items=FunasrWsClient.hotwords.trim().split(" ");
          Pattern pattern = Pattern.compile(regex);
          String tmpWords="";
          for(int i=0;i           {

              Matcher matcher = pattern.matcher(items[i]);

              if (matcher.matches()) {

                jsonitems.put(tmpWords.trim(), items[i].trim());
                tmpWords="";
                continue;
              }
              tmpWords=tmpWords+items[i]+" ";

          }

          obj.put("hotwords", jsonitems.toString());
      }

      if(suffix.equals("wav")){
          suffix="pcm";
      }
      obj.put("wav_format", suffix);
      if (isSpeaking) {
        obj.put("is_speaking", new Boolean(true));
      } else {
        obj.put("is_speaking", new Boolean(false));
      }
      logger.info("sendJson: " + obj);
      // return;

      send(obj.toString());

      return;
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  // send json at end of wav
  public void sendEof() {
    try {
      JSONObject obj = new JSONObject();

      obj.put("is_speaking", new Boolean(false));

      logger.info("sendEof: " + obj);
      // return;

      send(obj.toString());
      iseof = true;
      return;
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  // function for rec wav file
  public void recWav() {
    String fileName=FunasrWsClient.wavPath;
    String suffix=fileName.split("\\.")[fileName.split("\\.").length-1];
    sendJson(mode, strChunkSize, chunkInterval, wavName, true,suffix);
    File file = new File(FunasrWsClient.wavPath);

    int chunkSize = sendChunkSize;
    byte[] bytes = new byte[chunkSize];

    int readSize = 0;
    try (FileInputStream fis = new FileInputStream(file)) {
      if (FunasrWsClient.wavPath.endsWith(".wav")) {
        fis.read(bytes, 0, 44); //skip first 44 wav header
      }
      readSize = fis.read(bytes, 0, chunkSize);
      while (readSize > 0) {
        // send when it is chunk size
        if (readSize == chunkSize) {
          send(bytes); // send buf to server

        } else {
          // send when at last or not is chunk size
          byte[] tmpBytes = new byte[readSize];
          for (int i = 0; i < readSize; i++) {
            tmpBytes[i] = bytes[i];
          }
          send(tmpBytes);
        }
        // if not in offline mode, we simulate online stream by sleep
        if (!mode.equals("offline")) {
          Thread.sleep(Integer.valueOf(chunkSize / 32));
        }

        readSize = fis.read(bytes, 0, chunkSize);
      }

      if (!mode.equals("offline")) {
        // if not offline, we send eof and wait for 3 seconds to close
        Thread.sleep(2000);
        sendEof();
        Thread.sleep(3000);
        close();
      } else {
        // if offline, just send eof
        sendEof();
      }

    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  @Override
  public void onOpen(ServerHandshake handshakedata) {

    RecWavThread thread = new RecWavThread(this);
    thread.start();
  }

  @Override
  public void onMessage(String message) {
    JSONObject jsonObject = new JSONObject();
    JSONParser jsonParser = new JSONParser();
    logger.info("received: " + message);
    try {
      jsonObject = (JSONObject) jsonParser.parse(message);
      logger.info("text: " + jsonObject.get("text"));
      if(jsonObject.containsKey("timestamp"))
      {
          logger.info("timestamp: " + jsonObject.get("timestamp"));
      }
    } catch (org.json.simple.parser.ParseException e) {
      e.printStackTrace();
    }
    if (iseof && mode.equals("offline") && !jsonObject.containsKey("is_final")) {
      close();
    }
     
    if (iseof && mode.equals("offline") && jsonObject.containsKey("is_final") && jsonObject.get("is_final").equals("false")) {
      close();
    }
  }

  @Override
  public void onClose(int code, String reason, boolean remote) {

    logger.info(
        "Connection closed by "
            + (remote ? "remote peer" : "us")
            + " Code: "
            + code
            + " Reason: "
            + reason);
  }

  @Override
  public void onError(Exception ex) {
    logger.info("ex: " + ex);
    ex.printStackTrace();
    // if the error is fatal then onClose will be called additionally
  }

  private boolean iseof = false;
  public static String wavPath;
  static String mode = "online";
  static String strChunkSize = "5,10,5";
  static int chunkInterval = 10;
  static int sendChunkSize = 1920;
  static String hotwords="";
  static String fsthotwords="";

  String wavName = "javatest";

  public static void main(String[] args) throws URISyntaxException {
    ArgumentParser parser = ArgumentParsers.newArgumentParser("ws client").defaultHelp(true);
    parser
        .addArgument("--port")
        .help("Port on which to listen.")
        .setDefault("8889")
        .type(String.class)
        .required(false);
    parser
        .addArgument("--host")
        .help("the IP address of server.")
        .setDefault("127.0.0.1")
        .type(String.class)
        .required(false);
    parser
        .addArgument("--audio_in")
        .help("wav path for decoding.")
        .setDefault("asr_example.wav")
        .type(String.class)
        .required(false);
    parser
        .addArgument("--num_threads")
        .help("num of threads for test.")
        .setDefault(1)
        .type(Integer.class)
        .required(false);
    parser
        .addArgument("--chunk_size")
        .help("chunk size for asr.")
        .setDefault("5, 10, 5")
        .type(String.class)
        .required(false);
    parser
        .addArgument("--chunk_interval")
        .help("chunk for asr.")
        .setDefault(10)
        .type(Integer.class)
        .required(false);

    parser
        .addArgument("--mode")
        .help("mode for asr.")
        .setDefault("offline")
        .type(String.class)
        .required(false);
    parser
        .addArgument("--hotwords")
        .help("hotwords, splited by space, hello 30 nihao 40")
        .setDefault("")
        .type(String.class)
        .required(false);
    String srvIp = "";
    String srvPort = "";
    String wavPath = "";
    int numThreads = 1;
    String chunk_size = "";
    int chunk_interval = 10;
    String strmode = "offline";
    String hot="";
    try {
      Namespace ns = parser.parseArgs(args);
      srvIp = ns.get("host");
      srvPort = ns.get("port");
      wavPath = ns.get("audio_in");
      numThreads = ns.get("num_threads");
      chunk_size = ns.get("chunk_size");
      chunk_interval = ns.get("chunk_interval");
      strmode = ns.get("mode");
      hot=ns.get("hotwords");
      System.out.println(srvPort);

    } catch (ArgumentParserException ex) {
      ex.getParser().handleError(ex);
      return;
    }

    FunasrWsClient.strChunkSize = chunk_size;
    FunasrWsClient.chunkInterval = chunk_interval;
    FunasrWsClient.wavPath = wavPath;
    FunasrWsClient.mode = strmode;
    FunasrWsClient.hotwords=hot;
    System.out.println(
        "serIp="
            + srvIp
            + ",srvPort="
            + srvPort
            + ",wavPath="
            + wavPath
            + ",strChunkSize"
            + strChunkSize);

    class ClientThread implements Runnable {

      String srvIp;
      String srvPort;

      ClientThread(String srvIp, String srvPort, String wavPath) {
        this.srvIp = srvIp;
        this.srvPort = srvPort;
      }

      public void run() {
        try {

          int RATE = 16000;
          String[] chunkList = strChunkSize.split(",");
          int int_chunk_size = 60 * Integer.valueOf(chunkList[1].trim()) / chunkInterval;
          int CHUNK = Integer.valueOf(RATE / 1000 * int_chunk_size);
          int stride =
              Integer.valueOf(
                  60 * Integer.valueOf(chunkList[1].trim()) / chunkInterval / 1000 * 16000 * 2);
          System.out.println("chunk_size:" + String.valueOf(int_chunk_size));
          System.out.println("CHUNK:" + CHUNK);
          System.out.println("stride:" + String.valueOf(stride));
          FunasrWsClient.sendChunkSize = CHUNK * 2;

          String wsAddress = "ws://" + srvIp + ":" + srvPort;

          FunasrWsClient c = new FunasrWsClient(new URI(wsAddress));

          c.connect();

          System.out.println("wsAddress:" + wsAddress);
        } catch (Exception e) {
          e.printStackTrace();
          System.out.println("e:" + e);
        }
      }
    }
    for (int i = 0; i < numThreads; i++) {
      System.out.println("Thread1 is running...");
      Thread t = new Thread(new ClientThread(srvIp, srvPort, wavPath));
      t.start();
    }
  }
}
 

运行之后,在 Java 客户端和 FunASR 服务端的控制台中就能看到识别结果了。

freeswitch对接FunASR实时语音听写_第3张图片

服务器性能配置官方参考。

用户可以根据自己的业务需求,选择合适的服务器配置,推荐配置为:

  • 配置1: (X86,计算型),4核vCPU,内存8G,单机可以支持大约16路的请求
  • 配置2: (X86,计算型),16核vCPU,内存32G,单机可以支持大约32路的请求
  • 配置3: (X86,计算型),64核vCPU,内存128G,单机可以支持大约100路的请求

详细性能测试报告https://github.com/alibaba-damo-academy/FunASR/blob/main/runtime/docs/benchmark_onnx_cpp.md

官方文档

https://github.com/alibaba-damo-academy/FunASR/blob/main/runtime/docs/SDK_advanced_guide_online_zh.md

如果觉得模块搭建或整个流程比较麻烦,可以到:

https://item.taobao.com/item.htm?id=653611115230

你可能感兴趣的:(java,freeswitch,Funasr)