小程序接入科大讯飞,web语音听写

最近公司的需求是小程序接入语音,选择的是科大讯飞的语音。
这里科大讯飞的相关申请流程就不做介绍了,就说自己在做的过程遇到的坑吧。

调用的是科大讯飞 WebAPI 的语音听写接口。

工具类:

import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.apache.poi.util.IOUtils;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.List;

/**
 * Helper for calling the iFlytek (xfyun) WebAPI speech dictation (IAT) endpoint.
 *
 * <p>The request carries four headers: {@code X-Appid}, {@code X-CurTime} (epoch seconds),
 * {@code X-Param} (Base64 of the JSON engine parameters) and {@code X-CheckSum}
 * (lower-case hex MD5 of {@code APIKEY + X-CurTime + X-Param}). The audio itself is sent
 * Base64-encoded in the form field {@code audio}.
 */
public class DictationUntils {
    /** Speech dictation API endpoint. */
    private static final String url = "http://api.xfyun.cn/v1/service/v1/iat";

    // Replace these two values with your own application's credentials; the service also
    // rejects calls whose source IP is not on that application's whitelist.
    // NOTE(review): credentials are hard-coded in source — move them to configuration
    // before this code is used outside of a demo.
    private static final String APPID = "5bac8813";
    private static final String APIKEY = "f69565ec6d410c4cd6cb3ef4a8c75ef4";

    /** Upper-case hexadecimal alphabet used by {@link #getMD5(String)}. */
    private static final char[] HEX_DIGITS = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Sends the audio read from {@code is} to the dictation endpoint and returns the raw
     * response body.
     *
     * <p>Despite its name, the method posts a URL-encoded form, not a JSON body. The stream
     * is read to the end but not closed; closing it remains the caller's job.
     *
     * @param is stream positioned at the start of the audio data; must not be {@code null}
     * @return the response body decoded as UTF-8 when the HTTP status is 200,
     *         otherwise {@code null}
     * @throws IllegalArgumentException if {@code is} is {@code null}
     * @throws Exception if reading the audio or performing the HTTP call fails
     */
    public static String httpPostWithJSON(InputStream is) throws Exception {
        if (is == null) {
            throw new IllegalArgumentException("audio input stream must not be null");
        }

        // Engine parameters: 16 kHz engine, raw (uncompressed) audio encoding.
        String parm = "{\"engine_type\": \"sms16k\",\"aue\": \"raw\"}";
        String x_param = new String(Base64.encodeBase64(parm.getBytes("UTF-8")), "UTF-8");

        String x_time = String.valueOf(System.currentTimeMillis() / 1000L);
        String x_checksum = getMD5(APIKEY + x_time + x_param).toLowerCase();

        HttpPost httpPost = new HttpPost(url);
        httpPost.setHeader("X-Appid", APPID);
        httpPost.setHeader("X-CurTime", x_time);
        httpPost.setHeader("X-Param", x_param);
        httpPost.setHeader("X-CheckSum", x_checksum);

        // A read failure propagates to the caller instead of silently posting an empty
        // "audio" field.
        byte[] bytes = IOUtils.toByteArray(is);
        String audio = Base64.encodeBase64String(bytes);

        List<BasicNameValuePair> pairList = new ArrayList<BasicNameValuePair>();
        pairList.add(new BasicNameValuePair("audio", audio));
        StringEntity entity = new UrlEncodedFormEntity(pairList, "utf-8");
        entity.setContentType("application/x-www-form-urlencoded; charset=utf-8");
        httpPost.setEntity(entity);

        // Closing the client releases its connections even when the call throws.
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            HttpResponse resp = client.execute(httpPost);
            HttpEntity he = resp.getEntity();
            if (resp.getStatusLine().getStatusCode() == 200) {
                return EntityUtils.toString(he, "UTF-8");
            }
            // Drain the body of an error response so the connection is released cleanly.
            EntityUtils.consume(he);
            return null;
        }
    }


    /**
     * Computes the MD5 digest of {@code key} and renders it as upper-case hexadecimal.
     *
     * <p>The string is encoded as UTF-8 before hashing so that the result does not depend
     * on the platform default charset.
     *
     * @param key text to hash; may be {@code null}
     * @return 32 upper-case hex characters, or {@code null} when {@code key} is {@code null}
     * @throws IllegalStateException if the runtime lacks MD5 or UTF-8 support
     */
    public static String getMD5(String key) {
        if (key == null) {
            return null;
        }
        try {
            MessageDigest mdInst = MessageDigest.getInstance("MD5");
            byte[] md = mdInst.digest(key.getBytes("UTF-8"));

            // Two hex characters per digest byte: high nibble first, then low nibble.
            char[] str = new char[md.length * 2];
            int k = 0;
            for (byte b : md) {
                str[k++] = HEX_DIGITS[b >>> 4 & 0xf];
                str[k++] = HEX_DIGITS[b & 0xf];
            }
            return new String(str);
        } catch (java.security.NoSuchAlgorithmException | java.io.UnsupportedEncodingException e) {
            throw new IllegalStateException("MD5/UTF-8 not available on this runtime", e);
        }
    }

    /**
     * Demo entry point: sends a local WAV file to the dictation endpoint and prints the
     * response.
     */
    public static void main(String[] args) throws Exception {
        try (InputStream in = new FileInputStream(new File("C:\\lx.wav"))) {
            String result = httpPostWithJSON(in);
            System.out.println(result);
        }
    }
}

里面需要导入科大讯飞相关jar包,maven 要手动导入到本地仓库


image.png

遇到问题:
小程序语音翻译不准确。识别率很差。
解决办法:
这是小程序生成的语音文件格式导致的,涉及文件的采样率(每秒录制或播放的样本数量)、通道数和帧大小。小程序中对应的字段是:
frameSize、numberOfChannels、sampleRate
其实最好的方法, 就是把自己的文件转成跟demo里面文件 采样率什么的都一样的文件。
把小程序生成的mp3转成讯飞语音需要的文件格式



import javazoom.spi.mpeg.sampled.file.MpegAudioFileReader;
import net.sf.json.JSONObject;

import javax.sound.sampled.*;
import java.io.*;
import java.util.HashMap;

import static javax.crypto.Cipher.SECRET_KEY;

public class MP3ToWav {
    /**
     * mp3的字节数组生成wav文件
     *
     * @param sourceBytes
     * @param targetPath
     */
    public static boolean byteToWav(byte[] sourceBytes, String targetPath) {
        if (sourceBytes == null || sourceBytes.length == 0) {
            System.out.println("Illegal Argument passed to this method");
            return false;
        }

        try (final ByteArrayInputStream bais = new ByteArrayInputStream(sourceBytes);
             final AudioInputStream sourceAIS = AudioSystem.getAudioInputStream(bais)) {
            AudioFormat baseFormat = sourceAIS.getFormat();
            // 设置MP3的语音格式,并设置16bit
            AudioFormat mp3tFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, baseFormat.getFrameRate(), 16,
                    1, 2, baseFormat.getFrameRate(), false);
            // 设置百度语音识别的音频格式
            // AudioFormat pcmFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 16000, 16, 1, 2, 16000, false);
            try (
                    // 先通过MP3转一次,使音频流能的格式完整
                    final AudioInputStream mp3AIS = AudioSystem.getAudioInputStream(mp3tFormat, sourceAIS);
            ) {
                // 根据路径生成wav文件
                AudioSystem.write(mp3AIS, AudioFileFormat.Type.WAVE, new File(targetPath));
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("文件转换异常:" + e.getMessage());
            return false;
        } catch (UnsupportedAudioFileException e) {
            e.printStackTrace();
            System.out.println("文件转换异常:" + e.getMessage());
            return false;
        }
    }

    /**
     * 将文件转成字节流
     *
     * @param filePath
     * @return
     */
    public static byte[] getBytes(String filePath) {
        byte[] buffer = null;
        try {
            File file = new File(filePath);
            FileInputStream fis = new FileInputStream(file);
            ByteArrayOutputStream bos = new ByteArrayOutputStream(1000);
            byte[] b = new byte[1000];
            int n;
            while ((n = fis.read(b)) != -1) {
                bos.write(b, 0, n);
            }
            fis.close();
            bos.close();
            buffer = bos.toByteArray();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return buffer;
    }

    /**
     * MP3转换PCM文件方法
     *
     * @param mp3filepath 原始文件路径
     * @param pcmfilepath 转换文件的保存路径
     * @throws Exception
     */
    public static void convertMP32PCM(String mp3filepath, String pcmfilepath) throws Exception {
        AudioInputStream audioInputStream = getPcmAudioInputStream(mp3filepath);
        AudioSystem.write(audioInputStream, AudioFileFormat.Type.WAVE, new File(pcmfilepath));
    }

    private static AudioInputStream getPcmAudioInputStream(String mp3filepath) {
        File mp3 = new File(mp3filepath);
        AudioInputStream audioInputStream = null;
        AudioFormat targetFormat = null;
        try {
            // = null;
            MpegAudioFileReader mp = new MpegAudioFileReader();
            AudioInputStream in = mp.getAudioInputStream(mp3);
            AudioFormat baseFormat = in.getFormat();
            targetFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, baseFormat.getFrameRate(), 16,
                    baseFormat.getChannels(), baseFormat.getFrameSize(), baseFormat.getFrameRate(), false);
            audioInputStream = AudioSystem.getAudioInputStream(targetFormat, in);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return audioInputStream;
    }

    private static void getMp3Info(String wavFilepath) {

        try {
            javax.sound.sampled.AudioInputStream audio_file = javax.sound.sampled.AudioSystem.getAudioInputStream(new File(wavFilepath));
            audio_file.getFrameLength();

            AudioFormat format = audio_file.getFormat();
            System.out.println("getFrameSize:"+format.getFrameSize());
            System.out.println("getFrameRate:"+format.getFrameRate());
            System.out.println("getChannels:"+format.getChannels());
            System.out.println("getSampleRate:"+format.getSampleRate());
            System.out.println(format.getEncoding());
            System.out.println(format.getSampleSizeInBits());

        } catch (Exception e) {

        }
    }

    public static void main(String args[]) throws Exception {
        String filePath = "C:\\hhh.mp3";
        String targetPath = "C\\xx.wav";

      byteToWav(getBytes(filePath),targetPath);

        //getMp3Info(filePath);
    }

}

注:在踩坑的过程中也遇到过一些 mp3 文件转换成 pcm 文件时的问题,同样是因为原文件的格式参数问题。

你可能感兴趣的:(小程序接入科大讯飞,web语音听写)