最近公司的需求是小程序接入语音,选择的是科大讯飞的语音。
这里科大讯飞的相关申请流程就不做介绍了,就说自己在做的过程遇到的坑吧。
调用的是科大讯飞 WebAPI 的语音听写接口
工具类:
import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.apache.poi.util.IOUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.List;
/**
 * Helper for calling the iFlytek WebAPI speech dictation (speech-to-text) endpoint.
 *
 * <p>The audio is sent Base64-encoded as the {@code audio} field of a form-encoded POST,
 * authenticated with the {@code X-Appid}, {@code X-CurTime}, {@code X-Param} and
 * {@code X-CheckSum} headers.
 */
public class DictationUntils {
    /** Speech conversion API endpoint. */
    private static final String url = "http://api.xfyun.cn/v1/service/v1/iat";

    // NOTE(review): credentials are hard-coded in source; consider loading them from
    // configuration. Per the original author's note, replace these two values with your
    // own application's, otherwise the caller's IP is not whitelisted and calls fail.
    private static final String APPID = "5bac8813";
    private static final String APIKEY = "f69565ec6d410c4cd6cb3ef4a8c75ef4";

    /** Hex alphabet used by {@link #getMD5(String)}; upper-case by design. */
    private static final char[] HEX_DIGITS = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Posts the audio read from {@code is} to the dictation endpoint and returns the raw
     * response body.
     *
     * <p>Despite its name, the request body is form-encoded, not JSON. The stream is read
     * fully but not closed; closing it remains the caller's responsibility.
     *
     * @param is stream containing the audio data; must not be {@code null}
     * @return the response body decoded as UTF-8, or {@code null} when the HTTP status is
     *     not 200 or the response carries no entity
     * @throws IllegalArgumentException if {@code is} is {@code null}
     * @throws Exception if reading the audio or performing the HTTP call fails
     */
    public static String httpPostWithJSON(InputStream is) throws Exception {
        if (is == null) {
            throw new IllegalArgumentException("audio input stream must not be null");
        }

        // Read the audio before touching the network: a read failure now aborts the call
        // instead of silently sending a request without audio.
        byte[] audioBytes = IOUtils.toByteArray(is);
        String audio = Base64.encodeBase64String(audioBytes);

        String parm = "{\"engine_type\": \"sms16k\",\"aue\": \"raw\"}";
        String x_param = new String(Base64.encodeBase64(parm.getBytes("UTF-8")), "UTF-8");
        String x_time = System.currentTimeMillis() / 1000L + "";

        // Checksum is the lower-cased MD5 of API key + timestamp + Base64 parameter string.
        String md5 = getMD5(APIKEY + x_time + x_param);
        if (md5 == null) {
            throw new IllegalStateException("unable to compute request checksum");
        }
        String x_checksum = md5.toLowerCase();

        HttpPost httpPost = new HttpPost(url);
        httpPost.setHeader("X-Appid", APPID);
        httpPost.setHeader("X-CurTime", x_time);
        httpPost.setHeader("X-Param", x_param);
        httpPost.setHeader("X-CheckSum", x_checksum);

        List<BasicNameValuePair> pairList = new ArrayList<>();
        pairList.add(new BasicNameValuePair("audio", audio));
        StringEntity entity = new UrlEncodedFormEntity(pairList, "utf-8");
        entity.setContentType("application/x-www-form-urlencoded; charset=utf-8");
        httpPost.setEntity(entity);

        // Closing the client releases its connection manager and any open connection.
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            HttpResponse resp = client.execute(httpPost);
            if (resp.getStatusLine().getStatusCode() != 200) {
                return null;
            }
            HttpEntity he = resp.getEntity();
            return he == null ? null : EntityUtils.toString(he, "UTF-8");
        }
    }

    /**
     * Computes the MD5 digest of {@code key} (encoded as UTF-8) as an upper-case
     * hexadecimal string.
     *
     * @param key text to hash
     * @return 32-character upper-case hex digest, or {@code null} if {@code key} is
     *     {@code null} or the digest cannot be computed
     */
    public static String getMD5(String key) {
        if (key == null) {
            return null;
        }
        try {
            MessageDigest mdInst = MessageDigest.getInstance("MD5");
            // Explicit charset so the digest does not depend on the platform default.
            byte[] md = mdInst.digest(key.getBytes("UTF-8"));

            // Convert each digest byte into two hex characters.
            char[] str = new char[md.length * 2];
            int k = 0;
            for (byte b : md) {
                str[k++] = HEX_DIGITS[(b >>> 4) & 0xf];
                str[k++] = HEX_DIGITS[b & 0xf];
            }
            return new String(str);
        } catch (java.security.NoSuchAlgorithmException | java.io.UnsupportedEncodingException e) {
            return null;
        }
    }

    /**
     * Manual smoke test: sends a local WAV file to the service and prints the response.
     */
    public static void main(String[] args) throws Exception {
        try (InputStream in = new FileInputStream(new File("C:\\lx.wav"))) {
            String result = httpPostWithJSON(in);
            System.out.println(result);
        }
    }
}
里面需要导入科大讯飞相关jar包,maven 要手动导入到本地仓库
遇到问题:
小程序语音翻译不准确。识别率很差。
解决办法:
这是小程序生成的语音文件的问题,涉及到文件的采样率、通道数、每秒播放或录制的样本数量等参数。小程序对应的相关字段就是
frameSize numberOfChannels sampleRate
其实最好的方法, 就是把自己的文件转成跟demo里面文件 采样率什么的都一样的文件。
把小程序生成的mp3转成讯飞语音需要的文件格式
import javazoom.spi.mpeg.sampled.file.MpegAudioFileReader;
import net.sf.json.JSONObject;
import javax.sound.sampled.*;
import java.io.*;
import java.util.HashMap;
import static javax.crypto.Cipher.SECRET_KEY;
public class MP3ToWav {
/**
* mp3的字节数组生成wav文件
*
* @param sourceBytes
* @param targetPath
*/
public static boolean byteToWav(byte[] sourceBytes, String targetPath) {
if (sourceBytes == null || sourceBytes.length == 0) {
System.out.println("Illegal Argument passed to this method");
return false;
}
try (final ByteArrayInputStream bais = new ByteArrayInputStream(sourceBytes);
final AudioInputStream sourceAIS = AudioSystem.getAudioInputStream(bais)) {
AudioFormat baseFormat = sourceAIS.getFormat();
// 设置MP3的语音格式,并设置16bit
AudioFormat mp3tFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, baseFormat.getFrameRate(), 16,
1, 2, baseFormat.getFrameRate(), false);
// 设置百度语音识别的音频格式
// AudioFormat pcmFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 16000, 16, 1, 2, 16000, false);
try (
// 先通过MP3转一次,使音频流能的格式完整
final AudioInputStream mp3AIS = AudioSystem.getAudioInputStream(mp3tFormat, sourceAIS);
) {
// 根据路径生成wav文件
AudioSystem.write(mp3AIS, AudioFileFormat.Type.WAVE, new File(targetPath));
}
return true;
} catch (IOException e) {
e.printStackTrace();
System.out.println("文件转换异常:" + e.getMessage());
return false;
} catch (UnsupportedAudioFileException e) {
e.printStackTrace();
System.out.println("文件转换异常:" + e.getMessage());
return false;
}
}
/**
* 将文件转成字节流
*
* @param filePath
* @return
*/
public static byte[] getBytes(String filePath) {
byte[] buffer = null;
try {
File file = new File(filePath);
FileInputStream fis = new FileInputStream(file);
ByteArrayOutputStream bos = new ByteArrayOutputStream(1000);
byte[] b = new byte[1000];
int n;
while ((n = fis.read(b)) != -1) {
bos.write(b, 0, n);
}
fis.close();
bos.close();
buffer = bos.toByteArray();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return buffer;
}
/**
* MP3转换PCM文件方法
*
* @param mp3filepath 原始文件路径
* @param pcmfilepath 转换文件的保存路径
* @throws Exception
*/
public static void convertMP32PCM(String mp3filepath, String pcmfilepath) throws Exception {
AudioInputStream audioInputStream = getPcmAudioInputStream(mp3filepath);
AudioSystem.write(audioInputStream, AudioFileFormat.Type.WAVE, new File(pcmfilepath));
}
private static AudioInputStream getPcmAudioInputStream(String mp3filepath) {
File mp3 = new File(mp3filepath);
AudioInputStream audioInputStream = null;
AudioFormat targetFormat = null;
try {
// = null;
MpegAudioFileReader mp = new MpegAudioFileReader();
AudioInputStream in = mp.getAudioInputStream(mp3);
AudioFormat baseFormat = in.getFormat();
targetFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, baseFormat.getFrameRate(), 16,
baseFormat.getChannels(), baseFormat.getFrameSize(), baseFormat.getFrameRate(), false);
audioInputStream = AudioSystem.getAudioInputStream(targetFormat, in);
} catch (Exception e) {
e.printStackTrace();
}
return audioInputStream;
}
private static void getMp3Info(String wavFilepath) {
try {
javax.sound.sampled.AudioInputStream audio_file = javax.sound.sampled.AudioSystem.getAudioInputStream(new File(wavFilepath));
audio_file.getFrameLength();
AudioFormat format = audio_file.getFormat();
System.out.println("getFrameSize:"+format.getFrameSize());
System.out.println("getFrameRate:"+format.getFrameRate());
System.out.println("getChannels:"+format.getChannels());
System.out.println("getSampleRate:"+format.getSampleRate());
System.out.println(format.getEncoding());
System.out.println(format.getSampleSizeInBits());
} catch (Exception e) {
}
}
public static void main(String args[]) throws Exception {
String filePath = "C:\\hhh.mp3";
String targetPath = "C\\xx.wav";
byteToWav(getBytes(filePath),targetPath);
//getMp3Info(filePath);
}
}
注:在踩坑的过程中也遇到一些mp3文件转换成pcm文件时的问题,也应该是原文件的格式参数问题。