最近项目需要,做了一个语音转写的小demo,结合了微信小程序
前端:微信小程序,录音后上传到服务器
服务器:使用express搭建,multer作为上传中间件,ffmpeg作为音频格式转换工具,再调用百度语音识别的Node.js SDK把语音转换成文字
考虑到需要做语音输入,如果使用HTML5的getUserMedia,兼容性会有些问题;最近正好在学微信小程序,索性就直接拿这个项目开撸了。
微信小程序的步骤
在pages/index/index.wxml
增加录音按钮
<button class="start-record" bindtap="startRecord">开始录音</button>
<button class="stop-record" bindtap="stopRecord">停止录音</button>
<button class="play-record" bindtap="playRecord">播放录音</button>
<button class="upload-record" bindtap="uploadRecord">上传录音</button>
<text class="voice-text">语音转换后的文字:{{text}}</text>
在pages/index/index.js
中增加业务逻辑
onLoad中增加init方法,初始化recorderManager和innerAudioContext,分别是录音的管理器和播放语音的管理器
onLoad: function () {
this.init()
// ....
}
init: function() {
const recorderManager = wx.getRecorderManager()
recorderManager.onStart(() => {
console.log('recorder start')
})
recorderManager.onPause(() => {
console.log('recorder pause')
})
recorderManager.onStop((res) => {
console.log('recorder stop', res)
const { tempFilePath } = res
this.setData({
recordSrc: res.tempFilePath,
})
})
recorderManager.onFrameRecorded((res) => {
const { frameBuffer } = res
console.log('frameBuffer.byteLength', frameBuffer.byteLength)
})
const innerAudioContext = wx.createInnerAudioContext()
innerAudioContext.autoplay = false
innerAudioContext.onPlay(() => {
console.log('开始播放')
})
innerAudioContext.onError((res) => {
console.log(res.errMsg)
console.log(res.errCode)
})
this.setData({
recorderManager: recorderManager,
innerAudioContext: innerAudioContext
});
},
录音和结束录音
startRecord: function() {
const options = {
duration: 600000,
sampleRate: 44100,
numberOfChannels: 1,
encodeBitRate: 192000,
format: 'aac',
frameSize: 50
}
this.data.recorderManager.start(options);
},
stopRecord: function() {
this.data.recorderManager.stop();
},
为了更好的验证,播放录音:
playRecord: function() {
this.data.innerAudioContext.src = this.data.recordSrc
this.data.innerAudioContext.play()
},
语音上传客户端使用的是wx.uploadFile接口
uploadRecord: function() {
let that = this;
wx.showLoading({
title: '上传中...'
});
const uploadTask = wx.uploadFile({
url: 'http://localhost:8080/upload', //仅为示例,非真实的接口地址
filePath: this.data.recordSrc,
name: 'voice',
formData: {
'token': '12fdfsdadf',
},
success: function(res){
wx.hideLoading();
console.log(res);
var data = JSON.parse(res.data);
that.setData({
text: data.text
});
},
error: function(res) {
wx.hideLoading();
wx.showToast(res.msg)
console.log(res);
}
});
uploadTask.onProgressUpdate((res) => {
console.log('上传进度', res.progress)
console.log('已经上传的数据长度', res.totalBytesSent)
console.log('预期需要上传的数据总长度', res.totalBytesExpectedToSend)
})
}
这里有个地方需要注意:我们的url是http开头的(不是https),所以需要在微信开发者工具里设置一下:
在"工具-项目详情"中勾选"不校验合法域名、web-view(业务域名)、TLS 版本以及 HTTPS 证书",否则请求会报错
上传服务器:
const convert = require('./convert');
const voice = require('./voice');
const express = require('express');
const co = require('co');
const app = express();
const multer = require('multer');
const upload = multer({ dest: 'uploads/' });
// POST /upload
// Accepts the recording uploaded in multipart field "voice", converts it with
// convert(), runs speech recognition with voice(), and replies with
// { text } on success.
//
// Error responses carry { msg }:
//  - 400 when the request contains no file in the "voice" field;
//  - 500 when conversion or recognition fails.
app.post('/upload', upload.array('voice'), function (req, res, next) {
  const uploaded = req.files && req.files[0];
  if (!uploaded) {
    res.status(400).json({
      msg: 'no file uploaded in field "voice"'
    });
    return;
  }
  convert(uploaded.path, uploaded.filename).then(function (fpath) {
    return voice(fpath);
  }).then(function (result) {
    // First recognition candidate returned by the speech service.
    const text = result.result[0];
    console.log(text);
    res.status(200).json({
      text: text
    });
  }).catch(function (err) {
    console.log(err);
    // Error instances serialize to "{}" in JSON, so send their message;
    // plain rejection payloads are passed through unchanged.
    res.status(500).json({
      msg: err instanceof Error ? err.message : err
    });
  });
});
app.listen(8080);
因为微信上传的语音只有mp3和aac格式的,而百度的restful的api只支持pcm和wav格式的语音,所以我们需要进行语音格式的转换,转换工具使用的是ffmpeg
首先安装ffmpeg(macOS下可以用Homebrew):brew install ffmpeg
然后在convert.js利用shell命令进行格式转换
const process = require('child_process');
/**
 * Convert an audio file to raw PCM (codec pcm_s16le, 1 channel, 16000 Hz)
 * by running ffmpeg.
 *
 * ffmpeg is started with execFile and an argument array instead of a shell
 * command string, so characters in the paths are never interpreted by a
 * shell.
 *
 * @param {string} fpath - Input file path, relative to the working directory.
 * @param {string} fname - Base name of the output; ".pcm" is appended.
 * @returns {Promise<string>} Resolves with the output file name
 *   (`fname + '.pcm'`); rejects with the error reported by execFile.
 */
function convert (fpath, fname) {
  console.log('in convert');
  const output = fname + '.pcm';
  const args = [
    '-y',
    '-i', './' + fpath,
    '-acodec', 'pcm_s16le',
    '-f', 's16le',
    '-ac', '1',
    '-ar', '16000',
    output
  ];
  return new Promise(function (resolve, reject) {
    console.log('ffmpeg ' + args.join(' '));
    // `process` here is the child_process module required by this file.
    process.execFile('ffmpeg', args, function (error, stdout, stderr) {
      if (error !== null) {
        console.log('exec error: ' + error);
        reject(error);
        return;
      }
      resolve(output);
    });
  });
}
module.exports = convert;
最后一步就是利用百度的sdk进行语音转文字啦,不过各位需要自己去百度的开放平台注册,然后新建应用,获取到APPID/AK/SK
const AipSpeechClient = require("baidu-aip-sdk").speech;
// 设置APPID/AK/SK
const APP_ID = "...";
const API_KEY = "...";
const SECRET_KEY = "...";
// 新建一个对象,建议只保存一个对象调用服务接口
const client = new AipSpeechClient(APP_ID, API_KEY, SECRET_KEY);
const fs = require('fs');
/**
 * Run speech recognition on a PCM file through the speech client.
 *
 * The file is read asynchronously and passed to client.recognize() as-is:
 * fs.readFile already yields a Buffer, so no `new Buffer(...)` copy (a
 * deprecated constructor) is needed.
 *
 * @param {string} fpath - Path of the PCM file to recognize.
 * @returns {Promise<Object>} Resolves with the recognition result when its
 *   `err_no` is 0. Rejects with the result object itself when `err_no` is
 *   non-zero (so callers can inspect it), or with the underlying error when
 *   reading the file or calling the service fails.
 */
function voiceToText(fpath) {
  console.log('in voiceToText');
  return new Promise(function (resolve, reject) {
    fs.readFile(fpath, function (err, voiceBuffer) {
      if (err) {
        reject(err);
        return;
      }
      resolve(voiceBuffer);
    });
  }).then(function (voiceBuffer) {
    // Recognize the local file contents.
    return client.recognize(voiceBuffer, 'pcm', 16000);
  }).then(function (result) {
    console.log(': ' + JSON.stringify(result));
    if (result.err_no !== 0) {
      // Kept as a plain rejection with the service payload for callers
      // that forward it to the HTTP client.
      return Promise.reject(result);
    }
    return result;
  }, function (err) {
    console.log(err);
    throw err;
  });
}
module.exports = voiceToText
有一些注册的步骤,省略了,如果有疑问可以交流