在django中使用ffmpeg实现mp3格式的音频 转换成 wav格式。作适当改造,也可以在非django项目中使用,本文仅提供实现方案,作为参考。
使用场景:
在微信小程序中录音,调用讯飞的相关音频训练接口。
由于微信小程序中的录音,不能满足讯飞音频训练的音频格式(需要16k采样率,16bits采样深度,单声道的wav),需要在服务器使用ffmpeg转换成所需的格式。
实现方案:
在微信小程序中录音(mp3格式),上传到服务器,服务器将mp3格式转换成16k采样率,16bits采样深度,单声道的wav,再调用讯飞相关音频识别接口。
小程序中音频选项设置:
// Recording options set in the mini program.
// Sample rate, bit rate and channel count are left commented out, so the
// recorder's defaults apply; the server converts the upload afterwards.
const audioOptions = {
duration: 60000 * 10, // maximum length: 10 minutes, in milliseconds (original note: default is 60 seconds)
// sampleRate: 8000, // sample rate (original note: default is 8000)
// encodeBitRate: 48000, // encoding bit rate (original note: default is 48000)
// numberOfChannels: 2, // number of recording channels (original note: default is 2)
format: 'mp3', // record as mp3. NOTE(review): the original comment said "wav format", which contradicts the value; the stated default should be confirmed against the recorder docs
};
服务器python代码中核心转换代码:
# Core conversion step: hand the uploaded MP3 to ffmpeg and have it write a
# 16 kHz, 16-bit, mono WAV file.
ffmpeg_args = [
    '/usr/local/ffmpeg/bin/ffmpeg',
    '-i', mp3_file_path,        # input file
    '-map_metadata', '-1',      # do not copy metadata from the input
    '-flags', '+bitexact',
    '-ar', '16000',             # output sample rate: 16 kHz
    '-ac', '1',                 # output channels: mono
    '-acodec', 'pcm_s16le',     # signed 16-bit little-endian PCM
    '-y', wav_file_path,        # overwrite the output file if it exists
]
subprocess.call(ffmpeg_args)
一、安装环境:
# Create a scratch directory that holds every source archive used below.
cd /root/tmp/
mkdir about_ffmpeg
cd about_ffmpeg/
ls
# Download the source archives
wget https://ffmpeg.org/releases/ffmpeg-4.2.2.tar.bz2
wget https://sourceforge.net/projects/lame/files/lame/3.100/lame-3.100.tar.gz
wget https://libav.org/releases/libav-12.3.tar.gz
wget https://downloads.xiph.org/releases/ogg/libogg-1.3.3.tar.xz
wget http://downloads.xiph.org/releases/vorbis/libvorbis-1.3.3.tar.gz
# Alternative mirror for the same libvorbis archive:
# wget https://ftp.osuosl.org/pub/xiph/releases/vorbis/libvorbis-1.3.3.tar.gz
wget https://code.videolan.org/videolan/x264/-/archive/master/x264-master.tar.bz2
wget http://downloads.xvid.org/downloads/xvidcore-1.3.4.tar.gz
wget http://www.tortall.net/projects/yasm/releases/yasm-1.3.0.tar.gz
# Build and install
# 1. Install lame:
tar -zxvf lame-3.100.tar.gz
cd lame-3.100/
# No --prefix is given, so configure's default install location is used.
./configure
make && make install
make clean
cd ..
# Confirm the lame executable is installed and runnable.
lame --version
# 2. Install libogg:
tar -xf libogg-1.3.3.tar.xz
cd libogg-1.3.3/
# Install under /usr, build shared libraries only, and put the docs in a
# versioned directory.
./configure --prefix=/usr --disable-static --docdir=/usr/share/doc/libogg-1.3.3
make && make install
make clean
cd ..
# Refresh the dynamic linker cache so the newly installed library is found.
ldconfig
# libogg installs a library, not an executable, so there is no `libogg`
# command to run; query the installed version through pkg-config instead.
pkg-config --modversion ogg
# 3. Install libvorbis:
tar -zxvf libvorbis-1.3.3.tar.gz
cd libvorbis-1.3.3/
# No --prefix is given, so configure's default install location is used.
./configure
make && make install
make clean
cd ..
# 4. Install xvidcore:
tar -zxvf xvidcore-1.3.4.tar.gz
# The archive unpacks into "xvidcore/" (no version suffix); the configure
# script is run from build/generic.
cd xvidcore/
cd build/generic/
./configure
make && make install
# Back up three levels to the download directory.
cd ../../..
# 5. Install yasm:
tar -vxf yasm-1.3.0.tar.gz
cd yasm-1.3.0/
./configure
make && make install
cd ..
# 6. Install x264-master:
tar -vxf x264-master.tar.bz2
cd x264-master/
# Variant without --disable-asm, left commented out:
#./configure --enable-shared --enable-static
# Build both shared and static libraries, with assembly optimisations disabled.
./configure --enable-shared --enable-static --disable-asm
make && make install
cd ..
# 7. Install libav:
# NOTE(review): the Python code in this guide only invokes
# /usr/local/ffmpeg/bin/ffmpeg; confirm that libav is actually needed.
# Return to the download directory before unpacking.
cd /root/tmp/about_ffmpeg/
tar -vxf libav-12.3.tar.gz
cd libav-12.3/
./configure
make && make install
cd ..
# 8. Install ffmpeg:
tar -vxf ffmpeg-4.2.2.tar.bz2
cd ffmpeg-4.2.2/
# Build shared libraries and install everything under /usr/local/ffmpeg
# (binaries in bin/, libraries in lib/).
# NOTE(review): no --enable-lib* option is passed here, so the codec libraries
# built in the previous steps are presumably not linked in — confirm whether
# they are required for the MP3 -> WAV conversion.
./configure --enable-shared --prefix=/usr/local/ffmpeg
make && make install
cd ..
# Refresh the dynamic linker cache after installing the shared libraries.
ldconfig
# Verification session, kept as recorded. It repeatedly checks whether the
# ffmpeg binary can be found and whether its shared libraries resolve.
ffmpeg -version
cd /usr/local/ffmpeg/
# Run the binary by explicit path from the install prefix.
./bin/ffmpeg -version
# ldd lists the shared libraries a binary needs; it takes a file path, and the
# binary itself lives in bin/.
ldd ffmpeg
cd bin/
ldd ffmpeg
cd ..
# Point the dynamic linker at the ffmpeg libraries. These values are relative
# paths, so they are only valid from the matching working directory.
export LD_LIBRARY_PATH=lib/
cd bin/
export LD_LIBRARY_PATH=../lib/
ldd ffmpeg
# Compare lookup through PATH with running the binary in the current directory.
ffmpeg -version
./ffmpeg -version
ldconfig
ffmpeg -version
cd /usr/local/ffmpeg/bin/
ldd ffmpeg
# Retry from the filesystem root, where the relative LD_LIBRARY_PATH no longer
# points at the library directory.
cd /
ffmpeg -version
# Put the ffmpeg bin directory on PATH through root's .bashrc.
cd /root/
vi .bashrc
# Line added to the file:
export PATH="/usr/local/ffmpeg/bin:$PATH"
# Reload the file so the change applies to the current shell.
source .bashrc
ffmpeg -version
ldd ffmpeg
ffmpeg -version
ldconfig
ffmpeg -version
cd /usr/local/ffmpeg/
ldd ffmpeg
cd bin/
ldd ffmpeg
# Use an absolute library path so it works from any working directory.
export LD_LIBRARY_PATH=/usr/local/ffmpeg/lib/
ffmpeg
cd /
ffmpeg
ffmpeg -version
# NOTE(review): the previous command was `cd /`, so this opens /.bashrc rather
# than /root/.bashrc — confirm which file was meant.
vi .bashrc
# Edited content (the PATH line is now commented out):
# export PATH="/usr/local/ffmpeg/bin:$PATH"
source .bashrc
ffmpeg -version
ffmpeg
cd /usr/local/ffmpeg/bin/
ldd ffmpeg
# Set the absolute library path again and re-check library resolution.
export LD_LIBRARY_PATH=/usr/local/ffmpeg/lib/
ldd ffmpeg
ffmpeg -version
cd /
ffmpeg -version
ldconfig
ffmpeg -version
ldconfig
ffmpeg -version
# Register the ffmpeg library directory with the dynamic linker system-wide,
# instead of relying on LD_LIBRARY_PATH in each shell.
cd /etc/
vi ld.so.conf
# File content after editing (the last line is the added directory):
include ld.so.conf.d/*.conf
/usr/local/ffmpeg/lib/
# Rebuild the linker cache so the added directory takes effect.
ldconfig
ffmpeg -version
ldconfig
ffmpeg
# Same check by absolute path and through PATH, using the double-dash spelling.
/usr/local/ffmpeg/bin/ffmpeg --version
ffmpeg --version
# Add the ffmpeg bin directory to PATH through /etc/profile.
vi /etc/profile
# Lines added to the file:
PATH=$PATH:/usr/local/ffmpeg/bin
export PATH
# Reload the profile in the current shell and check that ffmpeg is found.
source /etc/profile
ffmpeg --version
二、代码实现:
import subprocess
import _thread
import time
import os
import io
import hashlib
from pydub import AudioSegment
from django.conf import settings
# Fetch the file uploaded by the client from the "uploadFile" form field.
# NOTE: this is None when the request carries no such field; the enclosing
# view should reject that case before reaching the code below.
file = request.FILES.get("uploadFile", None)

# Whether to delete the saved upload and the generated WAV file once the
# converted bytes have been read back into memory.
is_remove_tmp_audio_file = False

logger.info('------- start')
# Start with the raw upload; this is replaced by the WAV bytes on success.
file_data = file.read()
logger.info('111111 开始')
try:
    # Save the uploaded audio locally first. calculate_md5 and
    # get_file_extension are project helpers that are not shown here; their
    # results form the on-disk file name.
    file_md5 = calculate_md5(file)
    logger.info('222222 file_md5:' + file_md5)
    # Get the extension, which is also passed to pydub as the input format.
    file_type = get_file_extension(file)
    logger.info('222222 file_type:' + file_type)
    file_name = file_md5 + "." + file_type
    logger.info('222222 file_name:' + file_name)
    upload_dir = settings.MEDIA_ROOT + 'upload/'
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(upload_dir, exist_ok=True)
    file_path = upload_dir + file_name
    logger.info('222222 file_path:' + file_path)
    try:
        # Write the upload chunk by chunk. The handle is closed before anything
        # reads the file back, so pydub and ffmpeg never see buffered,
        # not-yet-flushed data.
        with open(file_path, "wb+") as f:
            for chunk in file.chunks():
                f.write(chunk)
        logger.info('333333 保存文件成功')

        # Load the saved upload with pydub; used for logging the source
        # parameters and as the input of the fallback conversion.
        sound_access = AudioSegment.from_file(file_path, format=file_type)
        logger.info('444444 sound_access.frame_rate=' + str(sound_access.frame_rate))
        logger.info('444444 sound_access.sample_width=' + str(sound_access.sample_width))
        logger.info('444444 sound_access.channels=' + str(sound_access.channels))

        wav_path = upload_dir + file_md5 + ".wav"
        logger.info('555555 sound_wav wav_path:' + wav_path)

        # Method one: convert with the ffmpeg executable. The source file must
        # still exist here, so temporary files are only removed at the end.
        is_changed = __chage_audio_format_to_wav(file_path, wav_path)
        logger.info('555555 转换格式方法一,结果:is_changed=' + str(is_changed))
        if is_changed is False:
            # Method two: convert with pydub. The set_* methods resample and
            # downmix the audio; constructing an AudioSegment from raw_data
            # with new parameters would only relabel the samples.
            sound_wav = (
                sound_access
                .set_frame_rate(16000)   # 16 kHz sample rate
                .set_channels(1)         # mono
                .set_sample_width(2)     # 2 bytes = 16-bit samples
            )
            logger.info('555555-2 转换格式方法二,sound_wav.frame_rate=' + str(sound_wav.frame_rate))
            logger.info('555555-2 转换格式方法二,sound_wav.sample_width=' + str(sound_wav.sample_width))
            logger.info('555555-2 转换格式方法二,sound_wav.channels=' + str(sound_wav.channels))
            logger.info('555555-2 转换格式方法二,sound_wav wav_path:' + wav_path)
            sound_wav.export(wav_path, format='wav')
            logger.info('555555-2 转换格式方法二,结果:export wav成功')

        # Read the converted WAV back; the context manager closes the handle.
        with open(wav_path, "rb") as wav_file:
            logger.info('666666 读取wav_file')
            file_data = wav_file.read()
        logger.info('777777 已获取wav_file的数据')

        if is_remove_tmp_audio_file:
            # Best-effort cleanup of both temporary files; a failure to delete
            # is logged and does not discard the converted data.
            for tmp_path in (file_path, wav_path):
                try:
                    os.remove(tmp_path)
                except OSError as remove_error:
                    logger.error(remove_error)
        logger.info('888888 结束')
    except Exception as e:
        logger.error(e)
    logger.info('------- end')
except Exception as ex1:
    logger.error(ex1)
# If no error was logged above, file_data now holds the WAV content in the
# required format and can be uploaded to the iFlytek interface.
#
def __chage_audio_format_to_wav(source_file_path, wav_file_path):
    """
    Convert an audio file to a 16 kHz, 16-bit, mono WAV file with ffmpeg.

    Equivalent command line:
        ffmpeg -i in.mp3 -map_metadata -1 -flags +bitexact -ar 16000 -ac 1 -acodec pcm_s16le -y out.wav

    :param source_file_path: path of the source audio file
    :param wav_file_path: path the converted WAV file is written to
    :return: True if ffmpeg exited with status 0; False if it exited with a
        non-zero status or could not be started
    """
    try:
        logger.info('source_file_path=' + source_file_path)
        logger.info('wav_file_path=' + wav_file_path)
        # Arguments are passed as a list (no shell), so paths containing
        # spaces or shell metacharacters are handled safely.
        return_code = subprocess.call(['/usr/local/ffmpeg/bin/ffmpeg',
                                       '-i', source_file_path,
                                       '-map_metadata', '-1',
                                       '-flags', '+bitexact',
                                       '-ar', '16000',
                                       '-ac', '1',
                                       '-acodec', 'pcm_s16le',
                                       '-y', wav_file_path])
    except Exception as ex:
        # Raised, for example, when the ffmpeg executable does not exist.
        logger.error(ex)
        return False
    # subprocess.call does not raise on a failing command; it returns the exit
    # status, which has to be checked explicitly.
    if return_code != 0:
        logger.error('转换音频失败!ffmpeg返回码=' + str(return_code))
        return False
    logger.info('转换音频成功!')
    return True
本文用到的相关安装包下载