An audio-processing example built with Streamlit, covering recording, speech-to-text, file download, and progress display, combining several techniques:
# Install dependencies
pip install streamlit streamlit-webrtc audio-recorder-streamlit openai-whisper
import streamlit as st
from audio_recorder_streamlit import audio_recorder
import whisper
import os
from datetime import datetime

# Load the Whisper model once and cache it across reruns
@st.cache_resource
def load_whisper_model():
    return whisper.load_model("base")  # "base" balances speed and accuracy

model = load_whisper_model()

# Page layout
st.title("Audio Processing Workflow")
col1, col2 = st.columns(2)

with col1:
    # Browser-native recording widget
    audio_bytes = audio_recorder(
        text="Click to record",
        recording_color="#e87070",
        neutral_color="#6aa36f",
        icon_name="microphone",
        sample_rate=16000,
    )

    # Save the recording to a timestamped WAV file
    if audio_bytes:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        audio_path = f"audio_{timestamp}.wav"
        with open(audio_path, "wb") as f:
            f.write(audio_bytes)
        st.session_state.audio_path = audio_path
        st.audio(audio_bytes, format="audio/wav")

with col2:
    # Speech-to-text
    if "audio_path" in st.session_state and st.button("Start transcription"):
        progress_bar = st.progress(0)
        status_text = st.empty()
        try:
            status_text.text("Loading audio file...")
            progress_bar.progress(20)

            # Transcribe with Whisper
            status_text.text("Transcribing...")
            result = model.transcribe(st.session_state.audio_path)
            progress_bar.progress(80)

            # Show the result
            st.subheader("Transcription result")
            st.code(result["text"], language="text")
            st.session_state.text_result = result["text"]

            # Download buttons; derive file names from the stored path so
            # they remain valid across Streamlit reruns
            audio_name = os.path.basename(st.session_state.audio_path)
            stem = os.path.splitext(audio_name)[0]
            with st.expander("Download options"):
                st.download_button(
                    label="Download transcript",
                    data=st.session_state.text_result,
                    file_name=f"transcript_{stem}.txt",
                    mime="text/plain",
                )
                with open(st.session_state.audio_path, "rb") as f:
                    st.download_button(
                        label="Download audio",
                        data=f,
                        file_name=audio_name,
                        mime="audio/wav",
                    )

            progress_bar.progress(100)
            status_text.text("Done!")
        except Exception as e:
            st.error(f"Processing failed: {e}")
            progress_bar.progress(0)
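To try the workflow locally, save the script as a file (e.g. app.py) and start it with:

streamlit run app.py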
Key building blocks:
- Audio recording: the audio-recorder-streamlit component records natively in the browser.
- Speech recognition: the Whisper model is wrapped in @st.cache_resource so it is loaded once and reused across reruns.
- Progress management: st.progress / st.spinner provide the progress bar and loading animation (a spinner sketch follows this list).
- File download: st.download_button serves both the transcript and the recorded audio.
- Cloud deployment: when deploying on HuggingFace Spaces, add the port configuration:
  # Configuration to add when deploying on HuggingFace Spaces
  STREAMLIT_SERVER_PORT = 8501
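The main example drives a manual st.progress bar; st.spinner from the list above is the more compact alternative. A minimal sketch, reusing model and st.session_state.audio_path from the example:

if st.button("Transcribe (spinner variant)"):
    # Show a spinner while Whisper runs, then display the text
    with st.spinner("Transcribing..."):
        result = model.transcribe(st.session_state.audio_path)
    st.success("Done!")
    st.code(result["text"], language="text")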
Performance tuning
For English-only audio, switch to a lighter model (tiny.en / small.en); with a GPU available, load the model onto it:
model = whisper.load_model("base", device="cuda")
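A minimal sketch for choosing the model size and device at startup; torch is already installed as a Whisper dependency, and the size names here are only illustrative:

import torch
import whisper

# Prefer the GPU when available, otherwise fall back to CPU with a smaller model
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "small.en" if device == "cuda" else "tiny.en"
model = whisper.load_model(model_name, device=device)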
Extended features
Display a spectrogram of the recording (requires matplotlib and scipy):

import matplotlib.pyplot as plt
from scipy.io import wavfile

# Read the saved WAV file and plot its spectrogram in the app
rate, data = wavfile.read(audio_path)
if data.ndim > 1:
    data = data[:, 0]  # keep a single channel for stereo files
fig, ax = plt.subplots()
ax.specgram(data, Fs=rate)
st.pyplot(fig)
5. Deployment notes
Dependency management
# requirements.txt
streamlit>=1.28
openai-whisper==20231106
audio-recorder-streamlit==0.1.7
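pip does not install the ffmpeg binary that Whisper shells out to; on Debian-based hosts (HuggingFace Spaces and Streamlit Community Cloud both read an apt package list from packages.txt) it can be declared next to requirements.txt. If the spectrogram extension is used, matplotlib and scipy also belong in requirements.txt.

# packages.txt
ffmpeg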
Browser compatibility
The recorder relies on the browser's microphone API, which is only available over HTTPS (or on localhost) and requires the user to grant microphone permission.
Resource monitoring
# Monitor memory usage of the Streamlit process
ps axo pid,user,%mem,command | grep streamlit
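For monitoring from inside the app itself, a minimal sketch using psutil (an additional dependency, not listed above) can surface the process's memory footprint in the sidebar:

import psutil
import streamlit as st

# Resident memory of the current Streamlit process, in MB
mem_mb = psutil.Process().memory_info().rss / 1024 ** 2
st.sidebar.metric("Memory (MB)", f"{mem_mb:.0f}")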
This approach combines local model inference with Streamlit's interactive strengths; because transcription runs locally, it can cut cloud API call costs by roughly 90% compared with a pure API solution. Staged progress updates and exception handling give long-running tasks a noticeably better user experience.