Practical Guide: Wrapping Faster-Whisper as a FastAPI Endpoint with High-Concurrency Handling (Bundle Included)
「faster-whisper」
Link: https://pan.quark.cn/s/d4ddffb1b196
In this example we use the Python interface of faster-whisper (installed via pip as the faster-whisper package). The request parameters let you specify the model name, the device (e.g. cpu or cuda), the computation precision (e.g. float16 or int8), and the beam_size. To avoid loading the same model repeatedly, a global dictionary is used as a cache of model instances, and transcription jobs are handed off to a thread pool (ThreadPoolExecutor) so that they do not block FastAPI's event loop.
Install the dependencies (Python 3.9 or later is recommended):
pip install fastapi uvicorn faster-whisper python-multipart  # python-multipart is required for file uploads
For CUDA:
pip install torch  # choose the build that matches your hardware
# Example for CUDA 12.6:
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
If you plan to use a GPU, make sure the system has the appropriate NVIDIA libraries installed (cuBLAS, cuDNN, etc.) and that the CUDA environment matches what faster-whisper requires.
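If you are unsure whether the GPU is actually usable, a quick sanity check such as the sketch below can help you decide whether to request device="cuda". It assumes torch was installed as above; faster-whisper itself runs on CTranslate2, so a passing check here is a good hint rather than a guarantee.
# Sketch: check CUDA availability before requesting device="cuda".
import torch

if torch.cuda.is_available():
    print("CUDA available:", torch.cuda.get_device_name(0))
else:
    print("CUDA not available; use device='cpu' (e.g. with compute_type='int8')")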
The recommended project layout is as follows:
faster_whisper_fastapi/
├── models
├── app.py
└── requirements.txt
Sample requirements.txt contents:
fastapi
uvicorn
faster-whisper
python-multipart
huggingface_hub
The complete code is shown below. It validates the MIME type of the uploaded file, saves the audio to a temporary file, and then calls faster-whisper to transcribe it; the model name, device, computation precision and beam_size can all be specified per request, and a thread pool provides a degree of concurrent processing.
import os
import tempfile
import asyncio
from fastapi import FastAPI, UploadFile, File, HTTPException, Query
from fastapi.responses import JSONResponse
from faster_whisper import WhisperModel
from concurrent.futures import ThreadPoolExecutor
from huggingface_hub import snapshot_download  # used to download missing models
app = FastAPI(title="Faster-Whisper FastAPI Interface")
# Mapping from model names to local directories
MODEL_PATH_MAPPING = {
"faster-distil-whisper-large-v2": "./models/faster-distil-whisper-large-v2",
"faster-distil-whisper-large-v3": "./models/faster-distil-whisper-large-v3",
"faster-distil-whisper-small.en": "./models/faster-distil-whisper-small.en",
"faster-distil-whisper-medium.en": "./models/faster-distil-whisper-medium.en",
"faster-whisper-large-v2": "./models/faster-whisper-large-v2",
"faster-whisper-large-v1": "./models/faster-whisper-large-v1",
"faster-whisper-medium.en": "./models/faster-whisper-medium.en",
"faster-whisper-medium": "./models/faster-whisper-medium",
"faster-whisper-base.en": "./models/faster-whisper-base.en",
"faster-whisper-base": "./models/faster-whisper-base",
"faster-whisper-small.en": "./models/faster-whisper-small.en",
"faster-whisper-small": "./models/faster-whisper-small",
"faster-whisper-tiny.en": "./models/faster-whisper-tiny.en",
"faster-whisper-tiny": "./models/faster-whisper-tiny",
"faster-whisper-large-v3": "./models/faster-whisper-large-v3",
}
# Mapping from model names to Hugging Face repo IDs (used for automatic download)
HF_REPO_IDS = {
"faster-distil-whisper-large-v2": "Systran/faster-distil-whisper-large-v2",
"faster-distil-whisper-large-v3": "Systran/faster-distil-whisper-large-v3",
"faster-distil-whisper-small.en": "Systran/faster-distil-whisper-small.en",
"faster-distil-whisper-medium.en": "Systran/faster-distil-whisper-medium.en",
"faster-whisper-large-v2": "Systran/faster-whisper-large-v2",
"faster-whisper-large-v1": "Systran/faster-whisper-large-v1",
"faster-whisper-medium.en": "Systran/faster-whisper-medium.en",
"faster-whisper-medium": "Systran/faster-whisper-medium",
"faster-whisper-base.en": "Systran/faster-whisper-base.en",
"faster-whisper-base": "Systran/faster-whisper-base",
"faster-whisper-small.en": "Systran/faster-whisper-small.en",
"faster-whisper-small": "Systran/faster-whisper-small",
"faster-whisper-tiny.en": "Systran/faster-whisper-tiny.en",
"faster-whisper-tiny": "Systran/faster-whisper-tiny",
"faster-whisper-large-v3": "Systran/faster-whisper-large-v3",
}
# Cache of already loaded models
loaded_models = {}
def get_model(model_name: str, device: str = "cpu", compute_type: str = "float16"):
"""
根据映射关系从本地路径加载模型并缓存,自动下载缺失模型。
对于不支持 FP16 的设备,自动降级到 float32 或 int8。
参数:
- model_name: 模型名称或 Hugging Face 仓库 ID
- device: "cpu" 或 "cuda"
- compute_type: 计算精度,如 "float16"、"float32" 或 "int8"
"""
    # If FP16 is requested on a non-CUDA device, fall back automatically
if compute_type == "float16" and device != "cuda":
print("[Warning] 当前设备不支持 FP16,已自动降级到 float32。")
compute_type = "float32"
key = f"{model_name}_{device}_{compute_type}"
if key not in loaded_models:
model_dir = MODEL_PATH_MAPPING.get(model_name, f"./models/{model_name}")
        # Download the model automatically if it is not present locally
if not os.path.exists(model_dir):
os.makedirs(model_dir, exist_ok=True)
hf_repo_id = HF_REPO_IDS.get(model_name, model_name)
print(f"Downloading model {hf_repo_id} to {model_dir}...")
try:
snapshot_download(
repo_id=hf_repo_id,
local_dir=model_dir,
local_dir_use_symlinks=False,
resume_download=True,
token=None,
)
print(f"Model downloaded successfully to {model_dir}")
except Exception as e:
raise RuntimeError(f"模型下载失败: {e}")
if not os.path.isdir(model_dir):
raise RuntimeError(f"模型目录 {model_dir} 不存在,请检查路径或下载配置")
try:
loaded_models[key] = WhisperModel(
model_dir,
device=device,
compute_type=compute_type,
                local_files_only=True  # use local files only
)
except Exception as e:
raise RuntimeError(f"加载模型失败: {e}") from e
return loaded_models[key]
# Thread pool used to handle transcription tasks concurrently
executor = ThreadPoolExecutor(max_workers=4)
def transcribe_audio(model: WhisperModel, file_path: str, beam_size: int):
"""音频转录实现"""
try:
segments, info = model.transcribe(file_path, beam_size=beam_size)
segments = list(segments)
transcript = "".join(segment.text for segment in segments)
return {
"transcript": transcript,
"language": info.language,
"segments": [
{"start": seg.start, "end": seg.end, "text": seg.text}
for seg in segments
]
}
except Exception as e:
return {"error": str(e)}
@app.post("/transcribe")
async def transcribe(
file: UploadFile = File(...),
    model_name: str = Query("faster-whisper-base", description="Model name or Hugging Face repo ID"),
beam_size: int = Query(5, description="Beam Size"),
    device: str = Query("cpu", description="Device to run on: cpu/cuda"),
    compute_type: str = Query("float16", description="Computation precision: float16/float32/int8")
):
    # Validate the audio format
allowed_types = [
"audio/wav", "audio/x-wav", "audio/wave", "audio/x-pn-wav",
"audio/mpeg", "audio/mp3"
]
if file.content_type not in allowed_types:
        raise HTTPException(400, detail="Unsupported audio format")
    # Save the upload to a temporary file; derive the suffix from the filename (fall back to .wav)
    suffix = os.path.splitext(file.filename or "")[1] or ".wav"
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
contents = await file.read()
tmp.write(contents)
tmp_path = tmp.name
loop = asyncio.get_running_loop()
try:
        # Load the model and run the transcription asynchronously
model = await loop.run_in_executor(executor, get_model, model_name, device, compute_type)
result = await loop.run_in_executor(executor, transcribe_audio, model, tmp_path, beam_size)
except Exception as e:
raise HTTPException(500, detail=str(e))
finally:
os.unlink(tmp_path)
if "error" in result:
raise HTTPException(500, detail=result["error"])
return JSONResponse(content=result)
if __name__ == "__main__":
import uvicorn
uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
Local testing
Run the following in the project directory:
uvicorn app:app --host 0.0.0.0 --port 8000 --reload
Then open http://localhost:8000/docs to use the automatically generated FastAPI docs and test a file upload.
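You can also call the endpoint from a script. Below is a minimal client sketch using the requests library (an extra dependency, installed with pip install requests); the file name test.wav and the parameter values are placeholders only:
# Minimal client sketch: POST an audio file to /transcribe.
# Assumes the server is running on localhost:8000 and test.wav exists.
import requests

with open("test.wav", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/transcribe",
        params={
            "model_name": "faster-whisper-base",
            "beam_size": 5,
            "device": "cpu",
            "compute_type": "int8",
        },
        files={"file": ("test.wav", f, "audio/wav")},
    )
print(resp.status_code)
print(resp.json())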
Concurrency notes
The ThreadPoolExecutor dispatches transcription jobs onto worker threads, so the blocking, compute-heavy work does not stall FastAPI's event loop and a moderate level of concurrency is supported. For more throughput you can additionally run uvicorn with several worker processes:
uvicorn app:app --host 0.0.0.0 --port 8000 --workers 4
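To check that requests are actually handled in parallel, you can fire several uploads at once from the client side. The sketch below again uses the requests library and the placeholder file test.wav:
# Sketch: send several transcription requests concurrently to exercise the
# server-side thread pool. Assumes requests is installed and test.wav exists.
from concurrent.futures import ThreadPoolExecutor
import requests

def send_request(i: int):
    with open("test.wav", "rb") as f:
        resp = requests.post(
            "http://localhost:8000/transcribe",
            params={"model_name": "faster-whisper-base", "device": "cpu", "compute_type": "int8"},
            files={"file": ("test.wav", f, "audio/wav")},
        )
    return i, resp.status_code

with ThreadPoolExecutor(max_workers=4) as pool:
    for i, status in pool.map(send_request, range(8)):
        print(f"request {i}: HTTP {status}")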
Interface description
file: the uploaded audio file (WAV/MP3)
model_name: optional; defaults to "faster-whisper-base", and can be set to any name in MODEL_PATH_MAPPING (e.g. "faster-whisper-large-v3", "faster-distil-whisper-large-v3") or a full Hugging Face repo ID
beam_size: beam size used during transcription, default 5
device: the device to run on, "cpu" or "cuda"
compute_type: computation precision, usually "float16" or "int8"
After a file is uploaded, the endpoint saves the audio to a temporary file, transcribes it with faster-whisper, and returns the transcript, the detected language, and the start time, end time and text of every segment; a sample response is sketched below.
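For reference, a successful response has roughly the following shape (the values are purely illustrative):
{
  "transcript": " Hello world.",
  "language": "en",
  "segments": [
    {"start": 0.0, "end": 1.52, "text": " Hello world."}
  ]
}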