# 基于静音段识别,实现两个方法:
# 1. 输出各非静音段的起止时间(毫秒);
# 2. 将各非静音段合并后导出。
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence, detect_silence
def tag_nosilence_vocals_by_detect_silence(filePath):
    """
    Locate the non-silent (dry vocal) stretches of an mp3 file.

    Silent ranges are detected with pydub's ``detect_silence`` using a
    minimum silence length of 1500 ms and a threshold of -60 dBFS. The
    stretches before, between and after those silent ranges are treated as
    non-silent.

    Only the mp3 format is supported.

    Args:
        filePath: Path of the mp3 file to analyse.

    Returns:
        A list of ``[start_ms, end_ms]`` pairs in chronological order, one
        for every non-silent stretch that lasts longer than 1000 ms.
    """
    sound = AudioSegment.from_mp3(filePath)
    print(sound.dBFS, sound.duration_seconds)
    silent_ranges = detect_silence(
        sound,
        min_silence_len=1500,
        silence_thresh=-60,
    )

    nosilence_list = []
    # End of the previous silent range; 0 means "start of the audio".
    previous_end = 0
    for silence_start, silence_end in silent_ranges:
        nosilence_list.append([previous_end, silence_start])
        previous_end = silence_end

    # Always account for the audio that follows the last silent range. This
    # also covers the case where no silence was detected at all, in which
    # case the whole clip is a single non-silent stretch. Zero-length or very
    # short ranges produced here are discarded by the filter below.
    nosilence_list.append([previous_end, len(sound)])

    # Keep only stretches longer than one second.
    return [item for item in nosilence_list if item[1] - item[0] > 1000]
def merge_nosilence_vocals_by_split_on_silence(srcFilePath, tgtFilePath):
    """
    Merge the non-silent (dry vocal) parts of an mp3 file into one mp3.

    The source is split with pydub's ``split_on_silence`` (minimum silence
    length 1500 ms, threshold -60 dBFS, 100 ms of silence kept around each
    chunk) and the resulting chunks are concatenated in order and exported
    to ``tgtFilePath``.

    Only the mp3 format is supported.

    Args:
        srcFilePath: Path of the mp3 file to read.
        tgtFilePath: Path the merged mp3 is written to (``str`` or
            path-like).

    Returns:
        ``tgtFilePath`` when the merged audio is longer than 1500 ms,
        otherwise ``None``. The file is exported in both cases.
    """
    sound = AudioSegment.from_mp3(srcFilePath)
    print(sound.dBFS, sound.duration_seconds)
    chunks = split_on_silence(
        sound,
        min_silence_len=1500,
        silence_thresh=-60,
        keep_silence=100,
    )

    # Seed the accumulator with the first millisecond of the source so it
    # already carries the source's audio parameters. Note that this 1 ms is
    # part of the exported audio and of the length check below.
    merged = sound[:1]
    for chunk in chunks:
        merged = merged + chunk

    # os.fspath accepts both str and path-like targets and always hands a
    # plain path to pydub (the previous code used an un-imported ``Path``).
    merged.export(os.fspath(tgtFilePath), format="mp3")

    if len(merged) > 1500:
        return tgtFilePath
    return None