创建trans.py 写入:
from pydub import AudioSegment # pip install pydub
def mp3_to_wav(mp3_path, wav_path):
    """Convert an MP3 file to WAV.

    Args:
        mp3_path: Path of the MP3 file to read.
        wav_path: Path the WAV file is exported to.

    Returns:
        ``wav_path``, unchanged, so the call can be chained.
    """
    song = AudioSegment.from_mp3(mp3_path)
    song.export(wav_path, format='wav')
    return wav_path
创建speak.py(详见我写的《python调用baidu-aip朗读》),写入:
from os import remove
from aip import AipSpeech # pip install baidu-aip
from trans import mp3_to_wav # 导入刚写的
def get_sound(text: str, save_path,
              speed=5, tone=5, volume=5, person=1):
    """Synthesize ``text`` to speech and save it as a WAV file.

    The audio returned by the Baidu AIP client is written to a temporary
    MP3 file, converted to WAV with ``mp3_to_wav`` and the temporary file
    is removed afterwards.

    Args:
        text: Text to be spoken.
        save_path: Destination path of the resulting WAV file.
        speed: Forwarded to the service as ``spd``.
        tone: Forwarded to the service as ``pit``.
        volume: Forwarded to the service as ``vol``.
        person: Forwarded to the service as ``per``.

    Returns:
        Whatever ``mp3_to_wav`` returns (the WAV path).

    Raises:
        RuntimeError: If the service answers with an error dictionary
            instead of audio bytes.
    """
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or a config file that is not committed.
    app_id = '15422825'
    api_key = 'DhXGtWHYMujMVZZGRI3a7rzb'
    secret_key = 'PbyUvTL31fImGthOOIP5ZbbtEOGwGOoT'
    temp_mp3 = 'resources/sounds/temp/temp.mp3'

    client = AipSpeech(app_id, api_key, secret_key)
    result = client.synthesis(text, 'zh', 1,
                              {'spd': speed, 'pit': tone,
                               'vol': volume, 'per': person})
    # On failure the SDK returns a dict describing the error rather than
    # audio bytes; fail early with that information instead of crashing
    # inside f.write() and leaving an empty temp file behind.
    if isinstance(result, dict):
        raise RuntimeError('speech synthesis failed: %r' % (result,))

    with open(temp_mp3, 'wb') as f:
        f.write(result)
    try:
        return mp3_to_wav(temp_mp3, save_path)
    finally:
        # Clean up the intermediate MP3 even when the conversion fails.
        remove(temp_mp3)
创建compare.py(用于音频比较),写入:
import wave
import numpy as np
from itertools import zip_longest
def _flatten(array):
    """Flatten ``array`` by exactly one level.

    Elements that are ``list`` instances are spliced into the result;
    every other element (including tuples and deeper nested lists inside
    a spliced list) is kept as is.

    Args:
        array: Iterable whose items may be lists.

    Returns:
        A new list with one level of list nesting removed.
    """
    result = []
    for item in array:
        if isinstance(item, list):
            result.extend(item)
        else:
            result.append(item)
    return result
def _get_data(wav_path):
    """Read a WAV file and return its samples, one row per channel.

    Args:
        wav_path: Path of the WAV file to read.

    Returns:
        A NumPy array of shape ``(channels, frames)``. The dtype follows
        the file's sample width (1 byte -> uint8, 2 bytes -> int16,
        4 bytes -> int32).

    Raises:
        ValueError: If the file uses a sample width other than 1, 2 or 4
            bytes.
    """
    # The context manager closes the file even if reading fails.
    with wave.open(wav_path, 'rb') as wf:
        channels, width, _fps, frames = wf.getparams()[:4]
        raw = wf.readframes(frames)

    # WAV stores little-endian samples; 8-bit samples are unsigned.
    dtypes = {1: np.uint8, 2: '<i2', 4: '<i4'}
    if width not in dtypes:
        raise ValueError('unsupported sample width: %d bytes' % width)

    wav_data = np.frombuffer(raw, dtype=dtypes[width])
    # Frames are interleaved per channel; use the channel count from the
    # header instead of assuming stereo, then put each channel on a row.
    return wav_data.reshape(-1, channels).T
def compare_sounds(sound1, sound2):
    """Return the fraction of sample positions at which two WAV files agree.

    Both files are loaded with ``_get_data`` and flattened channel by
    channel. The shorter sequence is treated as if it were padded with
    zeros up to the length of the longer one, and samples are compared
    position by position for exact equality.

    Args:
        sound1: Path of the first WAV file.
        sound2: Path of the second WAV file.

    Returns:
        A float in ``[0.0, 1.0]``: matching positions divided by the
        length of the longer sequence. Two files without any samples are
        considered identical and yield ``1.0``.
    """
    samples1 = _get_data(sound1).ravel()
    samples2 = _get_data(sound2).ravel()

    longest = max(samples1.size, samples2.size)
    if longest == 0:
        # Nothing to compare; avoid dividing by zero.
        return 1.0
    shortest = min(samples1.size, samples2.size)

    matches = int(np.count_nonzero(samples1[:shortest] == samples2[:shortest]))
    # The remainder of the longer sound is compared against zero padding.
    if samples1.size > shortest:
        tail = samples1[shortest:]
    else:
        tail = samples2[shortest:]
    matches += int(np.count_nonzero(tail == 0))

    # Divide once so identical sounds give exactly 1.0 (no float drift).
    return matches / longest