基于 librosa.pyin 方法(见 librosa 官方文档)获取基频的最小值与最大值,再与标准音高序列对比,得到音域范围。
def create_standard_pitch_sequence():
    """
    Build the reference list of named pitches from C0 up to B9.

    Returns:
        list[tuple[str, float]]: ``(label, frequency)`` pairs ordered octave
        by octave and, inside each octave, from C to B. A label is the note
        name followed by the octave number, e.g. ``"A4"`` (440.00 in the
        table below).
    """
    note_names = ["C", "C♯/D♭", "D", "D♯/E♭", "E", "F",
                  "F♯/G♭", "G", "G♯/A♭", "A", "A♯/B♭", "B"]
    # One row per note name; column k is that note's frequency in octave k.
    freq_rows = [
        [16.352, 32.703, 65.406, 130.81, 261.63, 523.25, 1046.5, 2093.0, 4186.0, 8372.0],
        [17.324, 34.648, 69.296, 138.59, 277.18, 554.37, 1108.7, 2217.5, 4434.9, 8869.8],
        [18.354, 36.708, 73.416, 146.83, 293.66, 587.33, 1174.7, 2349.3, 4698.6, 9397.3],
        [19.445, 38.891, 77.782, 155.56, 311.13, 622.25, 1244.5, 2489.0, 4978.0, 9956.1],
        [20.602, 41.203, 82.407, 164.81, 329.63, 659.26, 1318.5, 2637.0, 5274.0, 10548],
        [21.827, 43.654, 87.307, 174.61, 349.23, 698.46, 1396.9, 2793.8, 5587.7, 11175],
        [23.125, 46.249, 92.499, 185.00, 369.99, 739.99, 1480.0, 2960.0, 5919.9, 11840],
        [24.500, 48.999, 97.999, 196.00, 392.00, 783.99, 1568.0, 3136.0, 6271.9, 12544],
        [25.957, 51.913, 103.83, 207.65, 415.30, 830.61, 1661.2, 3322.4, 6644.9, 13290],
        [27.500, 55.000, 110.00, 220.00, 440.00, 880.00, 1760.0, 3520.0, 7040.0, 14080],
        [29.135, 58.270, 116.54, 233.08, 466.16, 932.33, 1864.7, 3729.3, 7458.6, 14917],
        [30.868, 61.735, 123.47, 246.94, 493.88, 987.77, 1975.5, 3951.1, 7902.1, 15804],
    ]
    sequence = []
    # Transposing the table yields one tuple of twelve frequencies per octave.
    for octave, octave_freqs in enumerate(zip(*freq_rows)):
        for name, freq in zip(note_names, octave_freqs):
            sequence.append((f"{name}{octave}", freq))
    return sequence
使用文档中的测试代码:
import librosa
# Load librosa's bundled trumpet recording and estimate its f0 track with pYIN,
# searching the full C0..B9 span covered by the standard pitch table.
y, sr = librosa.load(librosa.ex('trumpet'))
# Pass the loader's sample rate through explicitly so pyin and times_like do
# not silently fall back to their own default rate.
f0, voiced_flag, voiced_probs = librosa.pyin(y,
                                             fmin=librosa.note_to_hz('C0'),
                                             fmax=librosa.note_to_hz('B9'),
                                             sr=sr)
# Time stamp of every f0 frame, used as the x axis when plotting.
times = librosa.times_like(f0, sr=sr)
看下可视化的效果:
import matplotlib.pyplot as plt
import numpy as np  # used by the spectrogram line below and by the range detector

# dB-scaled magnitude spectrogram of the signal; it is computed here but not
# drawn by this snippet.
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
fig, ax = plt.subplots()
ax.set(title='pYIN fundamental frequency estimation')
# NaN entries in f0 leave gaps in the plotted curve.
ax.plot(times, f0, label='f0', color='cyan', linewidth=3)
ax.legend(loc='upper right')
def detect_vocal_pitchrange_info_by(f0, PITCH_LIST=None):
    """
    Derive vocal-range information from an f0 sequence extracted by librosa.

    NaN entries of ``f0`` are ignored. The lower bound of the range is the
    standard pitch closest to the minimum f0 among those at or above it; the
    upper bound is the standard pitch closest to the maximum f0 among those
    at or below it. When no standard pitch lies on the required side (f0 is
    entirely outside the table), the nearest pitch in the table is used.

    Args:
        f0: sequence or array of f0 values; NaN marks frames to skip.
        PITCH_LIST: optional sequence of ``(label, frequency)`` pairs.
            Defaults to ``create_standard_pitch_sequence()``.

    Returns:
        dict with keys ``f0_min``, ``f0_max``, ``r_start``, ``r_end``,
        ``r_start_f`` and ``r_end_f``.

    Raises:
        ValueError: if ``PITCH_LIST`` is empty or ``f0`` has no non-NaN value.
    """
    if PITCH_LIST is None:
        PITCH_LIST = create_standard_pitch_sequence()
    if len(PITCH_LIST) == 0:
        raise ValueError("PITCH_LIST must contain at least one (label, frequency) pair")

    f0_arr = np.asarray(f0, dtype=float)
    n_f0 = f0_arr[~np.isnan(f0_arr)]
    if n_f0.size == 0:
        raise ValueError("f0 contains no voiced (non-NaN) frames")

    # Compute the extremes once instead of once per pitch-table entry.
    f0_min = n_f0.min()
    f0_max = n_f0.max()
    print("基频的最值:", f0_min, f0_max)

    def pick_nearest(target, on_required_side):
        # Prefer pitches on the required side of the target; fall back to the
        # whole table when that side is empty.
        preferred = [p for p in PITCH_LIST if on_required_side(p[1])]
        pool = preferred if preferred else PITCH_LIST
        best = min(pool, key=lambda p: abs(p[1] - target))
        return (best[0], best[1], abs(best[1] - target))

    s_p = pick_nearest(f0_min, lambda freq: freq >= f0_min)
    e_p = pick_nearest(f0_max, lambda freq: freq <= f0_max)
    print("音域下限:", s_p)
    print("音域上限:", e_p)
    return {
        "f0_min": f0_min,      # minimum f0
        "f0_max": f0_max,      # maximum f0
        "r_start": s_p[0],     # label of the lower bound of the range
        "r_end": e_p[0],       # label of the upper bound of the range
        "r_start_f": s_p[1],   # frequency of the lower-bound pitch
        "r_end_f": e_p[1],     # frequency of the upper-bound pitch
    }
测试:
if __name__ == "__main__":
    # f0 is the module-level pYIN track computed by the example script.
    detect_vocal_pitchrange_info_by(f0=f0)
输出:
基频的最值: 345.2170030745704 625.8586480068041
音域下限: ('F4', 349.23, 4.012996925429604)
音域上限: ('D♯/E♭5', 622.25, 3.60864800680406)
{'f0_min': 345.2170030745704,
'f0_max': 625.8586480068041,
'r_start': 'F4',
'r_end': 'D♯/E♭5',
'r_start_f': 349.23,
'r_end_f': 622.25}
import time

import librosa
import matplotlib.pyplot as plt
import numpy as np
def create_standard_pitch_sequence():
    """
    Produce the named standard pitch sequence covering C0 through B9.

    Returns:
        list[tuple[str, float]]: ``(label, frequency)`` pairs, grouped by
        octave in ascending order and running from C to B inside each octave.
        Each label is the note name with the octave number appended
        (``"A4"`` maps to 440.00 in the table below).
    """
    names = ("C", "C♯/D♭", "D", "D♯/E♭", "E", "F",
             "F♯/G♭", "G", "G♯/A♭", "A", "A♯/B♭", "B")
    # Row = note name, column = octave number.
    table = (
        (16.352, 32.703, 65.406, 130.81, 261.63, 523.25, 1046.5, 2093.0, 4186.0, 8372.0),
        (17.324, 34.648, 69.296, 138.59, 277.18, 554.37, 1108.7, 2217.5, 4434.9, 8869.8),
        (18.354, 36.708, 73.416, 146.83, 293.66, 587.33, 1174.7, 2349.3, 4698.6, 9397.3),
        (19.445, 38.891, 77.782, 155.56, 311.13, 622.25, 1244.5, 2489.0, 4978.0, 9956.1),
        (20.602, 41.203, 82.407, 164.81, 329.63, 659.26, 1318.5, 2637.0, 5274.0, 10548),
        (21.827, 43.654, 87.307, 174.61, 349.23, 698.46, 1396.9, 2793.8, 5587.7, 11175),
        (23.125, 46.249, 92.499, 185.00, 369.99, 739.99, 1480.0, 2960.0, 5919.9, 11840),
        (24.500, 48.999, 97.999, 196.00, 392.00, 783.99, 1568.0, 3136.0, 6271.9, 12544),
        (25.957, 51.913, 103.83, 207.65, 415.30, 830.61, 1661.2, 3322.4, 6644.9, 13290),
        (27.500, 55.000, 110.00, 220.00, 440.00, 880.00, 1760.0, 3520.0, 7040.0, 14080),
        (29.135, 58.270, 116.54, 233.08, 466.16, 932.33, 1864.7, 3729.3, 7458.6, 14917),
        (30.868, 61.735, 123.47, 246.94, 493.88, 987.77, 1975.5, 3951.1, 7902.1, 15804),
    )
    octave_count = len(table[0])
    return [
        (names[row] + str(octave), table[row][octave])
        for octave in range(octave_count)
        for row in range(len(names))
    ]
def get_f0_times_by_pyin(file_path):
    """
    Load an audio file and estimate its f0 track with librosa's pYIN.

    The start time, end time and elapsed seconds are printed to stdout.

    Args:
        file_path: path of the audio file passed to ``librosa.load``
            (the original note on this function says "mp3 format").

    Returns:
        tuple: ``(f0, times)`` where ``f0`` is the per-frame f0 estimate from
        ``librosa.pyin`` and ``times`` holds the matching frame time stamps
        from ``librosa.times_like``.
    """
    start_t = time.time()
    print(start_t)
    y, sr = librosa.load(file_path)
    # Search C2..C7, the span the author uses for the human voice. The loader's
    # sample rate is passed on explicitly so pyin and times_like agree with it
    # rather than each assuming its own default.
    f0, _voiced_flag, _voiced_probs = librosa.pyin(
        y,
        fmin=librosa.note_to_hz('C2'),
        fmax=librosa.note_to_hz('C7'),
        sr=sr,
    )
    times = librosa.times_like(f0, sr=sr)
    end_t = time.time()
    print(end_t, end_t - start_t)
    return f0, times
def display_f0_times(f0, times):
    """
    Plot an f0 track against its frame times and show the figure.

    Args:
        f0: f0 value per frame (y axis).
        times: time stamp per frame (x axis).
    """
    _figure, axes = plt.subplots()
    axes.set(title='pYIN fundamental frequency estimation')
    axes.plot(times, f0, color='cyan', linewidth=3, label='f0')
    axes.legend(loc='upper left')
    plt.show()
def beter_fill_u(n_f0, u_line=300):
    """
    Cap the upper part of an f0 array at the first empty 10-wide bin above
    ``u_line``.

    If any value lies strictly within 10 of ``u_line``, the cap starts at
    ``u_line`` and moves up in steps of 10 for as long as the open interval
    ``(cap, cap + 10)`` contains at least one value. Every value greater than
    the final cap is then replaced by the cap. Otherwise the input is
    returned unchanged.

    Args:
        n_f0: numpy array of f0 values.
        u_line: starting level for the upward search.

    Returns:
        The capped array, or ``n_f0`` itself when nothing is near ``u_line``.
    """
    step = 10

    def bin_is_occupied(lower_edge):
        return ((n_f0 > lower_edge) & (n_f0 < (lower_edge + step))).any()

    has_values_near_line = (
        u_line > 0 and ((n_f0 > (u_line - 10)) & (n_f0 < (u_line + 10))).any()
    )
    if not has_values_near_line:
        return n_f0

    cap = u_line
    while cap > 0 and bin_is_occupied(cap):
        cap = cap + step
    return np.where(n_f0 > cap, cap, n_f0)
def beter_fill_d(n_f0, d_line=200):
    """
    Raise the lower part of an f0 array to the first empty 10-wide bin below
    ``d_line``.

    If any value lies strictly within 10 of ``d_line``, the floor starts at
    ``d_line`` and moves down in steps of 10 for as long as it stays positive
    and the open interval ``(floor - 10, floor)`` contains at least one value.
    Every value smaller than the final floor is then replaced by the floor.
    Otherwise the input is returned unchanged.

    Args:
        n_f0: numpy array of f0 values.
        d_line: starting level for the downward search.

    Returns:
        The adjusted array, or ``n_f0`` itself when nothing is near ``d_line``.
    """
    step = 10

    def bin_is_occupied(upper_edge):
        return ((n_f0 < upper_edge) & (n_f0 > (upper_edge - step))).any()

    has_values_near_line = (
        d_line > 0 and ((n_f0 > (d_line - 10)) & (n_f0 < (d_line + 10))).any()
    )
    if not has_values_near_line:
        return n_f0

    floor = d_line
    while floor > 0 and bin_is_occupied(floor):
        floor = floor - step
    return np.where(n_f0 < floor, floor, n_f0)
def detect_vocal_pitchrange_info_by(f0, PITCH_LIST=None, beter=False):
    """
    Derive vocal-range information from an f0 sequence extracted by librosa.

    NaN entries of ``f0`` are ignored. The lower bound of the range is the
    standard pitch closest to the minimum f0 among those at or above it; the
    upper bound is the standard pitch closest to the maximum f0 among those
    at or below it. When no standard pitch lies on the required side (f0 is
    entirely outside the table), the nearest pitch in the table is used.

    Args:
        f0: sequence or array of f0 values; NaN marks frames to skip.
        PITCH_LIST: optional sequence of ``(label, frequency)`` pairs.
            Defaults to ``create_standard_pitch_sequence()``.
        beter: when true, the f0 values are first passed through
            ``beter_fill_d`` and then ``beter_fill_u`` before the extremes
            are taken.

    Returns:
        dict with keys ``f0_min``, ``f0_max``, ``r_start``, ``r_end``,
        ``r_start_f`` and ``r_end_f``.

    Raises:
        ValueError: if ``PITCH_LIST`` is empty or ``f0`` has no non-NaN value.
    """
    if PITCH_LIST is None:
        PITCH_LIST = create_standard_pitch_sequence()
    if len(PITCH_LIST) == 0:
        raise ValueError("PITCH_LIST must contain at least one (label, frequency) pair")

    f0_arr = np.asarray(f0, dtype=float)
    n_f0 = f0_arr[~np.isnan(f0_arr)]
    if n_f0.size == 0:
        raise ValueError("f0 contains no voiced (non-NaN) frames")

    if beter:
        # Adjust f0 at both ends before measuring the range.
        n_f0 = beter_fill_d(n_f0)
        n_f0 = beter_fill_u(n_f0)

    # Compute the extremes once instead of once per pitch-table entry.
    f0_min = n_f0.min()
    f0_max = n_f0.max()
    print("基频的最值:", f0_min, f0_max)

    def pick_nearest(target, on_required_side):
        # Prefer pitches on the required side of the target; fall back to the
        # whole table when that side is empty.
        preferred = [p for p in PITCH_LIST if on_required_side(p[1])]
        pool = preferred if preferred else PITCH_LIST
        best = min(pool, key=lambda p: abs(p[1] - target))
        return (best[0], best[1], abs(best[1] - target))

    s_p = pick_nearest(f0_min, lambda freq: freq >= f0_min)
    e_p = pick_nearest(f0_max, lambda freq: freq <= f0_max)
    print("音域下限:", s_p)
    print("音域上限:", e_p)
    return {
        "f0_min": f0_min,      # minimum f0
        "f0_max": f0_max,      # maximum f0
        "r_start": s_p[0],     # label of the lower bound of the range
        "r_end": e_p[0],       # label of the upper bound of the range
        "r_start_f": s_p[1],   # frequency of the lower-bound pitch
        "r_end_f": e_p[1],     # frequency of the upper-bound pitch
    }
if __name__ == "__main__":
    # Demo on librosa's bundled trumpet recording: extract f0, plot it, then
    # report the range with the f0 adjustment step enabled.
    f0, times = get_f0_times_by_pyin(file_path=librosa.ex('trumpet'))
    display_f0_times(f0=f0, times=times)
    detect_vocal_pitchrange_info_by(f0, beter=True)