import numpy as np
import wave
import os
import librosa
from scipy.io import wavfile
def enframe(signal, nw, inc, winc=None):
    """Split a 1-D signal into overlapping, fixed-length frames.

    The signal is zero-padded at the end so that the last frame is complete.
    A signal no longer than one frame yields exactly one (padded) frame.

    :param signal: original signal (1-D sequence of samples)
    :param nw: length of each frame, in samples
    :param inc: hop between the starts of adjacent frames, in samples
    :param winc: optional window of length ``nw`` multiplied into every
        frame; ``None`` (the default) returns the raw frames
    :return: 2-D array of shape ``(nf, nw)``, one frame per row
    """
    signal_length = len(signal)
    if signal_length <= nw:
        nf = 1
    else:
        # Number of frames needed to cover every sample, rounding up so the
        # trailing partial frame is kept (and padded below).
        nf = int(np.ceil((1.0 * signal_length - nw + inc) / inc))

    # Total length once all nf frames are laid out with a hop of inc.
    pad_length = int((nf - 1) * inc + nw)
    pad_signal = np.pad(signal, (0, pad_length - signal_length), 'constant')

    # indices[i, j] = i * inc + j: sample j of frame i (built by broadcasting).
    frame_starts = np.arange(nf) * inc
    indices = (frame_starts[:, None] + np.arange(nw)[None, :]).astype(np.intp)
    frames = pad_signal[indices]

    if winc is not None:
        # Broadcasting applies the same window to every row.
        return frames * np.asarray(winc)
    return frames
# Demo: frame the integers 0..49 so each frame's contents are easy to read.
# 50 samples matches the recorded output that follows this script.
a = np.arange(0, 50)
print(a)
nw = 10  # frame length in samples
inc = 2  # hop between consecutive frame starts
# winc=None requests raw, unwindowed frames.
frames = enframe(a, nw, inc, winc=None)
print("-" * 10)
print(frames)
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
48 49]
----------
[[ 0 1 2 3 4 5 6 7 8 9]
[ 2 3 4 5 6 7 8 9 10 11]
[ 4 5 6 7 8 9 10 11 12 13]
[ 6 7 8 9 10 11 12 13 14 15]
[ 8 9 10 11 12 13 14 15 16 17]
[10 11 12 13 14 15 16 17 18 19]
[12 13 14 15 16 17 18 19 20 21]
[14 15 16 17 18 19 20 21 22 23]
[16 17 18 19 20 21 22 23 24 25]
[18 19 20 21 22 23 24 25 26 27]
[20 21 22 23 24 25 26 27 28 29]
[22 23 24 25 26 27 28 29 30 31]
[24 25 26 27 28 29 30 31 32 33]
[26 27 28 29 30 31 32 33 34 35]
[28 29 30 31 32 33 34 35 36 37]
[30 31 32 33 34 35 36 37 38 39]
[32 33 34 35 36 37 38 39 40 41]
[34 35 36 37 38 39 40 41 42 43]
[36 37 38 39 40 41 42 43 44 45]
[38 39 40 41 42 43 44 45 46 47]
[40 41 42 43 44 45 46 47 48 49]]