本次记录wav文件读写的部分实现方式;
import torchaudio

# Clip to decode.
wav_file = "music/jamendo/music-jamendo-0080.wav"

# torchaudio.load() hands back a (tensor, sample rate) pair; unpack it into
# the two names used by the rest of these notes.
_loaded = torchaudio.load(wav_file)
waveform, sample_rate = _loaded
- type(waveform): <class 'torch.Tensor'>
- waveform.shape: torch.Size([1, 3354112])
- waveform.size(): torch.Size([1, 3354112])
- waveform.dtype: torch.float32
- sample_rate: 16000
- tensor([[ 3.0518e-05, 3.0518e-05, 3.0518e-05, …, -3.0518e-05,
0.0000e+00, 0.0000e+00]])
总结
librosa安装: conda install -c conda-forge librosa
import librosa

# Rate requested from librosa via the `sr` argument.
sample_rate = 16000
wav_file = "music/jamendo/music-jamendo-0080.wav"

# NOTE: load() returns a (samples, sampling_rate) tuple and it is not unpacked
# here, so `data` is the whole tuple; the samples themselves are data[0].
data = librosa.core.load(wav_file, sr=sample_rate)
import librosa

sample_rate = 16000
wav_file = "立体声_缩混.wav"

# mono=False disables the down-mix to a single channel. load() returns
# (samples, sampling_rate); only the samples array is kept in `data`.
data, _rate = librosa.core.load(wav_file, sr=sample_rate, mono=False)

# Notebook-style echoes of the first two rows of `data`
# (presumably one row per channel -- confirm against the librosa docs).
data[0]
data[1]
- type(data): <class 'tuple'>
- data: (array([ 3.0517578e-05, 3.0517578e-05, 3.0517578e-05, …, -3.0517578e-05, 0.0000000e+00, 0.0000000e+00],
dtype=float32), 16000)
- type(data[0]): <class 'numpy.ndarray'>
- data[0].dtype: dtype('float32')
总结
有一点很方便的是,不管原始采样率多少,都可以按照指定的采样率读取,也就是内部会进行采样率转换;
读取之后的数据是float32类型的,数值在-1到1之间,同torchaudio;默认采样率为22050;默认为单通道数据(多通道会被混合为单通道),设置mono=False可以读取多通道数据;
写音频
略。
import soundfile as sf
import numpy as np

wav_path = 'music/jamendo/music-jamendo-0080.wav'

# Request float32 from soundfile directly instead of reading the default
# float64 array and casting afterwards; this avoids a second full-size copy.
data, sr = sf.read(wav_path, dtype='float32')

# Notebook-style echoes: samples, their dtype (float32) and the sample rate.
data
data.dtype
sr
import soundfile as sf
import numpy as np

wav_path = '立体声_缩混.wav'

# Request float32 from soundfile directly instead of reading the default
# float64 array and casting afterwards; this avoids a second full-size copy.
data, sr = sf.read(wav_path, dtype='float32')

# soundfile returns multi-channel audio as (frames, channels); transpose so
# that each row is one channel.
data = data.transpose()

# Notebook-style echoes: the two channels, the dtype (float32), the sample rate.
data[0]
data[1]
data.dtype
sr
- type(data) : <class 'numpy.ndarray'>
- sr: 16000
import soundfile as sf

# Destination path; left empty in these notes -- fill it in before running.
write_wav_path = ''

# `data` and `sample_rate` come from an earlier snippet. soundfile expects
# multi-channel audio as (frames, channels), so an array that was transposed
# to (channels, frames) after reading must be transposed back first.
# The original used typographic quotes here, which is a SyntaxError.
sf.write(write_wav_path, data, sample_rate, subtype='PCM_16')
总结
import wave
import numpy as np

wav_file = "立体声_缩混.wav"

with wave.open(wav_file, 'rb') as fr:
    params = fr.getparams()
    nchannel, sampwidth, samplerate, nframs = params[:4]
    # readframes() returns the raw PCM payload as bytes.
    strdata = fr.readframes(nframs)

# The decoding below reinterprets the bytes as 16-bit samples; any other
# sample width would silently produce garbage, so fail loudly instead.
if sampwidth != 2:
    raise ValueError(
        f"expected 16-bit PCM (sampwidth == 2), got sampwidth == {sampwidth}"
    )

# WAV PCM is little-endian, so decode as '<i2' (int16) regardless of the
# host byte order.
data = np.frombuffer(strdata, dtype='<i2')

# Scale to floats by dividing by 2**15, then store as float32.
data = data/32768
data = data.astype(np.float32)

# Frames are interleaved (ch0, ch1, ch0, ch1, ...): reshape to
# (frames, channels) and transpose to get one row per channel.
new_data = data.reshape(-1, nchannel).transpose()
left_data = new_data[0]
right_data = new_data[1]  # requires at least two channels
- type(strdata) : <class 'bytes'>
- type(data): <class 'numpy.ndarray'>
- data.size : 6708224
- left_data.size: 3354112
# Write each channel to its own mono 16-bit WAV file.
# `left_data` / `right_data` are the float arrays from the read snippet.
write_wav_file = "wave_out_left.wav"
nchannel = 1
sampwidth = 2  # bytes per sample -> 16-bit PCM
framerate = 16000  # NOTE(review): hard-coded; should match the source file's rate
nframes = len(left_data)
comptype = "NONE"
compname = "no compressed"
write_params = (nchannel, sampwidth, framerate, nframes, comptype, compname)

# Scale back to the int16 range. Clip first: a sample of exactly +1.0 maps to
# 32768, which would wrap around to -32768 in a plain astype(int16).
left_data = np.clip(left_data*32768, -32768, 32767).astype(np.int16)
with wave.open(write_wav_file, 'wb') as fw:
    fw.setparams(write_params)
    fw.writeframes(left_data.tobytes())

write_wav_file = "wave_out_right.wav"
right_data = np.clip(right_data*32768, -32768, 32767).astype(np.int16)
with wave.open(write_wav_file, 'wb') as fw:
    fw.setparams(write_params)
    fw.writeframes(right_data.tobytes())
总结
from scipy.io import wavfile

wav_path = "music/jamendo/music-jamendo-0080.wav"

# NOTE: scipy returns (rate, samples) -- the reverse of the order used by the
# other libraries in these notes.
sr, data = wavfile.read(wav_path)

# data.dtype: int16
# Dividing by 2**15 (= 32768) turns the integer samples into floats.
data = data / 2 ** 15
- type(data): <class 'numpy.ndarray'>
- data.dtype: dtype('float64')
- data.shape: (3354112,)
from scipy.io import wavfile

wav_path = "立体声_缩混.wav"

# scipy returns (rate, samples), in that order.
sr, data = wavfile.read(wav_path)

# Dividing by 2**15 (= 32768) turns the integer samples into floats
# (assumes the file holds int16 samples -- confirm for other files).
data = data / 2 ** 15
- type(data): <class 'numpy.ndarray'>
- data.dtype: dtype('float64')
- data.shape : (3354112, 2)
总结
import numpy as np
from scipy.io import wavfile

# Destination path; left empty in these notes -- fill it in before running.
write_wav_path = ''

# `data` (float samples) and `sample_rate` come from an earlier snippet.
# Scale back to the int16 range in place.
data *= 32768

# Clip before casting: a sample of exactly +1.0 becomes 32768, which would
# wrap around to -32768 in a plain astype(int16).
# The original called `wavefile.write`, a NameError: the imported module is
# `wavfile`.
wavfile.write(
    write_wav_path,
    sample_rate,
    np.clip(data, -32768, 32767).astype(np.int16),
)
总结