GitHub: https://github.com/xiph/rnnoise
cd rnnoise-master/src
./compile.sh
Running the script produces the denoise_training binary, which takes three arguments and is used as follows:
./denoise_training clean_speech.pcm noise_background.pcm 5000000 > output.f32
Each audio frame yields an 87-dimensional feature vector; as long as the audio is long enough, the count argument can be set larger. The exported data has shape (n, 87).
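To sanity-check the exported features, the file can be loaded with NumPy. A minimal sketch (not part of the original toolchain), using the output.f32 name from the command above:

import numpy as np

# The features are raw float32 values, 87 per frame, so the file should
# reshape cleanly to (n, 87).
feats = np.fromfile("output.f32", dtype=np.float32).reshape(-1, 87)
print(feats.shape)  # -> (n, 87)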
The datasets used in the paper can be downloaded from the author's blog:
Author's homepage: https://jmvalin.ca/demo/rnnoise/
Noise dataset download link: https://media.xiph.org/rnnoise/rnnoise_contributions.tar.gz
Concatenate the dataset into a single audio file:
import glob
import os
import numpy as np
import scipy.io.wavfile as wave

def merge_files(path_read_folder, path_write_wav_file):
    files = os.listdir(path_read_folder)
    merged_signal = []
    count = 0
    for filename in glob.glob(os.path.join(path_read_folder, '*.wav')):
        # skip the first half of the files, concatenate the rest
        if count > len(files) // 2:
            sr, signal = wave.read(filename)
            merged_signal.append(signal)
        else:
            count = count + 1
    merged_signal = np.hstack(merged_signal)
    merged_signal = np.asarray(merged_signal, dtype=np.int16)
    wave.write(path_write_wav_file, sr, merged_signal)

# noisy train total
path_read_folder = "/CleanData"
path_write_wav_file = "output.wav"
merge_files(path_read_folder, path_write_wav_file)
Then extract features from the prepared audio with denoise_training as described in step 2 above.
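denoise_training reads headerless 16-bit PCM, so the WAV header of the merged files must be stripped first. A minimal sketch, assuming the WAVs are already mono, 16-bit, and at the rate the model expects (48 kHz for stock RNNoise); the .wav names are placeholders, the .pcm names match the command in step 2:

import numpy as np
import scipy.io.wavfile as wave

# Strip the WAV header and dump the raw 16-bit samples that denoise_training expects.
for wav_path, pcm_path in [("clean_speech.wav", "clean_speech.pcm"),
                           ("noise_background.wav", "noise_background.pcm")]:
    sr, signal = wave.read(wav_path)
    assert signal.dtype == np.int16, "expected 16-bit PCM WAV"
    signal.tofile(pcm_path)  # raw samples, no header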
The steps above generate the feature data used for training. If you also need an SDK-style library, run the following in the source root directory to build librnnoise.so:
./autogen.sh
./configure --prefix=/usr
make -j
make install
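Once installed, librnnoise.so can be smoke-tested from Python via ctypes. A minimal sketch, not from the original post: it assumes the rnnoise_create(RNNModel *) signature of recent releases (older ones take no argument), 480-sample frames (10 ms at 48 kHz), and a placeholder raw 16-bit mono PCM input file:

import ctypes
import numpy as np

lib = ctypes.CDLL("librnnoise.so")
lib.rnnoise_create.restype = ctypes.c_void_p
lib.rnnoise_create.argtypes = [ctypes.c_void_p]
lib.rnnoise_process_frame.restype = ctypes.c_float
lib.rnnoise_process_frame.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p]
lib.rnnoise_destroy.argtypes = [ctypes.c_void_p]

FRAME_SIZE = 480  # RNNoise processes 10 ms frames at 48 kHz

state = lib.rnnoise_create(None)  # NULL -> use the built-in model

pcm = np.fromfile("noisy_48k_mono.pcm", dtype=np.int16)   # placeholder input file
pcm = pcm[: len(pcm) // FRAME_SIZE * FRAME_SIZE]          # drop the trailing partial frame
out = np.empty_like(pcm)

buf_in = np.zeros(FRAME_SIZE, dtype=np.float32)
buf_out = np.zeros(FRAME_SIZE, dtype=np.float32)
for i in range(0, len(pcm), FRAME_SIZE):
    buf_in[:] = pcm[i:i + FRAME_SIZE]   # samples stay in int16 range, as rnnoise expects
    lib.rnnoise_process_frame(state,
                              buf_out.ctypes.data_as(ctypes.c_void_p),
                              buf_in.ctypes.data_as(ctypes.c_void_p))
    out[i:i + FRAME_SIZE] = np.clip(buf_out, -32768, 32767).astype(np.int16)

lib.rnnoise_destroy(state)
out.tofile("denoised_48k_mono.pcm")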
python ./training/bin2hdf5.py output.f32 5000000 87 training.h5
bin2hdf5.py takes four arguments: the first is the feature file produced by the C program, the second and third are the matrix shape (rows and columns), and the fourth is the output file.
If the data in output.f32 has shape (n, 87), set the second argument to n (the sketch below shows one way to compute it).
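If n is not known, it can be derived from the file size, since each frame is 87 float32 values of 4 bytes each. A small sketch:

import os

n = os.path.getsize("output.f32") // (87 * 4)  # frames = bytes / (87 floats * 4 bytes)
print(n)  # pass this as the second argument to bin2hdf5.py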
python ./training/rnn_train.py
python ./training/dump_rnn.py weights.hdf5 rnn_data.c rnn_data.h
Note: the dump_rnn.py shipped with the source does not work as-is; modify it to the following form:
#!/usr/bin/python

from __future__ import print_function

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import GRU
from keras.models import load_model
from keras import backend as K

import sys
import re
import numpy as np


# Write one weight vector as a quantized (x256, clamped to 127) C array of rnn_weight.
def printVector(f, vector, name):
    v = np.reshape(vector, (-1));
    #print('static const float ', name, '[', len(v), '] = \n', file=f)
    f.write('static const rnn_weight {}[{}] = {{\n '.format(name, len(v)))
    for i in range(0, len(v)):
        f.write('{}'.format(min(127, int(round(256*v[i])))))
        if (i!=len(v)-1):
            f.write(',')
        else:
            break;
        if (i%8==7):
            f.write("\n ")
        else:
            f.write(" ")
    #print(v, file=f)
    f.write('\n};\n\n')
    return;


# Emit one Dense/GRU layer as C tables plus the matching #define and extern in the header.
def printLayer(f, hf, layer):
    weights = layer.get_weights()
    printVector(f, weights[0], layer.name + '_weights')
    if len(weights) > 2:
        printVector(f, weights[1], layer.name + '_recurrent_weights')
    printVector(f, weights[-1], layer.name + '_bias')
    name = layer.name
    activation = re.search('function (.*) at', str(layer.activation)).group(1).upper()
    if len(weights) > 2:
        # GRU kernels have shape (input_dim, 3*units); // keeps the emitted size integral under Python 3
        f.write('const GRULayer {} = {{\n {}_bias,\n {}_weights,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
                .format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation))
        hf.write('#define {}_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
        hf.write('extern const GRULayer {};\n\n'.format(name));
    else:
        f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
                .format(name, name, name, weights[0].shape[0], weights[0].shape[1], activation))
        hf.write('#define {}_SIZE {}\n'.format(name.upper(), weights[0].shape[1]))
        hf.write('extern const DenseLayer {};\n\n'.format(name));


#def foo(c, name):
#    return 1

#def mean_squared_sqrt_error(y_true, y_pred):
#    return K.mean(K.square(K.sqrt(y_pred) - K.sqrt(y_true)), axis=-1)


# Custom losses/metrics/constraint matching training/rnn_train.py; load_model needs
# them registered to deserialize the trained model.
def mean_squared_sqrt_error(y_true, y_pred):
    return K.mean(K.square(K.sqrt(y_pred) - K.sqrt(y_true)), axis=-1)

def my_crossentropy(y_true, y_pred):
    return K.mean(2*K.abs(y_true-0.5)*K.binary_crossentropy(y_pred, y_true), axis=-1)

def mymask(y_true):
    return K.minimum(y_true+1., 1.)

def msse(y_true, y_pred):
    return K.mean(mymask(y_true)*K.square(K.sqrt(y_pred)-K.sqrt(y_true)), axis=-1)

def mycost(y_true, y_pred):
    return K.mean(mymask(y_true) * (10*K.square(K.square(K.sqrt(y_pred) - K.sqrt(y_true))) + K.square(K.sqrt(y_pred) - K.sqrt(y_true)) + 0.01*K.binary_crossentropy(y_pred, y_true)), axis=-1)

def my_accuracy(y_true, y_pred):
    return K.mean(2*K.abs(y_true-0.5) * K.equal(y_true, K.round(y_pred)), axis=-1)

class WeightClip:
    def __init__(self, c=2, name='WeightClip'):
        self.c = c

    def __call__(self, p):
        #return {'name': self.__class__.__name__, 'c': self.c}
        return K.clip(p, -self.c, self.c)

    def get_config(self):
        return {'name': self.__class__.__name__, 'c': self.c}


if __name__ == '__main__':
    # model = load_model(sys.argv[1], {'msse': mean_squared_sqrt_error,
    #                                  'mean_squared_sqrt_error': mean_squared_sqrt_error,
    #                                  'my_crossentropy': mean_squared_sqrt_error,
    #                                  'mycost': mean_squared_sqrt_error,
    #                                  'WeightClip': foo})
    model = load_model(
        sys.argv[1],
        custom_objects={
            'msse': msse,
            'mean_squared_sqrt_error': mean_squared_sqrt_error,
            'my_crossentropy': my_crossentropy,
            'mycost': mycost,
            'WeightClip': WeightClip}
    )

    weights = model.get_weights()

    f = open(sys.argv[2], 'w')
    hf = open(sys.argv[3], 'w')

    f.write('/*This file is automatically generated from a Keras model*/\n\n')
    f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n\n')
    hf.write('/*This file is automatically generated from a Keras model*/\n\n')
    hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "rnn.h"\n\n')

    layer_list = []
    for i, layer in enumerate(model.layers):
        if len(layer.get_weights()) > 0:
            printLayer(f, hf, layer)
        if len(layer.get_weights()) > 2:
            layer_list.append(layer.name)

    hf.write('struct RNNState {\n')
    for i, name in enumerate(layer_list):
        hf.write('  float {}_state[{}_SIZE];\n'.format(name, name.upper()))
    hf.write('};\n')
    hf.write('\n\n#endif\n')

    f.close()
    hf.close()
Replace the corresponding files in src/ with the generated rnn_data.c and rnn_data.h, then rebuild.
For environment setup, see: https://blog.csdn.net/danteLiujie/article/details/102769905?spm=1001.2014.3001.5501
For the feature-extraction pipeline, see:
1.https://blog.csdn.net/danteLiujie/article/details/102799038?spm=1001.2014.3001.5501
2.https://www.programmersought.com/article/16676035121/
For training a 16 kHz model, see:
1.https://github.com/YongyuG/rnnoise_16k
For Windows code, see:
1.https://github.com/danteliujie/rnnoise
Improved version:
https://github.com/GregorR/rnnoise-nu/tree/master/training