RnnNoise降噪源码解析-训练数据准备和模型转换

1.源码

Github:https://github.com/xiph/rnnoise

2.编译特征提取工具

cd rnnoise-master/src
./compile.sh

执行后会生成可执行文件denoise_training,它有三个参数,使用方式如下:

./denoise_training clean_speech.pcm noise_background.pcm 5000000 > output.f32

每帧音频导出87维特征;只要音频足够长,就可以把count参数(第三个参数)设置得大一点。最终导出的数据形状为(n,87),其中n为导出的帧数。

3.准备数据

论文数据集可以到作者博客主页下载:
作者主页:https://jmvalin.ca/demo/rnnoise/
噪声数据集下载链接:https://media.xiph.org/rnnoise/rnnoise_contributions.tar.gz
将数据集拼接成一条语音:

import glob
import os
import numpy as np
import scipy.io.wavfile as wave

def merge_files(path_read_folder, path_write_wav_file, wav=None):
    """Concatenate the tail of a folder's ``*.wav`` files into one 16-bit WAV.

    The first ``len(os.listdir(path_read_folder)) // 2 + 1`` WAV files (in
    sorted filename order) are skipped and the remaining ones are joined end
    to end along the sample axis.
    NOTE(review): the skip looks like a crude data split; it is preserved
    unchanged here -- confirm that it is intended.

    Args:
        path_read_folder: Directory searched (non-recursively) for ``*.wav``.
        path_write_wav_file: Path of the merged WAV file to write.
        wav: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If no file is left to merge after the skip, or if the
            selected files do not all share the same sample rate.
    """
    # The threshold counts every directory entry, not only the WAV files.
    skipped = len(os.listdir(path_read_folder)) // 2 + 1

    # glob yields files in arbitrary order; sort so the output is reproducible.
    wav_paths = sorted(glob.glob(os.path.join(path_read_folder, '*.wav')))
    selected = wav_paths[skipped:]
    if not selected:
        raise ValueError(
            'no .wav files left to merge in {!r} after skipping the first {}'
            .format(path_read_folder, skipped))

    sample_rate = None
    chunks = []
    for filename in selected:
        sr, signal = wave.read(filename)
        if sample_rate is None:
            sample_rate = sr
        elif sr != sample_rate:
            # Joining clips recorded at different rates would corrupt the audio.
            raise ValueError(
                'sample rate mismatch: {!r} is {} Hz, expected {} Hz'
                .format(filename, sr, sample_rate))
        chunks.append(signal)

    # axis=0 is the sample axis for both mono (n,) and multi-channel (n, ch).
    merged_signal = np.concatenate(chunks, axis=0)
    merged_signal = np.asarray(merged_signal, dtype=np.int16)
    wave.write(path_write_wav_file, sample_rate, merged_signal)

# Run merge_files on the /CleanData folder and write the result to output.wav.
path_read_folder = "/CleanData"
path_write_wav_file = "output.wav"
merge_files(
    path_write_wav_file=path_write_wav_file,
    path_read_folder=path_read_folder,
)

将准备好的数据按第2节中的方式提取特征即可(注意:第2节中denoise_training的输入是.pcm原始音频文件,因此需要先把合并得到的wav转换为不带文件头的16位PCM数据)。

4.生成动态链接库(调用)

上面的部分是生成训练用的特征数据。如果你需要一个类似SDK的库文件,那么在源码根目录中依次执行以下命令,就可以生成librnnoise.so了:

./autogen.sh
 ./configure --prefix=/usr
 make
 make install -j 

5.获取.h5文件

python ./training/bin2hdf5.py output.f32 5000000 87 training.h5

bin2hdf5.py有4个参数,第一个为c输出的特征值文件,第二个/第三个为矩阵shape,第四个参数为输出文件。

如果output.f32中的数据形状为(n,87),则请把第二个参数替换为n。

6.开始训练

python ./training/rnn_train.py

7.获取权重参数

./training/dump_rnn.py weights.hdf5 rnn_data.c rnn_data.h name

注意:源码提供的dump_rnn.py不可用,需修改至如下形式:

#!/usr/bin/python

from __future__ import print_function

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import GRU
from keras.models import load_model
from keras import backend as K
import sys
import re
import numpy as np

def printVector(f, vector, name):
    """Write *vector* to *f* as a C ``static const rnn_weight`` array.

    The input is flattened, each value is scaled by 256, rounded to the
    nearest integer and clamped to ``[-128, 127]``.  Values are written eight
    per line.

    NOTE(review): the lower clamp assumes ``rnn_weight`` is a signed 8-bit
    type, as the existing upper clamp of 127 suggests -- confirm against the
    C header that declares it.

    Args:
        f: Writable text file object receiving the C source.
        vector: Array-like of weights; any shape, it is flattened.
        name: C identifier to use for the emitted array.
    """
    v = np.reshape(vector, (-1))
    f.write('static const rnn_weight {}[{}] = {{\n   '.format(name, len(v)))

    # Clamp on both sides: without the lower bound a weight below -0.5 would
    # be emitted as a value under -128.
    quantized = [max(-128, min(127, int(round(256 * x)))) for x in v]

    pieces = []
    last = len(quantized) - 1
    for i, q in enumerate(quantized):
        pieces.append(str(q))
        if i != last:
            # Break the line after every eighth value.
            pieces.append(',\n   ' if i % 8 == 7 else ', ')
    f.write(''.join(pieces))

    f.write('\n};\n\n')

def printLayer(f, hf, layer):
    """Emit the C definition of one Keras layer plus its header declarations.

    The layer's weight arrays are written to *f* with ``printVector``,
    followed by a ``GRULayer`` struct initializer when the layer has more
    than two weight arrays, or a ``DenseLayer`` one otherwise.  The matching
    ``#define <NAME>_SIZE`` and ``extern`` declaration are written to *hf*.

    Args:
        f: Writable text file object for the generated ``.c`` source.
        hf: Writable text file object for the generated ``.h`` header.
        layer: Layer object providing ``get_weights()``, ``name`` and
            ``activation``.

    Raises:
        AttributeError: If ``str(layer.activation)`` does not look like
            ``<function NAME at ...>``, so no activation name can be extracted.
    """
    weights = layer.get_weights()
    name = layer.name
    is_recurrent = len(weights) > 2

    printVector(f, weights[0], name + '_weights')
    if is_recurrent:
        printVector(f, weights[1], name + '_recurrent_weights')
    printVector(f, weights[-1], name + '_bias')

    # The activation name is recovered from the function's repr.
    activation = re.search('function (.*) at', str(layer.activation)).group(1).upper()

    nb_inputs = weights[0].shape[0]
    if is_recurrent:
        # Floor division keeps the size an integer on Python 3; true division
        # would emit e.g. "24.0", which is not a valid C array size.
        # The kernel's second dimension is split in three (presumably the
        # three GRU gates stored side by side).
        nb_neurons = weights[0].shape[1] // 3
        f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'
                .format(name, name, name, name, nb_inputs, nb_neurons, activation))
        hf.write('#define {}_SIZE {}\n'.format(name.upper(), nb_neurons))
        hf.write('extern const GRULayer {};\n\n'.format(name))
    else:
        nb_neurons = weights[0].shape[1]
        f.write('const DenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'
                .format(name, name, name, nb_inputs, nb_neurons, activation))
        hf.write('#define {}_SIZE {}\n'.format(name.upper(), nb_neurons))
        hf.write('extern const DenseLayer {};\n\n'.format(name))


#def foo(c, name):
#    return 1

#def mean_squared_sqrt_error(y_true, y_pred):
#    return K.mean(K.square(K.sqrt(y_pred) - K.sqrt(y_true)), axis=-1)

def mean_squared_sqrt_error(y_true, y_pred):
    """Return the mean, over the last axis, of ``(sqrt(y_pred) - sqrt(y_true))**2``."""
    root_diff = K.sqrt(y_pred) - K.sqrt(y_true)
    squared = K.square(root_diff)
    return K.mean(squared, axis=-1)

def my_crossentropy(y_true, y_pred):
    """Return binary cross-entropy weighted by ``2 * |y_true - 0.5|``, averaged over the last axis.

    NOTE(review): ``K.binary_crossentropy`` is called as ``(y_pred, y_true)``;
    newer Keras versions expect ``(target, output)`` -- confirm the order
    against the installed Keras version.
    """
    # The weight is 0 where the target is exactly 0.5 and 1 where it is 0 or 1.
    confidence = 2*K.abs(y_true-0.5)
    entropy = K.binary_crossentropy(y_pred, y_true)
    return K.mean(confidence*entropy, axis=-1)

def mymask(y_true):
    """Return ``min(y_true + 1, 1)`` element-wise.

    The result is 1 wherever ``y_true >= 0`` and 0 where ``y_true == -1``.
    """
    shifted = y_true+1.
    return K.minimum(shifted, 1.)

def msse(y_true, y_pred):
    """Return the ``mymask``-weighted mean of ``(sqrt(y_pred) - sqrt(y_true))**2`` over the last axis."""
    weight = mymask(y_true)
    squared_error = K.square(K.sqrt(y_pred)-K.sqrt(y_true))
    return K.mean(weight*squared_error, axis=-1)

def mycost(y_true, y_pred):
    """Return the ``mymask``-weighted combined loss, averaged over the last axis.

    With ``d = sqrt(y_pred) - sqrt(y_true)``, each element contributes
    ``10 * d**4 + d**2 + 0.01 * binary_crossentropy(y_pred, y_true)``.
    """
    weight = mymask(y_true)
    root_diff = K.sqrt(y_pred) - K.sqrt(y_true)
    squared = K.square(root_diff)
    quartic = 10*K.square(squared)
    entropy = 0.01*K.binary_crossentropy(y_pred, y_true)
    per_element = weight * (quartic + squared + entropy)
    return K.mean(per_element, axis=-1)

def my_accuracy(y_true, y_pred):
    """Return accuracy weighted by ``2 * |y_true - 0.5|``, averaged over the last axis.

    A prediction counts as correct when ``round(y_pred)`` equals ``y_true``.
    """
    confidence = 2*K.abs(y_true-0.5)
    # K.equal yields a boolean tensor; cast it to the float type before
    # multiplying, since backends such as TensorFlow refuse float * bool.
    correct = K.cast(K.equal(y_true, K.round(y_pred)), K.floatx())
    return K.mean(confidence * correct, axis=-1)

class WeightClip:
    """Callable constraint that clips a tensor into the range ``[-c, c]``."""

    def __init__(self, c=2, name='WeightClip'):
        # `name` is accepted but not stored.
        self.c = c

    def __call__(self, p):
        """Return *p* clipped to ``[-self.c, self.c]``."""
        bound = self.c
        return K.clip(p, -bound, bound)

    def get_config(self):
        """Return a dict with the class name and the clipping bound."""
        config = {}
        config['name'] = self.__class__.__name__
        config['c'] = self.c
        return config




if __name__ == '__main__':
    # Usage: dump_rnn.py <model.hdf5> <output.c> <output.h>
    # Loads a trained Keras model and writes its weights as C source/header.

    # The custom losses and constraint must be registered so that load_model
    # can deserialize a model that was saved with them.
    model = load_model(
            sys.argv[1],
            custom_objects={
                'msse': msse,
                'mean_squared_sqrt_error': mean_squared_sqrt_error,
                'my_crossentropy': my_crossentropy,
                'mycost': mycost,
                'WeightClip': WeightClip}
            )

    # `with` guarantees both outputs are flushed and closed even when a layer
    # fails to export halfway through.
    with open(sys.argv[2], 'w') as f, open(sys.argv[3], 'w') as hf:
        f.write('/*This file is automatically generated from a Keras model*/\n\n')
        f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n\n')

        hf.write('/*This file is automatically generated from a Keras model*/\n\n')
        hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "rnn.h"\n\n')

        # Names of the layers with more than two weight arrays (the ones
        # printLayer emits as GRULayer); each gets a state buffer below.
        layer_list = []
        for layer in model.layers:
            n_weights = len(layer.get_weights())
            if n_weights > 0:
                printLayer(f, hf, layer)
            if n_weights > 2:
                layer_list.append(layer.name)

        hf.write('struct RNNState {\n')
        for name in layer_list:
            hf.write('  float {}_state[{}_SIZE];\n'.format(name, name.upper()))
        hf.write('};\n')

        hf.write('\n\n#endif\n')

用生成的rnn_data.c和rnn_data.h替换src目录中的同名文件即可。

将C代码中的相应文件替换即可。
环境配置请参考:https://blog.csdn.net/danteLiujie/article/details/102769905?spm=1001.2014.3001.5501
特征提取流程请参考:
1.https://blog.csdn.net/danteLiujie/article/details/102799038?spm=1001.2014.3001.5501
2.https://www.programmersought.com/article/16676035121/
16k采样率模型训练参考:
1.https://github.com/YongyuG/rnnoise_16k
Windows代码参考:
1.https://github.com/danteliujie/rnnoise
改进版本:
https://github.com/GregorR/rnnoise-nu/tree/master/training

你可能感兴趣的:(语音信号处理,语音降噪)