在用Keras(TensorFlow后端)搭建语音识别网络时,输入采用MFCC特征,网络为LSTM,损失函数为CTC loss,训练时出现了以下错误:
2018-07-02 11:32:45.861523: W tensorflow/core/framework/op_kernel.cc:1318] OP_REQUIRES failed at ctc_loss_op.cc:166 : Invalid argument: Saw a non-null label (index >= num_classes - 1) following a null label, batch: 4 num_classes: 29 labels:
Traceback (most recent call last):
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/train_mspeech.py", line 47, in <module>
ms.TrainModel(datapath, epoch = 50, batch_size = 8, save_step = 1000, filename= modelpath)
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/SpeechModel25.py", line 215, in TrainModel
#self._model.fit_generator(yielddatas, save_step, nb_worker=2)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/engine/training.py", line 2230, in fit_generator
class_weight=class_weight)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/engine/training.py", line 1883, in train_on_batch
outputs = self.train_function(ins)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 2482, in __call__
**self.session_kwargs)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 900, in run
run_metadata_ptr)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
run_metadata)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Saw a non-null label (index >= num_classes - 1) following a null label, batch: 4 num_classes: 29 labels:
[[Node: ctc/CTCLoss = CTCLoss[_class=["loc:@training/SGD/gradients/ctc/CTCLoss_grad/mul"], ctc_merge_repeated=true, ignore_longer_outputs_than_inputs=false, preprocess_collapse_repeated=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ctc/Log/_123, ctc/ToInt64/_125, ctc/GatherNd, ctc/Squeeze_1/_127)]]
[[Node: training/SGD/gradients/ctc/CTCLoss_grad/mul/_155 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_1925_training/SGD/gradients/ctc/CTCLoss_grad/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
Caused by op 'ctc/CTCLoss', defined at:
File "/home/chutz/.vscode/extensions/ms-python.python-2018.4.0/pythonFiles/PythonTools/visualstudio_py_launcher.py", line 91, in <module>
vspd.debug(filename, port_num, debug_id, debug_options, currentPid, run_as)
File "/home/chutz/.vscode/extensions/ms-python.python-2018.4.0/pythonFiles/PythonTools/visualstudio_py_debugger.py", line 2625, in debug
exec_file(file, globals_obj)
File "/home/chutz/.vscode/extensions/ms-python.python-2018.4.0/pythonFiles/PythonTools/visualstudio_py_util.py", line 119, in exec_file
exec_code(code, file, global_variables)
File "/home/chutz/.vscode/extensions/ms-python.python-2018.4.0/pythonFiles/PythonTools/visualstudio_py_util.py", line 95, in exec_code
exec(code_obj, global_variables)
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/train_mspeech.py", line 44, in <module>
ms = ModelSpeech(datapath)
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/SpeechModel25.py", line 40, in __init__
self._model = self.graves()
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/SpeechModel25.py", line 94, in graves
label_length])
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/engine/topology.py", line 619, in __call__
output = self.call(inputs, **kwargs)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/layers/core.py", line 685, in call
return self.function(inputs, **arguments)
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/SpeechModel25.py", line 60, in ctc_lambda_func
return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 3956, in ctc_batch_cost
sequence_length=input_length), 1)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/ctc_ops.py", line 158, in ctc_loss
ignore_longer_outputs_than_inputs=ignore_longer_outputs_than_inputs)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/gen_ctc_ops.py", line 285, in ctc_loss
name=name)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
op_def=op_def)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Saw a non-null label (index >= num_classes - 1) following a null label, batch: 4 num_classes: 29 labels:
[[Node: ctc/CTCLoss = CTCLoss[_class=["loc:@training/SGD/gradients/ctc/CTCLoss_grad/mul"], ctc_merge_repeated=true, ignore_longer_outputs_than_inputs=false, preprocess_collapse_repeated=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ctc/Log/_123, ctc/ToInt64/_125, ctc/GatherNd, ctc/Squeeze_1/_127)]]
[[Node: training/SGD/gradients/ctc/CTCLoss_grad/mul/_155 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_1925_training/SGD/gradients/ctc/CTCLoss_grad/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
设定如下:输入是形状为(1600, 26)的MFCC特征序列,其中n_mfcc=26,语音最长为1600帧,长度不足的部分补零
labels的最大长度(maxlength)为64,长度不足的部分同样补零
但是出现了以上错误
由于网络的input_dim=26,output_dim=29,所以我把ignore_longer_outputs_than_inputs设置为True,再运行依旧报同样的错误,不过报错信息好像少了一点,看起来有希望,继续调试……
2018-07-02 11:38:30.403252: W tensorflow/core/framework/op_kernel.cc:1318] OP_REQUIRES failed at ctc_loss_op.cc:166 : Invalid argument: Saw a non-null label (index >= num_classes - 1) following a null label, batch: 2 num_classes: 29 labels:
Traceback (most recent call last):
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/train_mspeech.py", line 47, in <module>
ms.TrainModel(datapath, epoch = 50, batch_size = 8, save_step = 1000, filename= modelpath)
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/SpeechModel25.py", line 216, in TrainModel
self._model.fit_generator(yielddatas, save_step)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/engine/training.py", line 2230, in fit_generator
class_weight=class_weight)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/engine/training.py", line 1883, in train_on_batch
outputs = self.train_function(ins)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 2482, in __call__
**self.session_kwargs)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 900, in run
run_metadata_ptr)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
run_metadata)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Saw a non-null label (index >= num_classes - 1) following a null label, batch: 2 num_classes: 29 labels:
[[Node: ctc/CTCLoss = CTCLoss[_class=["loc:@training/SGD/gradients/ctc/CTCLoss_grad/mul"], ctc_merge_repeated=true, ignore_longer_outputs_than_inputs=true, preprocess_collapse_repeated=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ctc/Log/_123, ctc/ToInt64/_125, ctc/GatherNd, ctc/Squeeze_1/_127)]]
Caused by op 'ctc/CTCLoss', defined at:
File "/home/chutz/.vscode/extensions/ms-python.python-2018.4.0/pythonFiles/PythonTools/visualstudio_py_launcher.py", line 91, in <module>
vspd.debug(filename, port_num, debug_id, debug_options, currentPid, run_as)
File "/home/chutz/.vscode/extensions/ms-python.python-2018.4.0/pythonFiles/PythonTools/visualstudio_py_debugger.py", line 2625, in debug
exec_file(file, globals_obj)
File "/home/chutz/.vscode/extensions/ms-python.python-2018.4.0/pythonFiles/PythonTools/visualstudio_py_util.py", line 119, in exec_file
exec_code(code, file, global_variables)
File "/home/chutz/.vscode/extensions/ms-python.python-2018.4.0/pythonFiles/PythonTools/visualstudio_py_util.py", line 95, in exec_code
exec(code_obj, global_variables)
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/train_mspeech.py", line 44, in <module>
ms = ModelSpeech(datapath)
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/SpeechModel25.py", line 40, in __init__
self._model = self.graves()
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/SpeechModel25.py", line 94, in graves
label_length])
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/engine/topology.py", line 619, in __call__
output = self.call(inputs, **kwargs)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/layers/core.py", line 685, in call
return self.function(inputs, **arguments)
File "/media/chutz/000206BE0003636E/ASRT_SpeechRecognition/SpeechModel25.py", line 60, in ctc_lambda_func
return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 3956, in ctc_batch_cost
sequence_length=input_length), 1)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/ctc_ops.py", line 158, in ctc_loss
ignore_longer_outputs_than_inputs=ignore_longer_outputs_than_inputs)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/gen_ctc_ops.py", line 285, in ctc_loss
name=name)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
op_def=op_def)
File "/home/chutz/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Saw a non-null label (index >= num_classes - 1) following a null label, batch: 2 num_classes: 29 labels:
[[Node: ctc/CTCLoss = CTCLoss[_class=["loc:@training/SGD/gradients/ctc/CTCLoss_grad/mul"], ctc_merge_repeated=true, ignore_longer_outputs_than_inputs=true, preprocess_collapse_repeated=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ctc/Log/_123, ctc/ToInt64/_125, ctc/GatherNd, ctc/Squeeze_1/_127)]]
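这个错误的原因是:output_dim决定了y_pred的shape,即网络输出的预测分类数(也就是报错信息中的num_classes),它的值应该等于labels的类别总数再加上一个空白块。原先的output_dim=29小于实际的类别数,labels中出现了大于等于num_classes - 1的索引,于是CTC loss报出了上面的错误
我所做的是中文识别,拼音类别共1421个,加上一个空白块,将output_dim设置为1422即可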