Reference: the tf.train.Saver documentation
Reference: StackOverflow
The original network structure was as follows:
for layer_id in range(n_layers):
    n_channels = base_channels * (2 ** (layer_id // blk_size))
    if layer_id == 0:
        net = slim.conv2d(input_speech, num_outputs=n_channels, kernel_size=[1, ksz], stride=[1, 2],
                          activation_fn=lrelu, normalizer_fn=norm_fn, reuse=reuse, scope='loss_conv_%d' % layer_id)
        layers.append(net)
    elif layer_id < n_layers - 1:
        net = slim.conv2d(layers[-1], n_channels, [1, ksz], [1, 2], activation_fn=lrelu, normalizer_fn=norm_fn,
                          reuse=reuse, scope='loss_conv_%d' % layer_id)
        layers.append(net)
    else:
        net = slim.conv2d(layers[-1], n_channels, [1, ksz], activation_fn=lrelu, normalizer_fn=norm_fn,
                          reuse=reuse, scope='loss_conv_%d' % layer_id)
        layers.append(net)
return layers
Training log when restoring all of the weights:
T: 1 2019-03-30 13:19:21| AVG_LOSS:0.457420 | CLASS_ERROR:0.198291 | TAG_ERROR:0.336931
variables = slim.get_variables_to_restore()
v_list = []
for i in range(7):
    v_list.append("loss_conv_" + str(i))
saver_3 = tf.train.Saver([v for v in variables if v.name.split("/")[0] in v_list[:3]])  # restore the first 3 layers' weights
saver_7 = tf.train.Saver([v for v in variables if v.name.split("/")[0] in v_list])      # restore all 7 layers' weights
with tf.Session(config=config) as sess:
    sess.run(init_op)
    if ckpt_path is not None:
        saver_3.restore(sess, ckpt_path)
T: 1 2019-04-05 19:01:28 | AVG_LOSS:0.485033 | CLASS_ERROR:0.068376 | TAG_ERROR:0.488907
The difference doesn't look big; CLASS_ERROR even dropped quite a bit.
After saving the model:
The checkpoint file is 125 KB, whereas before it was 4743 KB.
That is not what I want. My expectation: restore the weights of the first 3 layers, train the rest of the network from scratch (but update all weights during training). So the saved weight file should stay the same size as before.
The truth: both the restore and the save only covered 3 layers.
I searched for material for a long time; the English-language posts are the same, mostly near-identical articles.
Roughly where the problem lies: the graph.
It seems you need to restore from one graph and then write into a new graph.
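Judging from the tf.train.Saver docs, a Saver built with an explicit var_list both restores and saves only those variables. So another plausible fix (a minimal sketch, not tried on this project; it assumes the same config / init_op / ckpt_path as in the snippet above, and "model_full.ckpt" is a made-up output path) is to keep the small saver only for restoring and use a separate Saver over all variables for saving:
import tensorflow as tf
import tensorflow.contrib.slim as slim

variables = slim.get_variables_to_restore()
first3 = ["loss_conv_%d" % i for i in range(3)]

# Saver limited to the first 3 conv layers: used only for *restoring* pre-trained weights.
saver_restore = tf.train.Saver([v for v in variables if v.name.split("/")[0] in first3])
# Saver over every saveable variable in the graph (the default): used for *saving* checkpoints.
saver_full = tf.train.Saver()

with tf.Session(config=config) as sess:
    sess.run(init_op)                               # initialise everything first
    if ckpt_path is not None:
        saver_restore.restore(sess, ckpt_path)      # then overwrite only the first 3 layers
    # ... training ...
    saver_full.save(sess, "model_full.ckpt")        # writes all variables, so the file keeps its full size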
Leaving this problem aside for now; the thesis comes first.
2019/4/5
After digging around, the most common idea looks like this:
x = tf.transpose(x, [0, 2, 1, 3]) # [batch_size, feature_w, feature_h, FLAGS.out_channels]
# treat `feature_w` as max_timestep in lstm.
x = tf.reshape(x, [FLAGS.batch_size, feature_w, feature_h * FLAGS.out_channels])
But in my project the input size is not fixed.
After a lot of fiddling I hit errors from None being passed into reshape, reshape size mismatches, and so on.
With dynamic sizes, TensorFlow's reshape is genuinely awkward to work with.
tf.reshape() of a tensor with an unknown dimension (None) does not seem to work, so in the end I simply fixed the input size.
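(For reference, a dynamic reshape can in principle be built from the runtime tf.shape() instead of the static get_shape(); a minimal sketch, untested on this project, assuming only the time dimension feature_w is unknown while feature_h and out_channels are statically known:)
import tensorflow as tf

# features: [batch, feature_h, feature_w, out_channels]; feature_w (time) is None at graph-build time.
static = features.get_shape().as_list()            # e.g. [1, 1, None, 256]
feature_h, out_channels = static[1], static[3]     # assumed statically known
dyn = tf.shape(features)                           # runtime values for the dynamic dims

features = tf.transpose(features, [0, 2, 1, 3])    # -> [batch, feature_w, feature_h, out_channels]
# Mixing runtime scalars with a Python int typically lets TF keep the last dimension static,
# which dynamic_rnn / BasicLSTMCell need in order to build their kernels.
features = tf.reshape(features, [dyn[0], dyn[2], feature_h * out_channels])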
The final code:
def clstm_lossnet(input_speech, ksz=3, reuse=False, n_layers=9, cell_units=None, opt_prob=1.0, batch_size=1):
    print("+++++++++++++++++++++++++++++ cnn_lstm model +++++++++++++++++++++++++++++")
    # https://github.com/mounalab/LSTM-RNN-VAD/blob/master/vad_model.py
    # https://github.com/inikdom/rnn-speech/blob/master/models/AcousticModel.py
    # https://github.com/watsonyanghx/CNN_LSTM_CTC_Tensorflow/blob/master/cnn_lstm_otc_ocr.py
    # ################## 1. ################### #
    # ############### CNN part ############### #
    norm_fn = slim.batch_norm
    features = input_speech
    n_channels = [32, 32, 32, 64, 64, 128, 128, 256, 256]
    out_channels = n_channels[-1]
    for layer_id in range(n_layers):
        if layer_id == 0:
            features = slim.conv2d(input_speech, num_outputs=n_channels[layer_id], kernel_size=[1, ksz],
                                   stride=[1, 2], activation_fn=lrelu, normalizer_fn=norm_fn, reuse=reuse,
                                   scope='loss_conv_%d' % layer_id)
        else:
            features = slim.conv2d(features, n_channels[layer_id], [1, ksz], [1, 2], activation_fn=lrelu,
                                   normalizer_fn=norm_fn, reuse=reuse, scope='loss_conv_%d' % layer_id)
    _, feature_h, feature_w, _ = features.get_shape().as_list()

    # ################## 2. ################### #
    # ############### LSTM part ############### #
    # [batch_size, feature_h, feature_w, out_channels]
    # treat `feature_w` as max_timestep in lstm.
    features = tf.reshape(features, [1, feature_w, feature_h * out_channels])
    if cell_units is None:
        cell_units = [48, 48]
    lstms = [tf.contrib.rnn.BasicLSTMCell(units) for units in cell_units]
    drops = [tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=opt_prob) for lstm in lstms]
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(drops)
    initial_state = stacked_lstm.zero_state(batch_size, tf.float32)
    outputs, _ = tf.nn.dynamic_rnn(stacked_lstm, features, initial_state=initial_state, dtype=tf.float32)
    w = tf.get_variable(name='lstm_w_out',
                        shape=[cell_units[-1], 256],
                        dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable(name='lstm_b_out',
                        shape=[256],
                        dtype=tf.float32,
                        initializer=tf.constant_initializer())
    # Reshaping to apply the same weights over the timesteps
    outputs = tf.reshape(outputs, [-1, cell_units[-1]])  # [batch_size * max_stepsize, FLAGS.num_hidden]
    outputs = tf.matmul(outputs, w) + b
    # Reshaping back to the original shape
    outputs = tf.reshape(outputs, [1, 1, -1, 1])
    return outputs
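A hypothetical call, just to show the shapes (the 16384-sample input length, the lrelu helper, and the random feed values below are made up for illustration and are not the project's actual settings):
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim

def lrelu(x, alpha=0.2):
    # leaky ReLU used as the conv activation; the slope used in the project is not shown above
    return tf.maximum(alpha * x, x)

# fixed-size mono input: [batch, 1, n_samples, 1]; 16384 samples is an arbitrary example length
input_speech = tf.placeholder(tf.float32, shape=[1, 1, 16384, 1])
outputs = clstm_lossnet(input_speech, ksz=3, n_layers=9, cell_units=[48, 48], batch_size=1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    dummy = np.random.randn(1, 1, 16384, 1).astype(np.float32)
    print(sess.run(outputs, feed_dict={input_speech: dummy}).shape)  # 9 stride-2 convs: 16384 -> 32 timesteps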