Scalar training loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute model.metrics_names will give you the display labels for the scalar outputs.
In other words: if the model has a single output and no metrics are specified, train_on_batch returns the training loss for that batch; if the model has multiple outputs and/or metrics, it returns a list. The attribute model.metrics_names gives the display labels for those scalar outputs (useful for visualization).
https://keras.io/metrics/
metrics[0] is the loss. If a metric specified in metrics is the same as the loss function, both values are printed:
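A minimal sketch (toy data and shapes, assumed purely for illustration) of what train_on_batch returns and how model.metrics_names labels it:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense

m = Sequential([Dense(1, input_shape=(4,))])
m.compile(loss='mse', optimizer='adam', metrics=['mse'])  # metric duplicates the loss
x = np.random.rand(8, 4)
y = np.random.rand(8, 1)
out = m.train_on_batch(x, y)
print(m.metrics_names)  # ['loss', 'mean_squared_error'] in Keras 2.x
print(out)              # two (nearly) identical scalars, e.g. [0.34, 0.34]

With no metrics specified, out would be a single scalar loss rather than a list.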
log_dir: the path of the directory where to save the log files to be parsed by TensorBoard.
histogram_freq: frequency (in epochs) at which to compute activation and weight histograms for the layers of the model. If set to 0, histograms won't be computed. Validation data (or split) must be specified for histogram visualizations.
write_graph: whether to visualize the graph in TensorBoard. The log file can become quite large when write_graph is set to True.
write_grads: whether to visualize gradient histograms in TensorBoard. histogram_freq must be greater than 0.
batch_size: size of batch of inputs to feed to the network for histograms computation.
write_images: whether to write model weights to visualize as image in TensorBoard.
board = TensorBoard(log_dir=tb_log, histogram_freq=2, batch_size=batch_size, write_images=True)
board.set_model(model)
def named_logs(metrics_names, logs):
    # Zip metric names with the values returned by train_on_batch into the
    # dict format that TensorBoard's on_epoch_end expects as its logs argument.
    return dict(zip(metrics_names, logs))
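For example (values made up for illustration):

named_logs(['loss', 'mean_squared_error'], [0.31, 0.12])
# -> {'loss': 0.31, 'mean_squared_error': 0.12}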
loss_file = open(os.path.join(workspace, "loss_file.txt"), 'w+')
board = TensorBoard(log_dir=tb_log, batch_size=batch_size, write_images=True)
board.set_model(model)
# Train.
t1 = time.time()
for i in range(1, epoch + 1):  # 1-based so "Epoch: i / epoch" reads correctly
    for (batch_x, batch_y) in train_gen.generate(xs=[tr_x], ys=[tr_y]):
        metrics = model.train_on_batch(batch_x, batch_y)
        loss_avg.add(metrics[0])
    # TODO: add checkpoint
    tr_loss = eval(model, eval_tr_gen, tr_x, tr_y)
    te_loss = eval(model, eval_te_gen, te_x, te_y)
    # inter_loss_avg: average training loss over the whole epoch;
    # tr_loss / te_loss: training / test set loss after this epoch.
    loss_str = "Epoch: %d / %d, inter_loss_avg: %f, tr_loss: %f, te_loss: %f" % (i, epoch, loss_avg.val(), tr_loss, te_loss)
    loss_file.write(loss_str + "\n")
    loss_file.flush()
    loss_avg.reset()
    board.on_epoch_end(i, named_logs(model.metrics_names, metrics))
    # Save out training stats.
    stat_dict = {'epoch': i,
                 'tr_loss': tr_loss,
                 'te_loss': te_loss, }
    stat_path = os.path.join(stats_dir, "%diters.p" % i)
    pickle.dump(stat_dict, open(stat_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
    # Save model.
    if i % save_interval == 0:
        model_path = os.path.join(model_dir, "md_%d_epoch.h5" % i)
        model.save(model_path)
        print("Saved model to %s" % model_path)
board.on_train_end(None)
print("Training time: %s s" % (time.time() - t1,))
Note:
With train_on_batch, generating histograms requires providing validation data, and it cannot be a generator: it must be actual in-memory arrays.
With fit() this is easy: just pass validation_data=(x_val, y_val).
After all this detouring, it may be cheaper to just add a RAM stick than to reinvent the wheel.
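If you do stick with train_on_batch, one workaround is to populate the callback's validation_data attribute yourself. This leans on an implementation detail of the Keras 2.x TensorBoard callback (it reads self.validation_data when histogram_freq > 0, in the [inputs, targets, sample_weights] layout that fit() normally fills in), so treat it as a sketch, not a guaranteed API:

# Assumption: Keras 2.x internals; fit() would normally set this attribute.
board = TensorBoard(log_dir=tb_log, histogram_freq=2, batch_size=batch_size, write_images=True)
board.set_model(model)
board.validation_data = [val_x, val_y, np.ones(len(val_x))]  # uniform sample weights

Models that use the learning phase (e.g. with Dropout) additionally expect a trailing 0. flag appended to that list.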
# Directory for model checkpoints
ckpt_dir = os.path.join(workspace, "models", "%ddb" % int(tr_snr))
prp_data.create_folder(ckpt_dir)
model = Sequential()
model.add(Flatten(input_shape=(n_concat, n_freq)))
model.add(Dense(n_hid))
model.add(LeakyReLU())  # note: default alpha here is 0.3, while the later layers use 0.2
model.add(Dropout(0.2))
model.add(Dense(n_hid))
model.add(LeakyReLU(0.2))
model.add(Dropout(0.2))
model.add(Dense(n_hid))
model.add(LeakyReLU(0.2))
model.add(Dropout(0.2))
model.add(Dense(n_freq, activation='linear'))
model.summary()
model.load_weights(os.path.join(workspace, pre_model))
# TODO: diy callback func, get the loss from real val data.
callbacks_list = [
    EarlyStopping(monitor='val_mean_squared_error', patience=6, ),
    ModelCheckpoint(filepath=ckpt_dir + '/ckpt_model.h5', monitor='val_loss', save_best_only=True, mode='min', ),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, ),
    TensorBoard(log_dir=tb_log, histogram_freq=1, batch_size=batch_size, write_images=True)
]
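After training, the best checkpoint saved by ModelCheckpoint can be restored with load_model (ckpt_dir as defined above):

from keras.models import load_model
best_model = load_model(os.path.join(ckpt_dir, 'ckpt_model.h5'))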
model.compile(loss='mean_absolute_error',
              optimizer=Nadam(lr=lr),
              metrics=['mse'])
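The monitor keys in the callbacks above must match the logged metric names exactly (with a val_ prefix for validation metrics), so it is worth checking them right after compile:

print(model.metrics_names)  # e.g. ['loss', 'mean_squared_error'] in Keras 2.x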
plot_model(model, show_layer_names=True, to_file=os.path.join(workspace,'model.png'))
# Train.
t1 = time.time()
# model.fit(tr_x, tr_y, epochs=epoch, batch_size=batch_size, callbacks=callbacks_list, validation_data=(te_x, te_y))
model.fit(tr_x, tr_y, epochs=epoch, batch_size=batch_size, callbacks=callbacks_list, validation_split=0.2)
print("Training time: %s s" % (time.time() - t1,))