To implement finetuning, there are the following two solutions. The first is to initialize the relevant variables from a pretrained checkpoint directly inside model_fn with tf.train.init_from_checkpoint:
def model_fn(features, labels, mode, params):
    # .....
    # Finetune: only initialize from the pretrained checkpoint when model_dir
    # does not already contain a checkpoint (i.e. this is a fresh run).
    if params.checkpoint_path and (not tf.train.latest_checkpoint(params.model_dir)):
        checkpoint_path = None
        if tf.gfile.IsDirectory(params.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(params.checkpoint_path)
        else:
            checkpoint_path = params.checkpoint_path

        tf.train.init_from_checkpoint(
            ckpt_dir_or_file=checkpoint_path,
            assignment_map={params.checkpoint_scope: params.checkpoint_scope}  # e.g. 'OptimizeLoss/': 'OptimizeLoss/'
        )
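This model_fn is driven entirely by the values in params. Below is a minimal wiring sketch, assuming the field names used above (checkpoint_path, checkpoint_scope) plus those used by the Experiment further down are our own convention (here stored in a tf.contrib.training.HParams object; a plain dict would also work):

# A minimal sketch; the concrete values are placeholders, and the field names
# (checkpoint_path, checkpoint_scope, ...) are our own convention, not a TF API.
import tensorflow as tf

params = tf.contrib.training.HParams(
    model_dir='/tmp/finetune_model',
    checkpoint_path='/path/to/pretrained',   # checkpoint file or directory
    checkpoint_scope='OptimizeLoss/',        # scope mapped 1:1 in assignment_map
    train_steps=10000,
    eval_min_frequency=1,
    eval_steps=100,
)

estimator = tf.estimator.Estimator(
    model_fn=model_fn,          # the model_fn defined above
    model_dir=params.model_dir,
    params=params,
)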
The second solution is to restore the pretrained weights with a custom hook (defined at the end of this section). The hook can be specified via the train_monitors parameter when defining tf.contrib.learn.Experiment:
# Define the experiment
experiment = tf.contrib.learn.Experiment(
    estimator=estimator,                           # Estimator
    train_input_fn=train_input_fn,                 # First-class function
    eval_input_fn=eval_input_fn,                   # First-class function
    train_steps=params.train_steps,                # Minibatch steps
    min_eval_frequency=params.eval_min_frequency,  # Eval frequency
    # train_monitors=[],                           # Hooks for training
    # eval_hooks=[eval_input_hook],                # Hooks for evaluation
    eval_steps=params.eval_steps                   # Use evaluation feeder until it is empty
)
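For instance, this is where the custom RestoreCheckpointHook defined below could be plugged in. A sketch, with illustrative constructor arguments (the path and pattern strings are placeholders):

# Sketch: attach the restore hook (defined below) as a training monitor.
restore_hook = RestoreCheckpointHook(
    checkpoint_path='/path/to/pretrained/model.ckpt',
    exclude_scope_patterns='Logits,biases',
    include_scope_patterns='InceptionResnetV1',
)

experiment = tf.contrib.learn.Experiment(
    estimator=estimator,
    train_input_fn=train_input_fn,
    eval_input_fn=eval_input_fn,
    train_steps=params.train_steps,
    min_eval_frequency=params.eval_min_frequency,
    train_monitors=[restore_hook],   # hooks for training
    eval_steps=params.eval_steps,
)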
Alternatively, the hook can be specified via the training_chief_hooks parameter when defining tf.estimator.EstimatorSpec. Personally, I think it is better to define it in the estimator, so that the experiment only has to focus on controlling the experiment schedule (number of training steps, evaluation steps, and so on).
def model_fn(features, labels, mode, params):
    # ....
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        # scaffold=get_scaffold(),
        # training_chief_hooks=None
    )
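A sketch of that preferred wiring, assuming the hook reads its arguments from the same params object (the fields exclude_scope_patterns and include_scope_patterns are assumptions for illustration) and that RestoreCheckpointHook is the custom hook defined at the end of this section:

def model_fn(features, labels, mode, params):
    # ....
    # Only the chief should restore the pretrained weights, and only in TRAIN mode.
    training_chief_hooks = None
    if mode == tf.estimator.ModeKeys.TRAIN and params.checkpoint_path:
        training_chief_hooks = [RestoreCheckpointHook(
            checkpoint_path=params.checkpoint_path,
            exclude_scope_patterns=params.exclude_scope_patterns,  # assumed field
            include_scope_patterns=params.include_scope_patterns,  # assumed field
        )]

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        training_chief_hooks=training_chief_hooks,
    )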
As an aside, let me briefly explain what the tf.estimator.EstimatorSpec object is for. It describes every aspect of a model, including the hooks to run with the session, such as CheckpointSaverHook (checkpoint saving, model restoring, etc.), NanTensorHook, LoggingTensorHook, and so on. The custom hook is defined as follows:
import os

import tensorflow as tf

slim = tf.contrib.slim


class RestoreCheckpointHook(tf.train.SessionRunHook):
    def __init__(self,
                 checkpoint_path,
                 exclude_scope_patterns,
                 include_scope_patterns):
        tf.logging.info("Create RestoreCheckpointHook.")
        #super(RestoreCheckpointHook, self).__init__()
        self.checkpoint_path = checkpoint_path
        self.exclude_scope_patterns = None if (not exclude_scope_patterns) else exclude_scope_patterns.split(',')
        self.include_scope_patterns = None if (not include_scope_patterns) else include_scope_patterns.split(',')

    def begin(self):
        # You can add ops to the graph here.
        print('Before starting the session.')

        # 1. Create saver

        # An equivalent manual filter, kept for reference:
        #exclusions = []
        #if self.checkpoint_exclude_scopes:
        #    exclusions = [scope.strip()
        #                  for scope in self.checkpoint_exclude_scopes.split(',')]
        #
        #variables_to_restore = []
        #for var in slim.get_model_variables():  #tf.global_variables():
        #    excluded = False
        #    for exclusion in exclusions:
        #        if var.op.name.startswith(exclusion):
        #            excluded = True
        #            break
        #    if not excluded:
        #        variables_to_restore.append(var)

        #inclusions
        #[var for var in tf.trainable_variables() if var.op.name.startswith('InceptionResnetV1')]

        variables_to_restore = tf.contrib.framework.filter_variables(
            slim.get_model_variables(),
            include_patterns=self.include_scope_patterns,  # ['Conv'],
            exclude_patterns=self.exclude_scope_patterns,  # ['biases', 'Logits'],
            # If True (default), performs re.search to find matches
            # (i.e. pattern can match any substring of the variable name).
            # If False, performs re.match (i.e. regexp should match from the beginning of the variable name).
            reg_search=True
        )
        self.saver = tf.train.Saver(variables_to_restore)

    def after_create_session(self, session, coord):
        # When this is called, the graph is finalized and
        # ops can no longer be added to the graph.
        print('Session created.')

        tf.logging.info('Fine-tuning from %s' % self.checkpoint_path)
        self.saver.restore(session, os.path.expanduser(self.checkpoint_path))
        tf.logging.info('End finetune from %s' % self.checkpoint_path)

    def before_run(self, run_context):
        #print('Before calling session.run().')
        return None  #SessionRunArgs(self.your_tensor)

    def after_run(self, run_context, run_values):
        #print('Done running one step. The value of my tensor: %s', run_values.results)
        #if you-need-to-stop-loop:
        #    run_context.request_stop()
        pass

    def end(self, session):
        #print('Done with the session.')
        pass
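One thing worth noting about the patterns: tf.contrib.framework.filter_variables uses re.search by default, so a pattern matches anywhere inside a variable name, and a variable is restored only if it matches at least one include pattern and no exclude pattern. A standalone illustration with made-up variable names (plain re, no graph required):

import re

# Hypothetical variable names, just to show how the include/exclude patterns behave.
names = [
    'InceptionResnetV1/Conv2d_1a/weights',
    'InceptionResnetV1/Conv2d_1a/biases',
    'Logits/weights',
]
include = ['InceptionResnetV1']
exclude = ['biases', 'Logits']

kept = [n for n in names
        if any(re.search(p, n) for p in include)
        and not any(re.search(p, n) for p in exclude)]

print(kept)  # ['InceptionResnetV1/Conv2d_1a/weights']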