关于warm-up:TensorFlow的models里面提到warm-up为5个epoch,所以上面计算的2.16个epoch相对合理。
warmup: Run a 5 epoch warmup to the initial lr.
1
warm_up核心代码
# Build a Momentum optimizer whose learning rate follows a piecewise-constant
# decay schedule preceded by a linear warmup phase.
learning_rate = cfg.learning_rate
boundaries = cfg.lr_steps    # iteration numbers where the LR drops, e.g. [65000, 68000]
gamma = cfg.lr_gamma         # multiplicative decay factor per boundary, e.g. 0.1
step_num = len(cfg.lr_steps)

# One LR value per segment: [lr, lr*gamma, lr*gamma^2, ...] (step_num + 1 entries).
values = [learning_rate * gamma ** i for i in range(step_num + 1)]

optimizer = fluid.optimizer.Momentum(
    learning_rate=exponential_with_warmup_decay(
        learning_rate=learning_rate,
        boundaries=boundaries,
        values=values,
        warmup_iter=cfg.warm_up_iter,
        warmup_factor=cfg.warm_up_factor),
    regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
    momentum=cfg.momentum)
def exponential_with_warmup_decay(learning_rate, boundaries, values,
                                  warmup_iter, warmup_factor):
    """Build a per-step learning-rate variable: linear warmup, then piecewise decay.

    During the first ``warmup_iter`` steps the LR ramps linearly from
    ``learning_rate * warmup_factor`` up to ``learning_rate``.  After warmup
    the LR is piecewise-constant: ``values[i]`` while the global step is below
    ``boundaries[i]``, and ``values[-1]`` once every boundary has been passed.

    Args:
        learning_rate: base LR the warmup ramps toward.
        boundaries: iteration counts at which the LR switches segments.
        values: LR for each segment; must have ``len(boundaries) + 1`` entries.
        warmup_iter: number of warmup iterations.
        warmup_factor: starting LR multiplier at step 0 (in [0, 1]).

    Returns:
        A persistable scalar variable named "learning_rate", reassigned
        each step by the Switch program below.
    """
    global_step = lr_scheduler._decay_step_counter()

    lr = fluid.layers.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_iter_var = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(warmup_iter), force_cpu=True)

    with control_flow.Switch() as switch:
        # Warmup branch: factor interpolates linearly from warmup_factor to 1,
        # so the assigned LR goes from lr*warmup_factor up to lr.
        with switch.case(global_step < warmup_iter_var):
            alpha = global_step / warmup_iter_var
            factor = warmup_factor * (1 - alpha) + alpha
            decayed_lr = learning_rate * factor
            fluid.layers.assign(decayed_lr, lr)

        # Piecewise-constant segments: first matching case wins, so each
        # boundary picks the LR for the segment that ends at it.
        for idx, bound in enumerate(boundaries):
            boundary_val = fluid.layers.fill_constant(
                shape=[1],
                dtype='float32',
                value=float(bound),
                force_cpu=True)
            value_var = fluid.layers.fill_constant(
                shape=[1], dtype='float32', value=float(values[idx]))
            with switch.case(global_step < boundary_val):
                fluid.layers.assign(value_var, lr)

        # Past the last boundary: stay on the final segment's LR.
        last_value_var = fluid.layers.fill_constant(
            shape=[1], dtype='float32', value=float(values[-1]))
        with switch.default():
            fluid.layers.assign(last_value_var, lr)

    return lr
```
---------------------
作者:Direwolf_0
来源:CSDN
原文:https://blog.csdn.net/weixin_43747587/article/details/91444781
版权声明:本文为博主原创文章,转载请附上博文链接!