About the warm-up learning rate


About warm-up: the TensorFlow models repository mentions a 5-epoch warm-up, so the roughly 2.16 epochs computed above is fairly reasonable. The comment in question reads:

> warmup: Run a 5 epoch warmup to the initial lr.
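As a sanity check on that "2.16 epochs" figure: warm-up length in epochs is just warm-up iterations × effective batch size ÷ training-set size. A minimal sketch, assuming for illustration warm_up_iter = 4000, an effective batch size of 64, and the ~118k-image COCO train split (none of these numbers are stated in this post):

```python
# Illustrative assumptions only -- not values taken from this post.
warm_up_iter = 4000         # assumed cfg.warm_up_iter
effective_batch_size = 64   # e.g. 8 images/GPU x 8 GPUs
num_train_images = 118287   # COCO 2017 train split

warmup_epochs = warm_up_iter * effective_batch_size / num_train_images
print(f"warm-up covers ~{warmup_epochs:.2f} epochs")  # ~2.16
```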
Core warm-up code:

```python
learning_rate = cfg.learning_rate
boundaries = cfg.lr_steps      # _C.lr_steps = [65000, 68000]
gamma = cfg.lr_gamma           # _C.lr_gamma = 0.1
step_num = len(cfg.lr_steps)
# values = [lr * 0.1**0, lr * 0.1**1, lr * 0.1**2]
values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

optimizer = fluid.optimizer.Momentum(
    learning_rate=exponential_with_warmup_decay(
        learning_rate=learning_rate,
        boundaries=boundaries,
        values=values,
        warmup_iter=cfg.warm_up_iter,
        warmup_factor=cfg.warm_up_factor),
    regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
    momentum=cfg.momentum)
```
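Plugging in the config values quoted in the comments (and assuming, purely for illustration, a base learning rate of 0.001, which the snippet does not state), `values` becomes a three-stage piecewise schedule:

```python
learning_rate = 0.001            # illustrative assumption
boundaries = [65000, 68000]      # _C.lr_steps
gamma = 0.1                      # _C.lr_gamma
values = [learning_rate * gamma**i for i in range(len(boundaries) + 1)]
# values ~= [0.001, 0.0001, 1e-05] (up to float rounding), meaning:
#   step <  65000         -> 0.001
#   65000 <= step < 68000 -> 0.0001
#   step >= 68000         -> 1e-05
```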

 


The `exponential_with_warmup_decay` scheduler itself, shown with the imports it relies on (module paths assume fluid 1.x):

```python
import paddle.fluid as fluid
import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
from paddle.fluid.layers import control_flow


def exponential_with_warmup_decay(learning_rate, boundaries, values,
                                  warmup_iter, warmup_factor):
    # Framework-maintained global step counter.
    global_step = lr_scheduler._decay_step_counter()

    lr = fluid.layers.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_iter_var = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(warmup_iter), force_cpu=True)

    with control_flow.Switch() as switch:
        with switch.case(global_step < warmup_iter_var):
            alpha = global_step / warmup_iter_var
            # factor ramps linearly over [warmup_factor, 1] as alpha goes 0 -> 1
            factor = warmup_factor * (1 - alpha) + alpha
            # decayed_lr ramps over [lr * warmup_factor, lr];
            # this is where the warm-up actually happens
            decayed_lr = learning_rate * factor
            fluid.layers.assign(decayed_lr, lr)

        for i in range(len(boundaries)):    # len(boundaries) == 2 here
            boundary_val = fluid.layers.fill_constant(
                shape=[1],
                dtype='float32',
                value=float(boundaries[i]),
                force_cpu=True)
            value_var = fluid.layers.fill_constant(
                shape=[1], dtype='float32', value=float(values[i]))
            with switch.case(global_step < boundary_val):
                fluid.layers.assign(value_var, lr)

        last_value_var = fluid.layers.fill_constant(
            shape=[1], dtype='float32', value=float(values[len(values) - 1]))
        with switch.default():
            fluid.layers.assign(last_value_var, lr)

    return lr
```
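To see what schedule the graph above actually produces, here is a plain-Python model of the same logic (a sketch for intuition, not code from the original; warm_up_iter = 4000 and warmup_factor = 0.0 are assumptions):

```python
def lr_at_step(step, base_lr, boundaries, values, warmup_iter, warmup_factor):
    """Plain-Python model of exponential_with_warmup_decay."""
    if step < warmup_iter:
        # Linear warm-up from base_lr * warmup_factor up to base_lr.
        alpha = step / warmup_iter
        return base_lr * (warmup_factor * (1 - alpha) + alpha)
    # Piecewise-constant decay afterwards.
    for boundary, value in zip(boundaries, values):
        if step < boundary:
            return value
    return values[-1]

for step in [0, 2000, 4000, 65000, 68000]:
    print(step, lr_at_step(step, 0.001, [65000, 68000],
                           [0.001, 0.0001, 0.00001], 4000, 0.0))
# 0 -> 0.0, 2000 -> 0.0005, 4000 -> 0.001, 65000 -> 0.0001, 68000 -> 1e-05
```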


        
--------------------- 
Author: Direwolf_0 
Source: CSDN 
Original: https://blog.csdn.net/weixin_43747587/article/details/91444781 
Copyright notice: this is the blogger's original article; please include a link to the original post when reposting.
