The standard usage of a PyTorch optimizer looks like this:
>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
>>> optimizer.zero_grad()
>>> loss_fn(model(input), target).backward()
>>> optimizer.step()
The optim module first appears in main/train.py:
# build optimizer
optimizer = optim_builder.build(task, task_cfg.optim, model)
videoanalyst/optim/builder.py defines the optim module. It is built around the optimizer, but is split into two parts: the optimizer itself and an optional grad_modifier.
optimizer = optimizer_builder.build(task, cfg.optimizer, model)
if ("grad_modifier" in cfg) and (cfg.grad_modifier.name != ""):
    grad_modifier = grad_modifier_builder.build(task, cfg.grad_modifier)
    optimizer.set_grad_modifier(grad_modifier)
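To see how these two parts cooperate at training time, here is a runnable toy loop. The model, data, and loss function are placeholders, and the wrapper-specific calls (schedule and modify_grad, introduced below) are left as comments since this sketch uses a plain torch.optim.SGD:

import torch
import torch.nn as nn

model = nn.Linear(8, 2)  # toy stand-ins; the real trainer builds these from configs
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
loss_fn = nn.MSELoss()
data = [(torch.randn(4, 8), torch.randn(4, 2)) for _ in range(3)]

for epoch in range(2):
    for iteration, (inputs, targets) in enumerate(data):
        # with the videoanalyst wrapper one would call
        # optimizer.schedule(epoch, iteration) here ...
        optimizer.zero_grad()
        loss = loss_fn(model(inputs), targets)
        loss.backward()
        # ... and let the grad_modifier run before stepping, roughly:
        # optimizer.modify_grad(epoch, iteration)
        optimizer.step()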
videoanalyst/optim/optimizer/optimizer_impl/sgd.py defines the first main body: self._optimizer, an instance of torch's SGD class.
def update_params(self):
    super(SGD, self).update_params()
    params = self._state["params"]  # parameters (or param groups) held in the wrapper's state
    kwargs = self._hyper_params
    # keep only the hyper-parameters that torch.optim.SGD accepts
    valid_keys = self.extra_hyper_params.keys()
    kwargs = {k: kwargs[k] for k in valid_keys}
    self._optimizer = optim.SGD(params, **kwargs)  # (re)build the underlying torch optimizer
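The filtering step matters because the wrapper's hyper-parameter dict can also carry keys that torch.optim.SGD would reject. A self-contained sketch of the same pattern (key names and values are illustrative, not the repo's):

import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)  # toy model
hyper_params = {"lr": 0.08, "momentum": 0.9, "weight_decay": 1e-4,
                "minibatch": 32}  # "minibatch" is not an SGD argument
valid_keys = {"lr", "momentum", "weight_decay"}  # what torch.optim.SGD accepts
sgd_kwargs = {k: hyper_params[k] for k in valid_keys}
optimizer = optim.SGD(model.parameters(), **sgd_kwargs)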
videoanalyst/optim/optimizer/optimizer_base.py is the key file. It defines the second main body, self._grad_modifier, whose entry point is self._grad_modifier.modify_grad(self._model, epoch, iteration). It also stores lr_multiplier and lr_policy in self._state; both are used mainly by the following function:
def schedule(self, epoch: int, iteration: int) -> Dict:
    r"""
    An interface for optimizer scheduling (e.g. adjusting the learning rate).
    self.set_scheduler needs to be called during the initialization phase.
    """
    schedule_info = dict()
    if "lr_policy" in self._state:
        lr = self._state["lr_policy"].get_lr(epoch, iteration)
        schedule_lr(self._optimizer, lr)
        schedule_info["lr"] = lr
    # apply learning rate multiplication
    if "lr_multiplier" in self._state:
        self._state["lr_multiplier"].multiply_lr(self._optimizer)
    return schedule_info
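schedule_lr itself is not reproduced in these notes; a typical implementation, assuming the common PyTorch idiom of writing the new value into every parameter group, would look like:

def schedule_lr(optimizer, lr):
    # write the scheduled lr into every parameter group of the torch optimizer
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr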
The relevant part of the config file:
lr_policy:
- >
  {
  "name": "LinearLR",
  "start_lr": 0.000001,
  "end_lr": 0.08,
  "max_epoch": 1
  }
- >
  {
  "name": "CosineLR",
  "start_lr": 0.08,
  "end_lr": 0.000001,
  "max_epoch": 19
  }
lr_multiplier:
- >
  {
  "name": "backbone",
  "regex": "basemodel",
  "ratio": 0.1
  }
- >
  {
  "name": "other",
  "regex": "^((?!basemodel).)*$",
  "ratio": 1
  }
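Read together: the learning rate warms up linearly from 1e-6 to 0.08 over the first epoch, then anneals back to 1e-6 along a cosine curve over the remaining 19 epochs, and lr_multiplier scales the backbone (basemodel) learning rate by 0.1 while leaving all other parameters at 1x. A minimal sketch of the math and the regex grouping (function names and formulas are assumptions, not the repo's exact code):

import math
import re

def linear_lr(start_lr, end_lr, progress):
    # progress runs from 0 to 1 within the phase
    return start_lr + (end_lr - start_lr) * progress

def cosine_lr(start_lr, end_lr, progress):
    # cosine annealing from start_lr down to end_lr;
    # e.g. cosine_lr(0.08, 0.000001, 0.5) -> 0.0400005
    return end_lr + 0.5 * (start_lr - end_lr) * (1.0 + math.cos(math.pi * progress))

def lr_ratio(param_name):
    # backbone parameters get 0.1x the scheduled lr, everything else 1x
    return 0.1 if re.search("basemodel", param_name) else 1.0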
The grad_modifier implements one main feature: freezing specific convolution layers at a given epoch. With the config below, all backbone conv layers are frozen at epoch 0, and the stage-3 (Mixed_5) and stage-4 (Mixed_6) conv layers are unfrozen again at epoch 10:
grad_modifier:
  name: "DynamicFreezer"
  DynamicFreezer:
    schedule:
    - >
      {
      "name": "isConv",
      "regex": "basemodel.*\\.conv\\.",
      "epoch": 0,
      "freezed": true
      }
    - >
      {
      "name": "isConvStage4",
      "regex": "basemodel\\.Mixed_6.*\\.conv\\.",
      "epoch": 10,
      "freezed": false
      }
    - >
      {
      "name": "isConvStage3",
      "regex": "basemodel\\.Mixed_5.*\\.conv\\.",
      "epoch": 10,
      "freezed": false
      }
videoanalyst/optim/grad_modifier/grad_modifier_impl/dynamic_freezer.py implements it as follows:
def modify_grad(self, module: nn.Module, epoch: int, iteration: int = -1):
    if (iteration < 0) and ("schedule" in self._state):
        # epoch-level scheduling
        apply_freeze_schedule(module, epoch, self._state["schedule"])
    else:
        # iteration-level scheduling
        pass
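apply_freeze_schedule is not shown either; a plausible sketch, inferred from the config fields above (regex, epoch, freezed) and the standard PyTorch way of freezing parameters:

import re
import torch.nn as nn

def apply_freeze_schedule(module: nn.Module, epoch: int, schedule):
    # guessed implementation: for every phase that fires at this epoch,
    # toggle requires_grad on all parameters whose name matches the regex
    for phase in schedule:
        if phase["epoch"] == epoch:
            for name, param in module.named_parameters():
                if re.search(phase["regex"], name):
                    param.requires_grad = not phase["freezed"]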