1、求网络模型参数:
n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('number of params:', n_parameters)
2、打印模型参数名称,以及创建参数字典
print(f"\n{'#'*20}\n print the parameters of model\n")
for n, p in model_without_ddp.named_parameters():
print(n)
print(f"{'#'*20}\n\n")
param_dicts = [
{
"params":
[p for n, p in model_without_ddp.named_parameters()
if not match_name_keywords(n, args.lr_backbone_names) and not match_name_keywords(n, args.lr_linear_proj_names) and p.requires_grad],
"lr": args.lr,
},
{
"params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_backbone_names) and p.requires_grad],
"lr": args.lr_backbone,
},
{
"params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_linear_proj_names) and p.requires_grad],
"lr": args.lr * args.lr_linear_proj_mult,
}
]
3、加载模型
if args.resume:
if args.resume.startswith('https'):
checkpoint = torch.hub.load_state_dict_from_url(
args.resume, map_location='cpu', check_hash=True)
else:
checkpoint = torch.load(args.resume, map_location='cpu')
missing_keys, unexpected_keys = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
unexpected_keys = [k for k in unexpected_keys if not (k.endswith('total_params') or k.endswith('total_ops'))]
if len(missing_keys) > 0:
print('Missing Keys: {}'.format(missing_keys))
if len(unexpected_keys) > 0:
print('Unexpected Keys: {}'.format(unexpected_keys))
if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
import copy
p_groups = copy.deepcopy(optimizer.param_groups)
optimizer.load_state_dict(checkpoint['optimizer'])
for pg, pg_old in zip(optimizer.param_groups, p_groups):
pg['lr'] = pg_old['lr']
pg['initial_lr'] = pg_old['initial_lr']
print(optimizer.param_groups)
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
# todo: this is a hack for doing experiment that resume from checkpoint and also modify lr scheduler (e.g., decrease lr in advance).
args.override_resumed_lr_drop = True
if args.override_resumed_lr_drop:
print('Warning: (hack) args.override_resumed_lr_drop is set to True, so args.lr_drop would override lr_drop in resumed lr_scheduler.')
lr_scheduler.step_size = args.lr_drop
lr_scheduler.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
lr_scheduler.step(lr_scheduler.last_epoch)
args.start_epoch = checkpoint['epoch'] + 1
# check the resumed model
if not args.eval:
test_stats, coco_evaluator = evaluate(
model, criterion, postprocessors, data_loader_val, base_ds, device, args.output_dir
)
4、prefetcher 加速数据读取
# 使用:
prefetcher = data_prefetcher(data_loader, device, prefetch=True)
samples, targets = prefetcher.next()
for _ in metric_logger.log_every(range(len(data_loader)), print_freq, header):
outputs = model(samples)
loss_dict = criterion(outputs, targets)
optimizer.zero_grad()
losses.backward()
optimizer.step()
samples, targets = prefetcher.next()
# 源码
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
import torch
def to_cuda(samples, targets, device):
samples = samples.to(device, non_blocking=True)
targets = [{k: v.to(device, non_blocking=True) for k, v in t.items()} for t in targets]
return samples, targets
class data_prefetcher():
def __init__(self, loader, device, prefetch=True):
self.loader = iter(loader)
self.prefetch = prefetch
self.device = device
if prefetch:
self.stream = torch.cuda.Stream()
self.preload()
def preload(self):
try:
self.next_samples, self.next_targets = next(self.loader)
except StopIteration:
self.next_samples = None
self.next_targets = None
return
# if record_stream() doesn't work, another option is to make sure device inputs are created
# on the main stream.
# self.next_input_gpu = torch.empty_like(self.next_input, device='cuda')
# self.next_target_gpu = torch.empty_like(self.next_target, device='cuda')
# Need to make sure the memory allocated for next_* is not still in use by the main stream
# at the time we start copying to next_*:
# self.stream.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(self.stream):
self.next_samples, self.next_targets = to_cuda(self.next_samples, self.next_targets, self.device)
# more code for the alternative if record_stream() doesn't work:
# copy_ will record the use of the pinned source tensor in this side stream.
# self.next_input_gpu.copy_(self.next_input, non_blocking=True)
# self.next_target_gpu.copy_(self.next_target, non_blocking=True)
# self.next_input = self.next_input_gpu
# self.next_target = self.next_target_gpu
# With Amp, it isn't necessary to manually convert data to half.
# if args.fp16:
# self.next_input = self.next_input.half()
# else:
def next(self):
if self.prefetch:
torch.cuda.current_stream().wait_stream(self.stream)
samples = self.next_samples
targets = self.next_targets
if samples is not None:
samples.record_stream(torch.cuda.current_stream())
if targets is not None:
for t in targets:
for k, v in t.items():
v.record_stream(torch.cuda.current_stream())
self.preload()
else:
try:
samples, targets = next(self.loader)
samples, targets = to_cuda(samples, targets, self.device)
except StopIteration:
samples = None
targets = None
return samples, targets
5、