1. Initializing Weights
# inside an nn.Module's __init__ (assumes: import torch.nn as nn; import torch.nn.init as init)
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
init.xavier_uniform_(self.conv1.weight)  # use the in-place trailing-underscore variants; xavier_uniform/constant are deprecated
init.constant_(self.conv1.bias, 0.1)
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        init.xavier_uniform_(m.weight.data)
        init.constant_(m.bias.data, 0.0)  # Xavier needs a tensor with >= 2 dims, so the 1-D bias gets a constant instead

net = Net()  # build the network
net.apply(weights_init)  # apply() recursively visits every module in the network and calls the given
                         # function on each one, so all Conv layers get their weights initialized.
for m in model.modules():
    if isinstance(m, nn.Conv2d):
        # pick one scheme for the conv weights; each call would overwrite the previous one
        # nn.init.normal_(m.weight.data)
        # nn.init.xavier_normal_(m.weight.data)
        nn.init.kaiming_normal_(m.weight.data)  # conv layer init
        m.bias.data.fill_(0)
    elif isinstance(m, nn.Linear):
        m.weight.data.normal_()  # fully connected layer init
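Putting the pieces above together, a minimal self-contained sketch (the small Sequential net is only an illustration):

import torch.nn as nn
import torch.nn.init as init

def init_weights(m):
    # Kaiming init for conv weights, Xavier for linear weights, zeros for all biases
    if isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            init.constant_(m.bias, 0.0)
    elif isinstance(m, nn.Linear):
        init.xavier_uniform_(m.weight)
        init.constant_(m.bias, 0.0)

net = nn.Sequential(nn.Conv2d(3, 64, 3), nn.ReLU(), nn.Conv2d(64, 128, 3))
net.apply(init_weights)  # recursively initializes every submodule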
2. Loading a Model
pretrained_dict = ...
model_dict = model.state_dict()
# 1. filter out unnecessary keys
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict)
# 3. load the new state dict
model.load_state_dict(model_dict)
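Recent PyTorch versions also accept strict=False, which lets load_state_dict itself skip keys that are missing from or unexpected by the model, and return them for inspection:

# Alternative: let load_state_dict tolerate non-matching keys
incompatible = model.load_state_dict(pretrained_dict, strict=False)
print(incompatible.missing_keys, incompatible.unexpected_keys)  # keys that were skipped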
- PyTorch's Module.modules() and Module.children()
- In PyTorch, every neural network module is a subclass of torch.nn.Module, and a Module can contain other Modules, nested as a tree. When you need to walk the pieces of a network, Module.modules() returns an iterator over all modules in the network (the module itself included, recursively), while Module.children() returns an iterator over only the direct submodules.
In [9]: list(nn.Sequential(nn.Linear(10, 20), nn.ReLU()).modules())
Out[9]:
[Sequential (
(0): Linear (10 -> 20)
(1): ReLU ()
), Linear (10 -> 20), ReLU ()]
In [10]: list(nn.Sequential(nn.Linear(10, 20), nn.ReLU()).children())
Out[10]: [Linear (10 -> 20), ReLU ()]
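named_children() additionally yields each direct child's attribute name, which helps decide where to cut for finetuning. A small sketch, assuming a torchvision ResNet-18 (any model works):

import torchvision.models as models

model = models.resnet18(pretrained=True)  # classic flag; newer torchvision prefers the weights= argument
for name, child in model.named_children():
    print(name, type(child).__name__)  # conv1, bn1, relu, maxpool, layer1..layer4, avgpool, fc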
- Selecting specific layers to finetune
- First use Module.children() to look at the network's direct submodules, set param.requires_grad = False on the parameters of the modules that should stay fixed, and collect the parameters that do need updating into a list. Concretely:
count = 0
para_optim = []
for k in model.children():
    count += 1
    # the threshold 6 depends on the model; set it to the index of the first module to finetune
    if count > 6:
        for param in k.parameters():
            para_optim.append(param)
    else:
        for param in k.parameters():
            param.requires_grad = False

optimizer = optim.RMSprop(para_optim, lr=lr)
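A quick sanity check that the freeze took effect before training starts (the counts depend on the model):

n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
n_total = sum(p.numel() for p in model.parameters())
print('trainable parameters: %d / %d' % (n_trainable, n_total))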
3. Training with Part of the Parameters Frozen

# only the parameters with requires_grad=True are passed to the optimizer, so only they are trained
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
- Another small trick: inside an nn.Module's __init__ you can insert the loop below partway through; the parameters defined before it get requires_grad = False, while those defined after it stay trainable.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # freeze everything defined so far (the two conv layers)
        for p in self.parameters():
            p.requires_grad = False
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
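To confirm the split, print requires_grad for each parameter; with the definition above, the conv parameters come out frozen and the fc parameters trainable:

net = Net()
for name, p in net.named_parameters():
    print(name, p.requires_grad)  # conv1.*, conv2.* -> False; fc1.*, fc2.*, fc3.* -> True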
4. Optimization
optimizer = optim.Adam([
    # biases: double learning rate and no weight decay, a convention common in Caffe-era training recipes
    {'params': [param for name, param in net.named_parameters() if name[-4:] == 'bias'],
     'lr': 2 * args['lr']},
    # weights: base learning rate with weight decay
    {'params': [param for name, param in net.named_parameters() if name[-4:] != 'bias'],
     'lr': args['lr'], 'weight_decay': args['weight_decay']}
], betas=(args['momentum'], 0.999))
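To check that the two groups were built as intended, inspect optimizer.param_groups; each group carries its own lr and weight_decay (0 where unset):

for i, group in enumerate(optimizer.param_groups):
    print(i, group['lr'], group['weight_decay'], len(group['params']))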
5. Loading Partial Weights
# Collect the model's state-dict keys, skipping zero-dim entries (e.g. num_batches_tracked)
keys = []
for k, v in densenet.state_dict().items():
    if v.shape:
        keys.append(k)
        print(k, v.shape)
# Load weights from the pretrained checkpoint and remap them onto the model's own keys;
# this relies on both state dicts enumerating the layers in the same order
state = {}
pretrained_dict = torch.load('/home/lulu/pytorch/Paper_Code/weights/densenet121-a639ec97.pth')
for i, (k, v) in enumerate(pretrained_dict.items()):
    if 'classifier' not in k:  # drop the classifier head, keep everything else
        state[keys[i]] = v
# Save the remapped weights
torch.save(state, '/home/lulu/pytorch/Paper_Code/weights/densenet121.pth')
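The remapped checkpoint can then be loaded back into the model; since the classifier entries were dropped, pass strict=False so the missing keys are tolerated (a short sketch reusing the names above):

state = torch.load('/home/lulu/pytorch/Paper_Code/weights/densenet121.pth')
densenet.load_state_dict(state, strict=False)  # classifier.* keep their fresh initialization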