Adversarial training is a training technique that injects noise (adversarial perturbations) into the inputs; it acts as a regularizer on the parameters and improves the model's robustness and generalization.
Common adversarial training methods include FGSM, FGM, PGD, FreeAT, YOPO, FreeLB, SMART, and AWP.
This post gives the code and experimental results for FGSM, FGM, PGD, and FreeAT.
Repository: GTyingzi/Compare_Adversial (github.com)
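For reference (a summary of the standard formulations, not taken from the repo), the perturbations behind these methods are usually written as follows, where $L$ is the loss, $x$ the embedding being perturbed, $\epsilon$ the perturbation bound, and $\alpha$ the PGD step size:

$$
r_{\mathrm{FGSM}} = \epsilon \cdot \operatorname{sign}\big(\nabla_x L(x, y; \theta)\big), \qquad
r_{\mathrm{FGM}} = \epsilon \cdot \frac{\nabla_x L(x, y; \theta)}{\lVert \nabla_x L(x, y; \theta) \rVert_2}
$$

$$
r_{t+1} = \Pi_{\lVert r \rVert_2 \le \epsilon}\Big(r_t + \alpha \cdot \frac{\nabla_x L(x + r_t, y; \theta)}{\lVert \nabla_x L(x + r_t, y; \theta) \rVert_2}\Big) \quad \text{(PGD, iterated } K \text{ times)}
$$

FreeAT follows the multi-step recipe but reuses the gradient of each inner step both to update the model parameters and to build the next perturbation, so the extra attack steps come almost for free.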
import torch
import torch.nn.functional as F


class FGSM:
    def __init__(self, model, eps=1):
        self.model = model
        self.eps = eps
        self.backup = {}

    def attack(self, emb_name='embedding'):
        # emb_name: change this to the name of the embedding parameter in your model
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                self.backup[name] = param.data.clone()
                r_at = self.eps * param.grad.sign()
                param.data.add_(r_at)

    def restore(self, emb_name='embedding'):
        # emb_name: change this to the name of the embedding parameter in your model
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                assert name in self.backup
                param.data = self.backup[name]
        self.backup = {}
fgsm = FGSM(model=model)
for i, (trains, labels) in enumerate(train_iter):
    # Normal training step
    outputs = model(trains)
    loss = F.cross_entropy(outputs, labels)
    loss.backward()  # backprop to obtain the normal gradients
    # Adversarial training step
    fgsm.attack()  # add the adversarial perturbation to the embedding
    outputs = model(trains)
    loss_adv = F.cross_entropy(outputs, labels)
    loss_adv.backward()  # backprop; the adversarial gradients are accumulated on top of the normal ones
    fgsm.restore()  # restore the embedding parameters
    # Gradient step: update the parameters
    optimizer.step()
    model.zero_grad()
class FGM:
    def __init__(self, model, eps=1):
        self.model = model
        self.eps = eps
        self.backup = {}

    def attack(self, emb_name='embedding'):
        # emb_name: change this to the name of the embedding parameter in your model
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                self.backup[name] = param.data.clone()
                norm = torch.norm(param.grad)
                if norm != 0 and not torch.isnan(norm):
                    r_at = self.eps * param.grad / norm
                    param.data.add_(r_at)

    def restore(self, emb_name='embedding'):
        # emb_name: change this to the name of the embedding parameter in your model
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                assert name in self.backup
                param.data = self.backup[name]
        self.backup = {}
fgm = FGM(model=model)
for i, (trains, labels) in enumerate(train_iter):
    # Normal training step
    outputs = model(trains)
    loss = F.cross_entropy(outputs, labels)
    loss.backward()  # backprop to obtain the normal gradients
    # Adversarial training step
    fgm.attack()  # add the adversarial perturbation to the embedding
    outputs = model(trains)
    loss_adv = F.cross_entropy(outputs, labels)
    loss_adv.backward()  # backprop; the adversarial gradients are accumulated on top of the normal ones
    fgm.restore()  # restore the embedding parameters
    # Gradient step: update the parameters
    optimizer.step()
    model.zero_grad()
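As the comments note, `emb_name` must match the embedding parameter name in your own model; the default `'embedding'` fits the TextCNN/TextRNN models used here. For a HuggingFace-style BERT, the word-embedding weight appears in `named_parameters()` as `...embeddings.word_embeddings.weight`, so a hypothetical call (illustration only, not part of the repo) would look like:

    # Hypothetical adaptation for a BERT-style model: the substring below matches
    # "bert.embeddings.word_embeddings.weight" in named_parameters()
    fgm = FGM(model=bert_model, eps=1.0)
    fgm.attack(emb_name='word_embeddings')   # perturb only the word-embedding matrix
    # ... forward / backward pass on the perturbed embedding ...
    fgm.restore(emb_name='word_embeddings')  # restore with the same emb_name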
# PGD
class PGD:
    def __init__(self, model, eps=1, alpha=0.3):
        self.model = model
        self.eps = eps
        self.alpha = alpha
        self.emb_backup = {}
        self.grad_backup = {}

    def attack(self, emb_name='embedding', is_first_attack=False):
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                if is_first_attack:
                    self.emb_backup[name] = param.data.clone()
                norm = torch.norm(param.grad)
                if norm != 0 and not torch.isnan(norm):
                    r_at = self.alpha * param.grad / norm
                    param.data.add_(r_at)
                    param.data = self.project(name, param.data)

    def restore(self, emb_name='embedding'):
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                assert name in self.emb_backup
                param.data = self.emb_backup[name]
        self.emb_backup = {}

    def project(self, param_name, param_data):
        r = param_data - self.emb_backup[param_name]
        if torch.norm(r) > self.eps:
            r = self.eps * r / torch.norm(r)
        return self.emb_backup[param_name] + r

    def backup_grad(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None:
                self.grad_backup[name] = param.grad.clone()

    def restore_grad(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None:
                param.grad = self.grad_backup[name]
pgd = PGD(model=model)
for i, (trains, labels) in enumerate(train_iter):
    # Normal training step
    outputs = model(trains)
    loss = F.cross_entropy(outputs, labels)
    loss.backward()  # backprop to obtain the normal gradients
    # Adversarial training step
    pgd_k = 3
    pgd.backup_grad()  # back up the normal gradients before the multi-step attack
    for _t in range(pgd_k):
        pgd.attack(is_first_attack=(_t == 0))  # add the adversarial perturbation to the embedding; back up param.data on the first attack
        if _t != pgd_k - 1:
            model.zero_grad()
        else:
            pgd.restore_grad()
        outputs = model(trains)
        loss_adv = F.cross_entropy(outputs, labels)
        loss_adv.backward()  # backprop; on the last step the adversarial gradients are accumulated on top of the restored normal ones
    pgd.restore()  # restore the embedding parameters
    # Gradient step: update the parameters
    optimizer.step()
    model.zero_grad()
class FreeAT:
    def __init__(self, model, eps=0.1):
        self.model = model
        self.eps = eps
        self.emb_backup = {}
        self.grad_backup = {}
        self.last_r_at = 0

    def attack(self, emb_name='embedding', is_first_attack=False):
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                if is_first_attack:
                    self.emb_backup[name] = param.data.clone()
                param.data.add_(self.last_r_at)
                param.data = self.project(name, param.data)
                self.last_r_at = self.last_r_at + self.eps * param.grad.sign()

    def restore(self, emb_name='embedding'):
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                assert name in self.emb_backup
                param.data = self.emb_backup[name]
        self.emb_backup = {}

    def project(self, param_name, param_data):
        r = param_data - self.emb_backup[param_name]
        if torch.norm(r) > self.eps:
            r = self.eps * r / torch.norm(r)
        return self.emb_backup[param_name] + r

    def backup_grad(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None:
                self.grad_backup[name] = param.grad.clone()

    def restore_grad(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None:
                param.grad = self.grad_backup[name]
free_at = FreeAT(model=model)
for i, (trains, labels) in enumerate(train_iter):
    # Normal training step
    outputs = model(trains)
    loss = F.cross_entropy(outputs, labels)
    loss.backward()  # backprop to obtain the normal gradients
    # Adversarial training step
    m = 5
    free_at.backup_grad()  # back up the normal gradients before the multi-step attack
    for _t in range(m):
        free_at.attack(is_first_attack=(_t == 0))  # add the adversarial perturbation to the embedding; back up param.data on the first attack
        if _t != m - 1:
            model.zero_grad()
        else:
            free_at.restore_grad()
        outputs = model(trains)
        loss_adv = F.cross_entropy(outputs, labels)
        loss_adv.backward()  # backprop; on the last step the adversarial gradients are accumulated on top of the restored normal ones
    free_at.restore()  # restore the embedding parameters
    # Gradient step: update the parameters
    optimizer.step()
    model.zero_grad()
| baseline + adversarial training | precision | recall | F1 |
| --- | --- | --- | --- |
| TextCNN | 0.9083 | 0.9078 | 0.9079 |
| TextCNN + FGSM | 0.9105 | 0.9103 | 0.9103 |
| TextCNN + FGM | 0.9110 | 0.9104 | 0.9105 |
| TextCNN + PGD | 0.9103 | 0.9098 | 0.9099 |
| TextCNN + FreeAT | 0.9104 | 0.9097 | 0.9096 |
| baseline + adversarial training | precision | recall | F1 |
| --- | --- | --- | --- |
| TextRNN | 0.9046 | 0.9034 | 0.9038 |
| TextRNN + FGSM | 0.9068 | 0.9055 | 0.9058 |
| TextRNN + FGM | 0.9160 | 0.9161 | 0.9160 |
| TextRNN + PGD | 0.9144 | 0.9142 | 0.9140 |
| TextRNN + FreeAT | 0.9064 | 0.9062 | 0.9059 |
References:
- attack_train/Attack-Train-Compare-Pytorch at main · tanshoudong/attack_train (github.com)
- 对抗训练fgm、fgsm和pgd原理和源码分析 - 谈笑风生…的博客 (CSDN)
- 一文搞懂NLP中的对抗训练FGSM/FGM/PGD/FreeAT/YOPO/FreeLB/SMART - 知乎 (zhihu.com)
- 对抗学习总结:FGSM->FGM->PGD->FreeAT, YOPO->FreeLb->SMART->LookAhead->VAT - zhurui_xiaozhuzaizai的博客 (CSDN)
- lonePatient/TorchBlocks: A PyTorch-based toolkit for natural language processing (github.com)