Dropout means that, while training a deep network, a portion of the neurons is randomly and temporarily "dropped" with a certain probability.
Concretely, Dropout is applied to every mini-batch of training data. Because a different random subset of neurons is dropped each time, every iteration effectively trains a network with a different structure. Similar in spirit to Bagging, Dropout can therefore be seen as a practical model-ensemble method for large deep neural networks.
In the concrete implementation, the activation of a neuron is "dropped" with probability p, i.e. the neuron temporarily stops working. Each neuron is thus optimized together with a different, randomly chosen set of neurons on every iteration. This weakens the co-adaptation among neurons, reduces the risk of overfitting, and improves generalization.
During training, hidden-layer neurons are randomly selected and deleted; a deleted neuron no longer passes any signal, and a new set of neurons to delete is drawn every time a batch of data is passed through. At test time all neurons pass their signals, but each neuron's output is multiplied by the keep ratio (1 minus the drop ratio) used during training before it is passed on.
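The description above is the classical formulation; PyTorch's nn.Dropout actually uses the "inverted dropout" variant, which scales the surviving activations by 1/(1-p) during training so that no rescaling is needed at test time. A minimal NumPy sketch of that variant (the function name is mine, for illustration only):

import numpy as np

def inverted_dropout(x, p=0.5, train=True):
    if train:
        # zero out each activation with probability p, then scale the survivors
        # by 1/(1-p) so the expected activation stays unchanged
        mask = (np.random.rand(*x.shape) > p).astype(x.dtype)
        return x * mask / (1.0 - p)
    # at test time the full signal passes through unchanged
    return x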
Reference:
https://blog.csdn.net/guyuealian/article/details/88426648
The regularization class:
import torch

class Regularization(torch.nn.Module):
    def __init__(self, model, weight_decay, p=2):
        '''
        :param model: the model to regularize
        :param weight_decay: regularization coefficient
        :param p: order of the norm, defaults to the 2-norm;
                  p=2 gives L2 regularization, p=1 gives L1 regularization
        '''
        super(Regularization, self).__init__()
        if weight_decay <= 0:
            print("param weight_decay can not be <= 0")
            exit(0)
        self.model = model
        self.weight_decay = weight_decay
        self.p = p
        self.weight_list = self.get_weight(model)
        self.weight_info(self.weight_list)

    def to(self, device):
        '''
        Select the device to run on
        :param device: cuda or cpu
        :return:
        '''
        self.device = device
        super().to(device)
        return self

    def forward(self, model):
        self.weight_list = self.get_weight(model)  # fetch the latest weights
        reg_loss = self.regularization_loss(self.weight_list, self.weight_decay, p=self.p)
        return reg_loss

    def get_weight(self, model):
        '''
        Collect the model's weight parameters (biases are excluded)
        :param model:
        :return:
        '''
        weight_list = []
        for name, param in model.named_parameters():
            if 'weight' in name:
                weight = (name, param)
                weight_list.append(weight)
        return weight_list

    def regularization_loss(self, weight_list, weight_decay, p=2):
        '''
        Compute the norm-based penalty over all collected weights
        :param weight_list:
        :param p: order of the norm, defaults to the 2-norm
        :param weight_decay:
        :return:
        '''
        reg_loss = 0
        for name, w in weight_list:
            w_norm = torch.norm(w, p=p)
            reg_loss = reg_loss + w_norm
        reg_loss = weight_decay * reg_loss
        return reg_loss

    def weight_info(self, weight_list):
        '''
        Print the names of the regularized weights
        :param weight_list:
        :return:
        '''
        print("---------------regularization weight---------------")
        for name, w in weight_list:
            print(name)
        print("---------------------------------------------------")
The referenced article describes how to use the class. I applied it to my own dataset (the dataset from the previous post; see that post for the data preprocessing) as follows:
class module_net(nn.Module):
    def __init__(self, num_input, num_hidden, num_output):
        super(module_net, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)
        self.layer2 = nn.ReLU()
        self.layer3 = nn.Linear(num_hidden, num_hidden)
        self.layer4 = nn.ReLU()
        self.layer5 = nn.Linear(num_hidden, num_hidden)
        self.layer6 = nn.ReLU()
        self.layer7 = nn.Linear(num_hidden, num_output)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x
torch.backends.cudnn.benchmark = True
device = "cpu"
weight_decay = 0.01  # regularization coefficient
model = module_net(8, 10, 1).to(device)
# set up the regularization term
if weight_decay > 0:
    reg_loss = Regularization(model, weight_decay, p=1).to(device)  # p=1 for L1 regularization, p=2 for L2
else:
    print("no regularization")
criterion = nn.BCEWithLogitsLoss().to(device)  # BCEWithLogitsLoss = sigmoid + binary cross entropy
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # no need to pass weight_decay to the optimizer
Loss_list = []      # stores the loss values for plotting later
Accuracy_list = []  # stores the accuracy values for plotting later
for e in range(15000):
    out = model(Variable(x))  # calling the model invokes forward() internally
    loss = criterion(out, Variable(y))
    if weight_decay > 0:
        loss = loss + reg_loss(model)  # add the L1/L2 penalty to the task loss
    Loss_list.append(loss.item())
    #-------------------- accuracy computation -------------------------#
    out_class = (out > 0).float()                  # logits > 0 are predicted as class 1, otherwise class 0
    right_num = torch.sum(y == out_class).float()  # number of correct predictions
    precision = right_num / out.shape[0]           # accuracy
    #-------------------- end of accuracy computation ------------------#
    Accuracy_list.append(precision.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (e + 1) % 1000 == 0:
        print('epoch: {}, loss: {}, precision: {}, right_num: {}'.format(e + 1, loss.item(), precision.item(), right_num.item()))
x1 = list(range(15000))
plt.plot(x1, Loss_list, c='red', label='loss')
plt.plot(x1, Accuracy_list, c='blue', label='precision')
plt.legend()
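Note in passing that for plain L2 regularization PyTorch's optimizers already provide a weight_decay argument, which adds the L2 penalty gradient directly during the parameter update. A one-line alternative to the class above (a sketch, reusing the model and learning rate from this script):

optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=0.01)  # built-in L2 penalty

Unlike the Regularization class, this built-in option also penalizes the bias terms and does not support the L1 norm.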
class Dropout:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # True marks the neurons that are kept, False the ones that are dropped
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            # at test time, scale the output by the keep ratio instead of masking
            return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # gradients only flow through the neurons that were kept in the forward pass
        return dout * self.mask
The key point here is that on every forward pass, self.mask records the neurons to be dropped as False: self.mask is a randomly generated array with the same shape as x, in which the elements whose random value is greater than dropout_ratio are set to True. The backward behaviour is the same as for ReLU: neurons that passed a signal in the forward pass pass the gradient through unchanged, while neurons that passed no signal in the forward pass block the gradient there.
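To make the train/test behaviour concrete, here is a small usage sketch of the class above (the input values are made up for illustration):

import numpy as np

np.random.seed(0)
drop = Dropout(dropout_ratio=0.5)

x = np.array([[1.0, 2.0, 3.0, 4.0]])
print(drop.forward(x, train_flg=True))   # roughly half of the activations are zeroed out
print(drop.forward(x, train_flg=False))  # test time: every activation is scaled by 0.5
print(drop.backward(np.ones_like(x)))    # gradient flows only where the last train-mode mask was True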
In PyTorch, implementing Dropout only requires adding an nn.Dropout(p) layer when building the network, where p is the probability that a neuron is dropped. My modified network structure looks like this:
class module_net(nn.Module):
    def __init__(self, num_input, num_hidden, num_output):
        super(module_net, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)
        self.layer2 = nn.ReLU()
        self.layer3 = nn.Linear(num_hidden, num_hidden)
        self.dropout3 = nn.Dropout(p=0.5)
        self.layer4 = nn.ReLU()
        self.layer5 = nn.Linear(num_hidden, num_hidden)
        self.dropout5 = nn.Dropout(p=0.5)
        self.layer6 = nn.ReLU()
        self.layer7 = nn.Linear(num_hidden, num_output)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.dropout3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.dropout5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x
The training code that follows is the same as above; the results after 10,000 iterations are shown below:
The performance on the test set is as follows:
We can see that the gap between the test-set and training-set errors has shrunk considerably, i.e. the overfitting is much reduced.
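One PyTorch detail worth keeping in mind (not shown explicitly in the results above): nn.Dropout is only active while the module is in training mode, so the model should be switched to evaluation mode before computing test-set metrics, and back again if training continues. Roughly, using the variable names from the full code below:

model.eval()    # disable dropout: the full signal is passed through at test time
out_test = model(x_test_tensor.float())
model.train()   # re-enable dropout before any further training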
The complete code, including the dataset preparation, is given below:
import torch
import numpy as np
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
%matplotlib inline
#-------------------------------------- Regularization class --------------------------------------#
class Regularization(torch.nn.Module):
    def __init__(self, model, weight_decay, p=2):
        '''
        :param model: the model to regularize
        :param weight_decay: regularization coefficient
        :param p: order of the norm, defaults to the 2-norm;
                  p=2 gives L2 regularization, p=1 gives L1 regularization
        '''
        super(Regularization, self).__init__()
        if weight_decay <= 0:
            print("param weight_decay can not be <= 0")
            exit(0)
        self.model = model
        self.weight_decay = weight_decay
        self.p = p
        self.weight_list = self.get_weight(model)
        self.weight_info(self.weight_list)

    def to(self, device):
        '''
        Select the device to run on
        :param device: cuda or cpu
        :return:
        '''
        self.device = device
        super().to(device)
        return self

    def forward(self, model):
        self.weight_list = self.get_weight(model)  # fetch the latest weights
        reg_loss = self.regularization_loss(self.weight_list, self.weight_decay, p=self.p)
        return reg_loss

    def get_weight(self, model):
        '''
        Collect the model's weight parameters (biases are excluded)
        :param model:
        :return:
        '''
        weight_list = []
        for name, param in model.named_parameters():
            if 'weight' in name:
                weight = (name, param)
                weight_list.append(weight)
        return weight_list

    def regularization_loss(self, weight_list, weight_decay, p=2):
        '''
        Compute the norm-based penalty over all collected weights
        :param weight_list:
        :param p: order of the norm, defaults to the 2-norm
        :param weight_decay:
        :return:
        '''
        reg_loss = 0
        for name, w in weight_list:
            w_norm = torch.norm(w, p=p)
            reg_loss = reg_loss + w_norm
        reg_loss = weight_decay * reg_loss
        return reg_loss

    def weight_info(self, weight_list):
        '''
        Print the names of the regularized weights
        :param weight_list:
        :return:
        '''
        print("---------------regularization weight---------------")
        for name, w in weight_list:
            print(name)
        print("---------------------------------------------------")
#---------------------------- data processing -----------------------------------#
data = pd.read_csv('diabetes.csv')
data1 = data.copy()
y = data1.loc[:, ['Outcome']]  # labels
del data1['Outcome']
x = data1  # features
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=2018)  # 70/30 split, random seed 2018
ss = StandardScaler()
x_train = ss.fit_transform(x_train)  # fit the scaler on the training data and standardize it
x_test = ss.transform(x_test)        # standardize the test data with the training-set statistics
#----------------------------- convert to tensors --------------------------#
x_train_tensor = torch.from_numpy(x_train)
x_test_tensor = torch.from_numpy(x_test)
y_train_numpy = np.array(y_train)
y_train_tensor = torch.from_numpy(y_train_numpy)
y_test_numpy = np.array(y_test)
y_test_tensor = torch.from_numpy(y_test_numpy)
x = x_train_tensor.float()
y = y_train_tensor.float()
#----------------------------- network definition --------------------------#
class module_net(nn.Module):
    def __init__(self, num_input, num_hidden, num_output):
        super(module_net, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)
        self.layer2 = nn.ReLU()
        self.layer3 = nn.Linear(num_hidden, num_hidden)
        self.dropout3 = nn.Dropout(p=0.5)
        self.layer4 = nn.ReLU()
        self.layer5 = nn.Linear(num_hidden, num_hidden)
        self.dropout5 = nn.Dropout(p=0.5)
        self.layer6 = nn.ReLU()
        self.layer7 = nn.Linear(num_hidden, num_output)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.dropout3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.dropout5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x
#---------------------------- model training --------------------------#
torch.backends.cudnn.benchmark = True
device = "cpu"
weight_decay = 0.01  # regularization coefficient
model = module_net(8, 10, 1).to(device)
# set up the regularization term
if weight_decay > 0:
    reg_loss = Regularization(model, weight_decay, p=2).to(device)
else:
    print("no regularization")
criterion = nn.BCEWithLogitsLoss().to(device)  # BCEWithLogitsLoss = sigmoid + binary cross entropy
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # no need to pass weight_decay to the optimizer
Loss_list = []      # stores the loss values for plotting later
Accuracy_list = []  # stores the accuracy values for plotting later
for e in range(10000):
    out = model(Variable(x))  # calling the model invokes forward() internally
    loss = criterion(out, Variable(y))
    if weight_decay > 0:
        loss = loss + reg_loss(model)  # add the L2 penalty to the task loss
    Loss_list.append(loss.item())
    #-------------------- accuracy computation -------------------------#
    out_class = (out > 0).float()                  # logits > 0 are predicted as class 1, otherwise class 0
    right_num = torch.sum(y == out_class).float()  # number of correct predictions
    precision = right_num / out.shape[0]           # accuracy
    #-------------------- end of accuracy computation ------------------#
    Accuracy_list.append(precision.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (e + 1) % 1000 == 0:
        print('epoch: {}, loss: {}, precision: {}, right_num: {}'.format(e + 1, loss.item(), precision.item(), right_num.item()))
x1 = list(range(10000))
plt.plot(x1, Loss_list, c='red', label='loss')
plt.plot(x1, Accuracy_list, c='blue', label='precision')
plt.legend()
#----------------------------- model evaluation --------------------------#
model.eval()  # disable dropout for evaluation
x_test_tensor = x_test_tensor.float()
y_test_tensor = y_test_tensor.float()
out_test = model(Variable(x_test_tensor))
loss_test = criterion(out_test, Variable(y_test_tensor))
out_test_class = (out_test > 0).float()                               # logits > 0 are predicted as class 1, otherwise class 0
right_num_test = torch.sum(y_test_tensor == out_test_class).float()   # number of correct predictions
precision_test = right_num_test / out_test.shape[0]                   # accuracy on the test set
loss_test = loss_test.item()
print('loss_test: {}, precision_test: {}, right_num_test: {}'.format(loss_test, precision_test, right_num_test))