drop_prob1, drop_prob2 = 0.2, 0.5
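All of the snippets below also assume the shared setup from earlier in the chapter (data iterators, loss function, layer widths). Here is a minimal sketch of that setup; the import path and the values of num_hiddens1, num_hiddens2, num_epochs, and batch_size follow the Dive-into-DL-PyTorch book code and are assumptions, not spelled out in this post.

import torch
from torch import nn
import d2lzh_pytorch as d2l  # assumed import path (Dive-into-DL-PyTorch)

# Fashion-MNIST images are 28*28 = 784 pixels; two hidden layers of 256 units
num_inputs, num_hiddens1, num_hiddens2 = 784, 256, 256
num_epochs, batch_size = 5, 256  # the later runs below use num_epochs = 10
loss = nn.CrossEntropyLoss()     # softmax + cross-entropy in one module
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)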
Before swapping the two dropout-probability hyperparameters:
# Define the network, declaring each layer explicitly
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2, 10))
# Initialize the model parameters
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)
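The optimizer and training call for this run are the same as in the post-swap snippet below. One side note on the loop above: it draws every parameter, biases included, from N(0, 0.01^2). A per-layer alternative (my sketch, not from this post) initializes weights normally and zeroes the biases:

# Sketch: normal weights, zero biases, applied per Linear layer
for m in net.modules():
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)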
Output:
epoch 1, loss 0.0048, train acc 0.531, test acc 0.740
epoch 2, loss 0.0023, train acc 0.780, test acc 0.804
epoch 3, loss 0.0020, train acc 0.817, test acc 0.834
epoch 4, loss 0.0018, train acc 0.834, test acc 0.842
epoch 5, loss 0.0017, train acc 0.844, test acc 0.848
After swapping the two dropout-probability hyperparameters:
# Define the network, declaring each layer explicitly
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens2, 10))
# Initialize the model parameters
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)
# Define the optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
# Train the model (passing the arguments explicitly)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs,
              batch_size, None, None, optimizer)
Output:
epoch 1, loss 0.0046, train acc 0.547, test acc 0.764
epoch 2, loss 0.0024, train acc 0.772, test acc 0.791
epoch 3, loss 0.0020, train acc 0.812, test acc 0.774
epoch 4, loss 0.0018, train acc 0.827, test acc 0.838
epoch 5, loss 0.0017, train acc 0.837, test acc 0.848
Summary:
After increasing the drop probability of the layer close to the input, the model converges less readily (note the test-accuracy dip to 0.774 at epoch 3 instead of a steady rise).
The usual advice is therefore to set the dropout probability of layers near the input to a smaller value.
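For intuition on what nn.Dropout actually does in training mode, here is a minimal from-scratch sketch of inverted dropout, the scheme PyTorch uses: each element is zeroed with probability drop_prob, and survivors are scaled by 1/(1 - drop_prob) so the expected activation is unchanged. The function name dropout_layer is mine, not from this post.

import torch

def dropout_layer(X, drop_prob):
    # Zero each element with probability drop_prob; rescale survivors
    # by 1/(1 - drop_prob) so that E[output] == X.
    assert 0 <= drop_prob <= 1
    if drop_prob == 1:
        return torch.zeros_like(X)
    mask = (torch.rand(X.shape) > drop_prob).float()
    return mask * X / (1.0 - drop_prob)

This also explains the recommendation above: with a high drop probability right after the first layer, most low-level features are discarded on every step, which makes optimization noisier and convergence slower.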
With dropout:
# Define the network, declaring each layer explicitly,
# then initialize the model parameters
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),   # drop_prob1 = 0.2
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),   # drop_prob2 = 0.5
    nn.Linear(num_hiddens2, 10))
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
# Train the model (passing the arguments explicitly)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs,
              batch_size, None, None, optimizer)
Output:
epoch 1, loss 0.0045, train acc 0.548, test acc 0.741
epoch 2, loss 0.0023, train acc 0.786, test acc 0.756
epoch 3, loss 0.0019, train acc 0.823, test acc 0.807
epoch 4, loss 0.0017, train acc 0.837, test acc 0.832
epoch 5, loss 0.0016, train acc 0.849, test acc 0.776
epoch 6, loss 0.0016, train acc 0.854, test acc 0.827
epoch 7, loss 0.0015, train acc 0.862, test acc 0.856
epoch 8, loss 0.0014, train acc 0.866, test acc 0.862
epoch 9, loss 0.0014, train acc 0.870, test acc 0.856
epoch 10, loss 0.0013, train acc 0.874, test acc 0.839
Without dropout:
(A dropout probability of 0 disables dropout, i.e. drop_prob1=0, drop_prob2=0.)
# Define the network, declaring each layer explicitly
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(0),   # drop_prob = 0 means dropout is effectively off
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(0),
    nn.Linear(num_hiddens2, 10))
# Initialize the model parameters
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
# Train the model (passing the arguments explicitly)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs,
              batch_size, None, None, optimizer)
Output:
epoch 1, loss 0.0044, train acc 0.559, test acc 0.742
epoch 2, loss 0.0022, train acc 0.792, test acc 0.705
epoch 3, loss 0.0018, train acc 0.831, test acc 0.802
epoch 4, loss 0.0016, train acc 0.849, test acc 0.843
epoch 5, loss 0.0015, train acc 0.857, test acc 0.779
epoch 6, loss 0.0014, train acc 0.862, test acc 0.837
epoch 7, loss 0.0013, train acc 0.873, test acc 0.838
epoch 8, loss 0.0013, train acc 0.876, test acc 0.834
epoch 9, loss 0.0012, train acc 0.881, test acc 0.871
epoch 10, loss 0.0012, train acc 0.884, test acc 0.859
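One detail worth knowing: Dropout(0) is not the only way to turn dropout off, because nn.Dropout is only active in training mode; after net.eval() it becomes the identity, which is why dropout never perturbs test-time predictions. A minimal sketch of the mode switch (the variable names are mine):

import torch
from torch import nn

layer = nn.Dropout(0.5)
x = torch.ones(1, 8)

layer.train()    # training mode: elements dropped, survivors scaled by 2
print(layer(x))  # random mix of 0.0 and 2.0 entries

layer.eval()     # evaluation mode: dropout is the identity
print(layer(x))  # exactly x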
(The author is still thinking through a summary of this comparison; the answer will follow in a later update.)
Set wd (weight_decay) to 0.01 (you can also pick your own value):
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2, 10))
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)
optimizer = torch.optim.SGD(net.parameters(), lr=0.5, weight_decay=0.01)
# Train the model (passing the arguments explicitly)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs,
              batch_size, None, None, optimizer)
Output:
epoch 1, loss 0.0053, train acc 0.466, test acc 0.616
epoch 2, loss 0.0030, train acc 0.711, test acc 0.673
epoch 3, loss 0.0029, train acc 0.732, test acc 0.750
epoch 4, loss 0.0029, train acc 0.729, test acc 0.766
epoch 5, loss 0.0030, train acc 0.730, test acc 0.721
epoch 6, loss 0.0029, train acc 0.735, test acc 0.763
epoch 7, loss 0.0027, train acc 0.751, test acc 0.758
epoch 8, loss 0.0029, train acc 0.734, test acc 0.643
epoch 9, loss 0.0028, train acc 0.747, test acc 0.739
epoch 10, loss 0.0027, train acc 0.753, test acc 0.696
Summary:
Using dropout and weight decay together here is not ideal; it performs worse than using either technique alone. The curves suggest under-fitting: train accuracy stalls around 0.75 and the loss barely improves after epoch 2, so with lr=0.5 a penalty of weight_decay=0.01 is likely too strong on top of dropout.
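If the goal is still to combine the two, one common adjustment (my suggestion, not something tried in this post) is to use a much smaller penalty and apply it only to the weights, leaving biases unregularized, via optimizer parameter groups:

# Sketch: decay only the weights, not the biases.
# The weight_decay = 1e-4 value is illustrative, not a tuned result.
decay, no_decay = [], []
for name, param in net.named_parameters():
    (no_decay if name.endswith('bias') else decay).append(param)

optimizer = torch.optim.SGD([
    {'params': decay, 'weight_decay': 1e-4},
    {'params': no_decay, 'weight_decay': 0.0},
], lr=0.5)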