Our previous compact network clearly underfit. To further increase the network's capacity while keeping the parameter budget roughly unchanged, we borrow the design ideas of SqueezeNet: an initial convolution raises the channel count, and the backbone then stacks Fire modules that squeeze and expand, keeping the parameter count small. On this basis we built our mini SqueezeNet for the classification task on this dataset. The architecture is as follows:
CompactNet(
(conv1): Sequential(
(0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(conv2): Sequential(
(0): Fire(
(squeeze): Conv2d(16, 6, kernel_size=(1, 1), stride=(1, 1))
(squeeze_activation): ReLU(inplace=True)
(expand1x1): Conv2d(6, 12, kernel_size=(1, 1), stride=(1, 1))
(expand1x1_activation): ReLU(inplace=True)
(expand3x3): Conv2d(6, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(expand3x3_activation): ReLU(inplace=True)
)
(1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): Fire(
(squeeze): Conv2d(24, 8, kernel_size=(1, 1), stride=(1, 1))
(squeeze_activation): ReLU(inplace=True)
(expand1x1): Conv2d(8, 14, kernel_size=(1, 1), stride=(1, 1))
(expand1x1_activation): ReLU(inplace=True)
(expand3x3): Conv2d(8, 14, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(expand3x3_activation): ReLU(inplace=True)
)
(3): BatchNorm2d(28, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Fire(
(squeeze): Conv2d(28, 6, kernel_size=(1, 1), stride=(1, 1))
(squeeze_activation): ReLU(inplace=True)
(expand1x1): Conv2d(6, 4, kernel_size=(1, 1), stride=(1, 1))
(expand1x1_activation): ReLU(inplace=True)
(expand3x3): Conv2d(6, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(expand3x3_activation): ReLU(inplace=True)
)
(6): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(fc): Sequential(
(0): Linear(in_features=32, out_features=32, bias=True)
(1): Dropout(p=0.5, inplace=False)
(2): Linear(in_features=32, out_features=10, bias=True)
)
)
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 16, 8, 8] 160
BatchNorm2d-2 [-1, 16, 8, 8] 32
ReLU-3 [-1, 16, 8, 8] 0
MaxPool2d-4 [-1, 16, 4, 4] 0
Conv2d-5 [-1, 6, 4, 4] 102
ReLU-6 [-1, 6, 4, 4] 0
Conv2d-7 [-1, 12, 4, 4] 84
ReLU-8 [-1, 12, 4, 4] 0
Conv2d-9 [-1, 12, 4, 4] 660
ReLU-10 [-1, 12, 4, 4] 0
Fire-11 [-1, 24, 4, 4] 0
BatchNorm2d-12 [-1, 24, 4, 4] 48
Conv2d-13 [-1, 8, 4, 4] 200
ReLU-14 [-1, 8, 4, 4] 0
Conv2d-15 [-1, 14, 4, 4] 126
ReLU-16 [-1, 14, 4, 4] 0
Conv2d-17 [-1, 14, 4, 4] 1,022
ReLU-18 [-1, 14, 4, 4] 0
Fire-19 [-1, 28, 4, 4] 0
BatchNorm2d-20 [-1, 28, 4, 4] 56
MaxPool2d-21 [-1, 28, 2, 2] 0
Conv2d-22 [-1, 6, 2, 2] 174
ReLU-23 [-1, 6, 2, 2] 0
Conv2d-24 [-1, 4, 2, 2] 28
ReLU-25 [-1, 4, 2, 2] 0
Conv2d-26 [-1, 4, 2, 2] 220
ReLU-27 [-1, 4, 2, 2] 0
Fire-28 [-1, 8, 2, 2] 0
BatchNorm2d-29 [-1, 8, 2, 2] 16
Linear-30 [-1, 32] 1,056
Dropout-31 [-1, 32] 0
Linear-32 [-1, 10] 330
================================================================
Total params: 4,314
Trainable params: 4,314
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.06
Params size (MB): 0.02
Estimated Total Size (MB): 0.07
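As a quick sanity check on the torchsummary output above, the per-layer parameter counts of a Fire module can be reproduced by hand. A minimal sketch (fire_params is a hypothetical helper, not part of the network code):

def fire_params(inplanes, s, e1, e3):
    # squeeze 1x1, expand 1x1 and expand 3x3 convolutions, each with a bias term
    return (inplanes * s + s) + (s * e1 + e1) + (9 * s * e3 + e3)

print(fire_params(16, 6, 12, 12))  # 846 = 102 + 84 + 660, matching the Conv2d-5/7/9 rows above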
The parameter count is comparable to the previous network, but the depth has increased noticeably. We ran a trial training on the dataset, with the following result:
Wait, am I reading this right? The accuracy is only a hair above 10%, which is practically random guessing! After thinking it over, it makes sense: deep networks are hard to train. Even though BatchNorm does a good job of avoiding vanishing gradients, a deep network can still get stuck in poor local optima, and the result is disappointing. Next time we will try some techniques to get the network to converge properly!
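Before the next attempt, one quick way to tell vanishing gradients apart from other training pathologies is to log gradient magnitudes directly. A small diagnostic sketch (monitor_grads is a hypothetical helper, not part of the script below):

def monitor_grads(model):
    # call right after loss.backward(): near-zero means in the early layers
    # would point at vanishing gradients rather than a bad local optimum
    for name, p in model.named_parameters():
        if p.grad is not None:
            print(f"{name}: mean |grad| = {p.grad.abs().mean().item():.3e}")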
The full code is attached below:
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as Data
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from torchsummary import summary
from torchvision import transforms
class train_mini_mnist(torch.utils.data.Dataset):
    def __init__(self):
        self.X, self.y = load_digits(return_X_y=True)
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, random_state=0)
    def __getitem__(self, index):
        # int32 gives a PIL image in mode 'I', so ToTensor keeps the raw 0-16 pixel values
        img, target = np.array(self.X_train[index].reshape(8, 8), dtype=np.int32), int(self.y_train[index])
        img = transforms.ToPILImage()(img)
        img = img.rotate(random.randint(-20, 20))  # random rotation as augmentation; empty corners are filled with 0
        img = transforms.ToTensor()(img)
        return img / 15., target  # normalize the 0-16 pixel range to roughly [0, 1]
    def __len__(self):
        return len(self.y_train)

class test_mini_mnist(torch.utils.data.Dataset):
    def __init__(self):
        self.X, self.y = load_digits(return_X_y=True)
        self.X = self.X / 15.  # same normalization as the training set
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, random_state=0)
    def __getitem__(self, index):
        return torch.tensor(self.X_test[index].reshape(1, 8, 8), dtype=torch.float32), self.y_test[index]
    def __len__(self):
        return len(self.y_test)
BATCH_SIZE = 8
LEARNING_RATE = 3e-3
EPOCHS = 100
T = 5  # distillation temperature: larger T gives a softer (flatter) label distribution
train_data = train_mini_mnist()
test_data = test_mini_mnist()
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = Data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=True)
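# For reference: load_digits ships 1797 8x8 grayscale digits, and train_test_split's
# default 75/25 split yields 1347 training and 450 test samples.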
class Fire(nn.Module):
def __init__(self, inplanes, squeeze_planes,
expand1x1_planes, expand3x3_planes):
super(Fire, self).__init__()
self.inplanes = inplanes
self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
self.squeeze_activation = nn.ReLU(inplace=True)
self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
kernel_size=1)
self.expand1x1_activation = nn.ReLU(inplace=True)
self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
kernel_size=3, padding=1)
self.expand3x3_activation = nn.ReLU(inplace=True)
def forward(self, x):
x = self.squeeze_activation(self.squeeze(x))
return torch.cat([
self.expand1x1_activation(self.expand1x1(x)),
self.expand3x3_activation(self.expand3x3(x))
], 1)
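# Quick illustrative check (not in the original script): the two expand branches
# are concatenated along dim 1, so Fire(16, 6, 12, 12) maps 16 input channels
# to 12 + 12 = 24 output channels.
assert Fire(16, 6, 12, 12)(torch.randn(1, 16, 4, 4)).shape == (1, 24, 4, 4)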
class CompactNet(nn.Module):
def __init__(self):
super(CompactNet, self).__init__()
        self.conv1 = nn.Sequential(  # input: (1, 8, 8)
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), stride=1, padding=(1, 1)),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)  # (16, 4, 4)
        )
        self.conv2 = nn.Sequential(
            Fire(inplanes=16, squeeze_planes=6, expand1x1_planes=12, expand3x3_planes=12),  # (24, 4, 4)
            nn.BatchNorm2d(24),
            Fire(inplanes=24, squeeze_planes=8, expand1x1_planes=14, expand3x3_planes=14),  # (28, 4, 4)
            nn.BatchNorm2d(28),
            nn.MaxPool2d(kernel_size=2),  # (28, 2, 2)
            Fire(inplanes=28, squeeze_planes=6, expand1x1_planes=4, expand3x3_planes=4),  # (8, 2, 2)
            nn.BatchNorm2d(8)
        )
        self.fc = nn.Sequential(
            nn.Linear(8 * 2 * 2, 32),  # 8*2*2 = 32 flattened features
            nn.Dropout(0.5),
            nn.Linear(32, 10)
        )
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten, equivalent to nn.Flatten
        x = self.fc(x)
        return x
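# End-to-end shapes for an 8x8 input, matching the torchsummary table above:
# (1, 8, 8) -> conv1 -> (16, 4, 4) -> conv2 -> (8, 2, 2) -> flatten -> 32 -> fc -> 10 logits

# SimpleNet below is the previous network; its class definition must stay in scope
# so that torch.load can unpickle the saved checkpoint further down.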
class SimpleNet(nn.Module):
def __init__(self):
super(SimpleNet, self).__init__()
self.conv1 = nn.Sequential(#(1, 8, 8)
nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1),#(8, 8, 8)
nn.BatchNorm2d(8),
nn.ReLU(),
nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1),#(16, 8, 8)
nn.BatchNorm2d(16),
nn.ReLU(),#(16,8,8)
nn.MaxPool2d(kernel_size=2)#(16,4,4)
)
self.conv2 = nn.Sequential(
nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),#(32, 4, 4)
nn.BatchNorm2d(32),
nn.ReLU(),#(32,4,4)
nn.MaxPool2d(kernel_size=2)#(32,2,2)
)
self.fc = nn.Sequential(
nn.Linear(32*2*2, 64),
nn.Dropout(0.5),
nn.Linear(64, 10)
)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten, equivalent to nn.Flatten
x = self.fc(x)
return x
def eval_on_dataloader(my_net, name, loader, length):
    acc = 0.0
    with torch.no_grad():
        for data in loader:
            images, labels = data
            outputs = my_net(images)
            # torch.max returns (values, indices); the indices are the predicted classes
            predict_y = torch.max(outputs, dim=1)[1]
            acc += (predict_y == labels).sum().item()
    accurate = acc / length
    return accurate
def plot_train_and_test_result(train_accs, test_accs):
    epochs = np.arange(1, len(train_accs) + 1, dtype=np.int32)
    plt.plot(epochs, train_accs, label="train_accuracy")
    plt.plot(epochs, test_accs, label="test_accuracy")
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
def soften(array):
    # temperature-scaled softmax: dividing the logits by T flattens the distribution
    return nn.Softmax(dim=1)(array / T)

def loss_func(outputs, soft_labels):
    # soft-label cross entropy for distillation (kept here for the follow-up experiment)
    outputs = nn.Softmax(dim=1)(outputs)
    soft_labels = soften(soft_labels)
    loss = -(soft_labels * torch.log(outputs)).sum()
    return loss / 15.  # rough scaling to keep the loss magnitude manageable
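# Illustrative: with T = 5, soften(torch.tensor([[10., 0.]])) is roughly [[0.88, 0.12]],
# far softer than the ordinary T = 1 softmax of the same logits, about [[0.9999, 0.0001]].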
compact_net = CompactNet()
print(compact_net)
summary(compact_net, (1, 8, 8))
net = torch.load("net0.9955555555555555.pkl")  # the previously trained SimpleNet, intended as a soft-label teacher later
for param in net.parameters():
    param.requires_grad = False  # freeze the loaded network
print(net)
summary(net, (1, 8, 8))
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.Adam(compact_net.parameters(), lr=LEARNING_RATE, weight_decay=3e-4)  # optimize the student network
for name, parameters in net.named_parameters():
    print(name, ":", parameters.size())
best_acc = 0.0
train_accs, test_accs = [], []
for epoch in range(EPOCHS):
    compact_net.train()  # switch to training mode
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optim.zero_grad()  # clear accumulated gradients
        logits = compact_net(images)  # forward pass
        loss = loss_fn(logits, labels.long())
        # (distillation against soft labels from the teacher net is deferred to the next post)
        loss.backward()  # backpropagate
        optim.step()  # update the parameters
        rate = (step + 1) / len(train_loader)
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.4f}".format(int(rate * 100), a, b, loss), end="")
    print()
    compact_net.eval()  # switch to evaluation mode
    train_acc = eval_on_dataloader(compact_net, "train", train_loader, len(train_data))
    test_acc = eval_on_dataloader(compact_net, "test", test_loader, len(test_data))
    train_accs.append(train_acc)
    test_accs.append(test_acc)
    print("epoch:", epoch, "train_acc:", train_acc, " test_acc:", test_acc)
    if test_acc >= best_acc:
        best_acc = test_acc
        torch.save(compact_net, 'compact_net' + str(best_acc) + '.pkl')
print('Finished Training')
plot_train_and_test_result(train_accs,test_accs)
torch.save(compact_net, 'compact_net_final.pkl')
plt.show()