We are now ready to create our first derived class, a fully connected neural network. Traditionally, fully connected networks have been called multi-layer perceptrons (MLPs). In most deep learning frameworks (including PyTorch), their building blocks are simply called Linear layers.
Creating such a class involves the following requirements:
'''
The new class FC derives from our base class Network instead of nn.Module, and
therefore gets access to all the methods available in the base class.
'''
class FC(Network):
    '''
    The model parameters are passed to __init__. We have already seen most of them.
    The additional ones are num_inputs, num_outputs, non_linearity (which defaults
    to 'relu') and the dimensions of the hidden layers.
    '''
    def __init__(self, num_inputs,
                 num_outputs,
                 layers=[],
                 lr=0.003,
                 class_names=None,
                 optimizer_name='Adam',
                 dropout_p=0.2,
                 non_linearity='relu',
                 criterion_name='NLLLoss',
                 model_type='classifier',
                 best_accuracy=0.,
                 best_accuracy_file='best_accuracy.pth',
                 chkpoint_file='chkpoint_file.pth',
                 device=None):
        super().__init__(device=device)
        self.set_model_params(criterion_name,
                              optimizer_name,
                              lr,
                              dropout_p,
                              'FC',
                              best_accuracy,
                              best_accuracy_file,
                              chkpoint_file
                              )
Here is the code again, extended to construct the actual network:
class FC(Network):
    def __init__(self, num_inputs,
                 num_outputs,
                 layers=[],
                 lr=0.003,
                 class_names=None,
                 optimizer_name='Adam',
                 dropout_p=0.2,
                 non_linearity='relu',
                 criterion_name='NLLLoss',
                 model_type='classifier',
                 best_accuracy=0.,
                 best_accuracy_file='best_accuracy.pth',
                 chkpoint_file='chkpoint_file.pth',
                 device=None):
        super().__init__(device=device)
        self.set_model_params(criterion_name,
                              optimizer_name,
                              lr,
                              dropout_p,
                              'FC',
                              best_accuracy,
                              best_accuracy_file,
                              chkpoint_file
                              )
        self.non_linearity = non_linearity
        '''
        We store the actual network as a sequential block in the model attribute
        of the FC object.
        '''
        self.model = nn.Sequential()
        '''
        We create groups of layers and add them to the sequential model. Each group
        consists of a Linear layer, a non-linearity and a Dropout layer with the
        probability that was passed in as a parameter.
        '''
        if len(layers) > 0:
            self.model.add_module('fc1', nn.Linear(num_inputs, layers[0]))
            self.model.add_module('relu1', nn.ReLU())
            self.model.add_module('dropout1', nn.Dropout(p=dropout_p, inplace=True))
            for i in range(1, len(layers)):
                self.model.add_module('fc'+str(i+1), nn.Linear(layers[i-1], layers[i]))
                self.model.add_module('relu'+str(i+1), nn.ReLU())
                self.model.add_module('dropout'+str(i+1), nn.Dropout(p=dropout_p, inplace=True))
            self.model.add_module('out', nn.Linear(layers[-1], num_outputs))
        else:
            '''
            If there are no hidden layers, we just add a single layer to the
            sequential model with the given numbers of inputs and outputs. In this
            case we do not add any non-linearity or dropout, since non-linearities
            are normally applied to hidden layers.
            '''
            self.model.add_module('out', nn.Linear(num_inputs, num_outputs))
Note that PyTorch's CrossEntropyLoss has LogSoftmax built in, whereas with NLLLoss we have to add the LogSoftmax layer ourselves.
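As a quick sanity check (not part of our library), the following sketch shows that LogSoftmax followed by NLLLoss gives the same value as CrossEntropyLoss applied to raw logits:
import torch
import torch.nn as nn

logits = torch.randn(4, 10)           # a batch of 4 samples, 10 classes
targets = torch.tensor([1, 0, 7, 3])  # ground-truth class ids

# NLLLoss expects log-probabilities, so we apply LogSoftmax first
loss_nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)

# CrossEntropyLoss applies LogSoftmax internally and takes raw logits
loss_ce = nn.CrossEntropyLoss()(logits, targets)

print(torch.allclose(loss_nll, loss_ce))  # True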
Putting it all together, the constructor so far looks like this:
class FC(Network):
    def __init__(self, num_inputs,
                 num_outputs,
                 layers=[],
                 lr=0.003,
                 class_names=None,
                 optimizer_name='Adam',
                 dropout_p=0.2,
                 non_linearity='relu',
                 criterion_name='NLLLoss',
                 model_type='classifier',
                 best_accuracy=0.,
                 best_accuracy_file='best_accuracy.pth',
                 chkpoint_file='chkpoint_file.pth',
                 device=None):
        super().__init__(device=device)
        self.set_model_params(criterion_name,
                              optimizer_name,
                              lr,
                              dropout_p,
                              'FC',
                              best_accuracy,
                              best_accuracy_file,
                              chkpoint_file
                              )
        self.non_linearity = non_linearity
        self.model = nn.Sequential()
        if len(layers) > 0:
            self.model.add_module('fc1', nn.Linear(num_inputs, layers[0]))
            self.model.add_module('relu1', nn.ReLU())
            self.model.add_module('dropout1', nn.Dropout(p=dropout_p, inplace=True))
            for i in range(1, len(layers)):
                self.model.add_module('fc'+str(i+1), nn.Linear(layers[i-1], layers[i]))
                self.model.add_module('relu'+str(i+1), nn.ReLU())
                self.model.add_module('dropout'+str(i+1), nn.Dropout(p=dropout_p, inplace=True))
            self.model.add_module('out', nn.Linear(layers[-1], num_outputs))
        else:
            self.model.add_module('out', nn.Linear(num_inputs, num_outputs))
        '''
        If the loss is NLLLoss, we add LogSoftmax as the final layer.
        '''
        if model_type.lower() == 'classifier' and criterion_name.lower() == 'nllloss':
            self.model.add_module('logsoftmax', nn.LogSoftmax(dim=1))
        '''
        We save the model's attributes on the object for later reference.
        '''
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.layer_dims = layers
        '''
        If class_names is passed, we store that dictionary; otherwise we create a
        simple one that turns each class id into its string form, e.g. class id 1
        gets the class name '1'.
        '''
        if class_names is not None:
            self.class_names = class_names
        else:
            self.class_names = {k: str(v) for k, v in enumerate(range(num_outputs))}
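For example, with num_outputs=10 and no class_names argument, the default mapping simply names each class id with its string form:
class_names = {k: str(v) for k, v in enumerate(range(10))}
print(class_names)  # {0: '0', 1: '1', ..., 9: '9'}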
The small module-level helper below flattens a batch of images into vectors:
def flatten_tensor(x):
    return x.view(x.shape[0], -1)
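For instance, a batch of 50 single-channel 28x28 MNIST images becomes a 50x784 matrix, which is exactly what the first Linear layer expects:
import torch

x = torch.randn(50, 1, 28, 28)   # a batch of 50 MNIST-sized images
print(flatten_tensor(x).shape)   # torch.Size([50, 784])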
With the forward method added, the complete class looks like this:
class FC(Network):
    def __init__(self, num_inputs,
                 num_outputs,
                 layers=[],
                 lr=0.003,
                 class_names=None,
                 optimizer_name='Adam',
                 dropout_p=0.2,
                 non_linearity='relu',
                 criterion_name='NLLLoss',
                 model_type='classifier',
                 best_accuracy=0.,
                 best_accuracy_file='best_accuracy.pth',
                 chkpoint_file='chkpoint_file.pth',
                 device=None):
        super().__init__(device=device)
        self.set_model_params(criterion_name,
                              optimizer_name,
                              lr,
                              dropout_p,
                              'FC',
                              best_accuracy,
                              best_accuracy_file,
                              chkpoint_file
                              )
        self.non_linearity = non_linearity
        self.model = nn.Sequential()
        if len(layers) > 0:
            self.model.add_module('fc1', nn.Linear(num_inputs, layers[0]))
            self.model.add_module('relu1', nn.ReLU())
            self.model.add_module('dropout1', nn.Dropout(p=dropout_p, inplace=True))
            for i in range(1, len(layers)):
                self.model.add_module('fc'+str(i+1), nn.Linear(layers[i-1], layers[i]))
                self.model.add_module('relu'+str(i+1), nn.ReLU())
                self.model.add_module('dropout'+str(i+1), nn.Dropout(p=dropout_p, inplace=True))
            self.model.add_module('out', nn.Linear(layers[-1], num_outputs))
        else:
            self.model.add_module('out', nn.Linear(num_inputs, num_outputs))
        if model_type.lower() == 'classifier' and criterion_name.lower() == 'nllloss':
            self.model.add_module('logsoftmax', nn.LogSoftmax(dim=1))
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.layer_dims = layers
        if class_names is not None:
            self.class_names = class_names
        else:
            self.class_names = {k: str(v) for k, v in enumerate(range(num_outputs))}

    '''
    We define the forward method, which, after flattening the input, simply calls
    the forward of our model (nn.Sequential in this case).
    '''
    def forward(self, x):
        return self.model(flatten_tensor(x))
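As a quick shape check, here is the same layer stack built as a standalone nn.Sequential (a minimal sketch, since the Network base class is defined elsewhere), fed with a random batch of MNIST-sized images:
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(784, 512), nn.ReLU(), nn.Dropout(p=0.2),
                      nn.Linear(512, 512), nn.ReLU(), nn.Dropout(p=0.2),
                      nn.Linear(512, 10), nn.LogSoftmax(dim=1))

x = torch.randn(50, 1, 28, 28)
out = model(x.view(x.shape[0], -1))   # flatten, then forward
print(out.shape)                      # torch.Size([50, 10])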
class FC(Network):
    ...
    '''
    In PyTorch the Dropout layer is torch.nn.modules.dropout.Dropout. We can check
    every layer of the sequential model for this type and act accordingly.
    '''
    def _get_dropout(self):
        for layer in self.model:
            if type(layer) == torch.nn.modules.dropout.Dropout:
                return layer.p

    def _set_dropout(self, p=0.2):
        for layer in self.model:
            if type(layer) == torch.nn.modules.dropout.Dropout:
                print('FC: setting dropout prob to {:.3f}'.format(p))
                layer.p = p
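The same type check works on any sequential model. A minimal sketch (using a plain nn.Sequential here, since the full FC class depends on the Network base class):
import torch.nn as nn

model = nn.Sequential(nn.Linear(784, 512), nn.ReLU(), nn.Dropout(p=0.2),
                      nn.Linear(512, 10))

# nn.Dropout is the same class as torch.nn.modules.dropout.Dropout
for layer in model:
    if type(layer) == nn.Dropout:
        layer.p = 0.5   # change the drop probability in place

print(model)  # the Dropout layer now shows p=0.5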
To restore our FC model correctly, we need to save four more attributes: num_inputs, num_outputs, layers and class_names. Since these are specific to the FC model, we should write FC's own versions of the get_model_params and set_model_params methods, which call the base class internally and take care of the extras.
class FC(Network):
    ...
    def set_model_params(self,
                         criterion_name,
                         optimizer_name,
                         lr,
                         dropout_p,
                         model_name,
                         model_type,
                         best_accuracy,
                         best_accuracy_file,
                         chkpoint_file,
                         num_inputs,
                         num_outputs,
                         layers,
                         class_names):
        '''
        We call the parent class's set_model_params method, passing it all the
        required parameters, and then add the remaining ones as attributes on
        the object.
        '''
        super(FC, self).set_model_params(criterion_name,
                                         optimizer_name,
                                         lr,
                                         dropout_p,
                                         model_name,
                                         best_accuracy,
                                         best_accuracy_file,
                                         chkpoint_file
                                         )
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.layer_dims = layers
        self.model_type = model_type
        if class_names is not None:
            self.class_names = class_names
        else:
            self.class_names = {k: str(v) for k, v in enumerate(range(num_outputs))}

    def get_model_params(self):
        '''
        We call the parent class's get_model_params method to retrieve the dict of
        params, then add our model-specific attributes to that dict.
        '''
        params = super(FC, self).get_model_params()
        params['num_inputs'] = self.num_inputs
        params['num_outputs'] = self.num_outputs
        params['layers'] = self.layer_dims
        params['model_type'] = self.model_type
        params['class_names'] = self.class_names
        params['device'] = self.device
        return params
Note that the set_model_params call in __init__ must now also pass the extra arguments (model_type, num_inputs, num_outputs, layers and class_names) so that it matches the signature of this override.
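For illustration, the params dict returned for the MNIST model we train below would contain entries along these lines (a hypothetical sketch; the exact base-class keys come from Network.get_model_params):
params = {
    'model_name': 'FC',
    'model_type': 'classifier',
    'criterion_name': 'NLLLoss',
    'optimizer_name': 'Adadelta',
    'lr': 0.003,
    'dropout_p': 0.2,
    'num_inputs': 784,
    'num_outputs': 10,
    'layers': [512, 512],
    'class_names': {0: '0', 1: '1'},  # abbreviated
    'device': 'cuda',
    # plus best_accuracy, best_accuracy_file and chkpoint_file
}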
def load_chkpoint(chkpoint_file):
    restored_data = torch.load(chkpoint_file)
    params = restored_data['params']
    print('load_chkpoint: best accuracy = {:.3f}'.format(params['best_accuracy']))
    if params['model_type'].lower() == 'classifier':
        # note: model_name is fixed to 'FC' inside __init__, so it is not passed here
        net = FC(num_inputs=params['num_inputs'],
                 num_outputs=params['num_outputs'],
                 layers=params['layers'],
                 device=params['device'],
                 criterion_name=params['criterion_name'],
                 optimizer_name=params['optimizer_name'],
                 lr=params['lr'],
                 dropout_p=params['dropout_p'],
                 best_accuracy=params['best_accuracy'],
                 best_accuracy_file=params['best_accuracy_file'],
                 chkpoint_file=params['chkpoint_file'],
                 class_names=params['class_names']
                 )
    net.load_state_dict(torch.load(params['best_accuracy_file']))
    net.to(params['device'])
    return net
This completes our FC class. We should now test it before moving on. Let's test it on the MNIST dataset.
First, we compute the mean and std values of the MNIST dataset:
train_data = datasets.MNIST(root='data', download=True,
                            transform=transforms.ToTensor())
mean_, std_ = calculate_img_stats(train_data)
mean_, std_
(tensor([0.0839, 0.2038, 0.1042]), tensor([0.2537, 0.3659, 0.2798]))
We create the transforms with the calculated mean and std values as before, apply them to the training and test sets, and split the training set into training and validation portions. Remember that the split_image_data function simply turns the test set into a dataloader when it is given one as an argument.
train_transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                      transforms.RandomRotation(10),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.1307,), (0.2998,))
                                      ])
test_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.1307,), (0.2998,))
                                     ])
MNIST images are grayscale with a single channel, and transforms.Normalize needs only one value per channel, so we take the mean of the three values.
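The single-channel values above are just the averages of the three per-channel statistics computed earlier (a quick check using the mean_ and std_ tensors from calculate_img_stats):
mean_.mean().item(), std_.mean().item()
# approximately 0.1307 and 0.2998, matching the values used in Normalize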
train_dataset = datasets.MNIST(root='data', download=False, train=True,
                               transform=train_transform)
test_dataset = datasets.MNIST(root='data', download=False, train=False,
                              transform=test_transform)
trainloader, validloader, testloader = split_image_data(train_dataset,
                                                        test_dataset,
                                                        batch_size=50)
len(trainloader), len(validloader), len(testloader)
(960, 240, 200)
These lengths check out: the 60,000 training images are split into 48,000 for training and 12,000 for validation, which at a batch size of 50 gives 960 and 240 batches, and the 10,000 test images give 200 batches.
from mylib.fc import *

net = FC(num_inputs=784,
         num_outputs=10,
         layers=[512, 512],
         optimizer_name='Adadelta',
         best_accuracy_file='best_accuracy_mnist_fc_test.pth',
         chkpoint_file='chkpoint_file_mnist_fc_test.pth')
setting optim Ada Delta
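Before training, it is worth printing net.model to confirm the architecture the constructor built; given the code above it should look like this:
print(net.model)
# Sequential(
#   (fc1): Linear(in_features=784, out_features=512, bias=True)
#   (relu1): ReLU()
#   (dropout1): Dropout(p=0.2, inplace=True)
#   (fc2): Linear(in_features=512, out_features=512, bias=True)
#   (relu2): ReLU()
#   (dropout2): Dropout(p=0.2, inplace=True)
#   (out): Linear(in_features=512, out_features=10, bias=True)
#   (logsoftmax): LogSoftmax(dim=1)
# )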
Choosing an optimizer
In my experiments, Adadelta has usually given the highest accuracy on image datasets, considerably better than Adam and SGD, especially on CIFAR-10.
Next, we call the fit function with the training and validation dataloaders, training for 5 epochs and printing every 300 batches, while performing validation every epoch (remember that validate_every defaults to 1).
net.fit(trainloader,validloader,epochs=5,print_every=300)
We get a high accuracy in just 5 epochs, and more epochs should improve it further. So let's first test the save and load checkpoint functions, and then continue training for another 10 epochs.
net.save_chkpoint()
get_model_params: best accuracy = 95.467
get_model_params: chkpoint file = chkpoint_file_mnist_fc_test.pth
checkpoint created successfully in chkpoint_file_mnist_fc_test.pth
net2 = load_chkpoint('chkpoint_file_mnist_fc_test.pth')
load_chkpoint: best accuracy = 95.467
setting optim Ada Delta
We load the saved checkpoint into another variable, to be sure we are dealing with a fresh model.
net2.fit(trainloader,validloader,epochs=10,print_every=300)
updating best accuracy: previous best = 96.683 new best = 96.767
After 10 more epochs, the best accuracy we reach on the validation set is 96.767. Before testing the evaluate method, let's save and restore the model once more.
net2.save_chkpoint()
get_model_params: best accuracy = 96.767
get_model_params: chkpoint file = chkpoint_file_mnist_fc_test.pth
checkpoint created successfully in chkpoint_file_mnist_fc_test.pth
net3 = load_chkpoint('chkpoint_file_mnist_fc_test.pth')
load_chkpoint: best accuracy = 96.767
setting optim Ada Delta
net3.evaluate(testloader)
(96.94,
[('0', 98.67346938775509),
('1', 99.55947136563876),
('2', 96.89922480620154),
('3', 97.12871287128712),
('4', 98.06517311608961),
('5', 93.94618834080718),
('6', 96.76409185803759),
('7', 96.49805447470817),
('8', 96.09856262833677),
('9', 95.14370664023785)])
Next we also test our predict function. To do this, we turn the testloader into a Python iterator and fetch the next batch from it with the iterator's 'next' method. If you are not familiar with Python iterators, see any good tutorial, e.g. https://www.datacamp.com/community/tutorials/pythoniterator-tutorial, for more information.
iterator = iter(testloader)
imgs_,labels_ = next(iterator)
imgs_[0].shape,labels_[0].item()
(torch.Size([1, 28, 28]), 7)
Above we can see that the first image of our first batch has shape 1x28x28 and its label is 7. To display it with matplotlib's pyplot, we convert it to numpy and remove the extra dimension, so that it is 28x28 instead of 1x28x28.
Note that to convert a PyTorch tensor into a numpy array, you just use the .numpy() method available on PyTorch tensor objects.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

fig = plt.figure(figsize=(40, 10))
ax = fig.add_subplot(2, 10, 1, xticks=[], yticks=[])
ax.imshow(np.squeeze(imgs_[0].numpy()), cmap='gray')
net3.predict(imgs_[0])[1].item()
7