Contents
1. The workflow for building a model
1) Steps
2) Complete code, using the handwritten-digit MNIST dataset as an example (the built-in dataset is used here)
2. Four ways to build a model
1) Method one: nn.Sequential()
2) Method two: collections.OrderedDict()
3) Method three: create the container first, then add new modules with the add_module function
4) Method four: functions from torch.nn.functional
3. Building VGG16
4. Implementing a fully convolutional layer
5. Saving the output feature maps of each sub-module
1) First, be clear about the structure of the model you need to build: determine each layer's channel count, stride, padding, kernel size, and so on; the output size of every layer follows from these, as the sketch below shows.
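While planning, it helps to sanity-check the spatial sizes with the standard conv/pool output formula. A small helper of my own (a sketch, not part of the model code below):

def conv_out_size(n, k, s=1, p=0):
    """Spatial output size of a conv/pool layer: floor((n + 2p - k) / s) + 1."""
    return (n + 2 * p - k) // s + 1

# VGG-style layers: a 3x3 conv with stride 1 and padding 1 preserves the size,
# while a 2x2 max-pool with stride 2 halves it.
print(conv_out_size(224, k=3, s=1, p=1))  # 224
print(conv_out_size(224, k=2, s=2))       # 112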
2) Create the model class and make it inherit from nn.Module:
class VGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()  # inherit the parent class's attributes and methods
3) Following the order of the forward pass, assemble each sub-module. Sub-modules are usually stored in an nn.Sequential() container, and this is usually done inside the model class's __init__() method:
self.conv1 = nn.Sequential(
    nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1)),
    nn.ReLU(inplace=True),  # inplace=True overwrites the input in place to save memory
)
4) Add the forward() method. The __init__() method above only creates the sub-modules, each of which is independent, so the forward() method chains them together in the order of the forward pass.
# forward pass
def forward(self, x):
    # thirteen conv blocks, then the fully convolutional layer conv14 (see section 4)
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    x = self.conv4(x)
    x = self.conv5(x)
    x = self.conv6(x)
    x = self.conv7(x)
    x = self.conv8(x)
    x = self.conv9(x)
    x = self.conv10(x)
    x = self.conv11(x)
    x = self.conv12(x)
    x = self.conv13(x)
    x = self.conv14(x)
    # flatten the feature map into a vector: [1,num_classes,1,1] --> num_classes
    output = x.view(x.size(0), -1)
    return output
5) After step 4) the model is fully built. Next, instantiate it and choose the loss function, the gradient-descent optimizer, the learning-rate schedule, and so on:
# choose the optimizer, i.e. the gradient-descent algorithm; SGD and Adam are the most common
optimizer = optim.Adam(net.parameters(), lr=0.01)
# choose the learning-rate scheduler, i.e. how the learning rate changes over training
'''
milestones: breakpoints
epoch 0-75:   lr
epoch 75-150: gamma * lr
epoch 150-:   gamma * gamma * lr
'''
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[75, 150], gamma=0.5)
# choose the loss function; cross-entropy is used here to compute the model's loss
criterion = nn.CrossEntropyLoss()
# print(net)
6) If the dataset is large, training must proceed in mini-batches; a minimal DataLoader sketch follows below.
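PyTorch's torch.utils.data.DataLoader handles this batching out of the box (the complete code below hand-rolls a generator instead). A minimal sketch with stand-in tensors, not taken from the original code:

import torch
from torch.utils.data import TensorDataset, DataLoader

images = torch.randn(60000, 1, 28, 28)   # stand-in for the MNIST images
labels = torch.randint(0, 10, (60000,))  # stand-in for the MNIST labels
loader = DataLoader(TensorDataset(images, labels), batch_size=8, shuffle=True)
for data, target in loader:
    pass  # one training step per batch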
7) Training
8) Testing
9) Saving the model
from collections import OrderedDict  # OrderedDict is a dict subclass that remembers insertion order
from torch import optim
from torch import nn

# build the model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        '''
        A conv block generally consists of a conv layer, an activation layer, and a pooling layer.
        A model generally consists of several conv blocks followed by fully connected layers.
        '''
        # input shape (1, 28, 28)
        self.conv1 = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(1, 16, (5, 5), (1, 1), (2, 2))),  # (16, 28, 28)
            ('ReLU1', nn.ReLU()),
            ('pool1', nn.MaxPool2d(2)),  # (16, 14, 14)
        ]))
        self.conv2 = nn.Sequential(OrderedDict([
            ('conv2', nn.Conv2d(16, 32, (5, 5), (1, 1), (2, 2))),  # (32, 14, 14)
            ('ReLU2', nn.ReLU()),
            ('pool2', nn.MaxPool2d(2))  # (32, 7, 7)
        ]))
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten the feature map: (batch, 32, 7, 7) -> (batch, 32*7*7)
        output = self.linear(x)
        return output
# after instantiating the model, choose the optimizer and learning-rate settings
net = Net()
# choose the optimizer, i.e. the gradient-descent algorithm; SGD and Adam are the most common
optimizer = optim.Adam(net.parameters(), lr=0.01)
# choose the learning-rate scheduler, i.e. how the learning rate changes over training
'''
milestones: breakpoints
epoch 0-75:   lr
epoch 75-150: gamma * lr
epoch 150-:   gamma * gamma * lr
'''
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[75, 150], gamma=0.5)
# choose the loss function; cross-entropy is used here to compute the model's loss
criterion = nn.CrossEntropyLoss()
# print(net)
# split the data into batches of the given size, then feed the batches in one by one for training
def iterate_minibatches(inputs, targets, batch_size, shuffle=True):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    start_idx = None
    # step through the sample indices with stride batch_size to get each batch's starting index
    for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batch_size]
        else:
            excerpt = slice(start_idx, start_idx + batch_size)  # a slice of consecutive indices
        # this function is a generator: each yield hands back one batch of features and labels
        yield inputs[excerpt], targets[excerpt]
    # hand back the final, smaller batch of leftover samples, if any
    if start_idx is not None and start_idx + batch_size < len(inputs):
        excerpt = indices[start_idx + batch_size:] if shuffle else slice(start_idx + batch_size, len(inputs))
        yield inputs[excerpt], targets[excerpt]
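# --- Hypothetical smoke test (not in the original post): the generator yields
# --- full batches of size batch_size, then one last, smaller batch of leftovers.
import numpy as np
for _xb, _yb in iterate_minibatches(np.arange(10).reshape(10, 1), np.arange(10), batch_size=4, shuffle=False):
    print(_xb.shape, _yb.shape)  # (4, 1) (4,)  then  (4, 1) (4,)  then  (2, 1) (2,)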
# hand-written training procedure
# load the data
import numpy as np
import torch
from tensorflow.examples.tutorials.mnist import input_data  # legacy TensorFlow 1.x API
mnist = input_data.read_data_sets("data/MNIST_data/", one_hot=False)
train_data_images = []
train_data_labels = []
test_data_images = []
test_data_labels = []
X_train, y_train = mnist.train.images, mnist.train.labels  # X_train is a numpy ndarray: (55000, 784), (55000,)
X_test, y_test = mnist.test.images, mnist.test.labels  # (10000, 784), (10000,)
X_valid, y_valid = mnist.validation.images, mnist.validation.labels  # (5000, 784), (5000,)
train_data_images = np.concatenate((X_train, X_valid), axis=0)  # (60000, 784)
train_data_labels = np.concatenate((y_train, y_valid), axis=0)  # (60000,)
test_data_images = X_test  # (10000, 784)
test_data_labels = y_test  # (10000,)
# reshape
train_data_images = np.reshape(train_data_images, [-1, 1, 28, 28])  # (60000, 1, 28, 28)
test_data_images = np.reshape(test_data_images, [-1, 1, 28, 28])  # (10000, 1, 28, 28)
# training loop
epochs = 100
train_loss = 0
train_correct = 0
total = 0
for epoch in range(epochs):
    # iterate_minibatches() splits train_data_images/train_data_labels into batches; shuffle=True randomizes the order
    # iterate_minibatches() returns a generator object
    for data, target in iterate_minibatches(train_data_images, train_data_labels, batch_size=8, shuffle=True):
        data = torch.from_numpy(data)             # numpy -> float tensor
        target = torch.from_numpy(target).long()  # CrossEntropyLoss expects long class indices
        optimizer.zero_grad()  # clear the gradients
        output = net(data)
        loss = criterion(output, target)  # compute the loss
        loss.backward()  # backward pass
        optimizer.step()
        train_loss += loss.item()  # accumulate the loss
        # take the class with the highest predicted score as the prediction
        pred = torch.max(output, 1)  # torch.max returns two tensors: each row's maximum and the index of that maximum
        train_correct += (pred[1] == target).sum().item()  # a prediction counts as correct when the argmax matches the true label
        total += target.size(0)
    scheduler.step()  # advance the learning-rate schedule once per epoch
Train_Accuracy = train_correct / total
Train_Loss = train_loss
# test loop
test_loss = 0
test_correct = 0
total = 0
with torch.no_grad():  # no gradients needed during evaluation
    for data, target in iterate_minibatches(test_data_images, test_data_labels, batch_size=8, shuffle=True):
        data = torch.from_numpy(data)
        target = torch.from_numpy(target).long()
        output = net(data)
        loss = criterion(output, target)
        test_loss += loss.item()
        pred = torch.max(output, 1)
        test_correct += (pred[1] == target).sum().item()
        total += target.size(0)
Test_Accuracy = test_correct / total
Test_Loss = test_loss
# save the model
model_out_path = "model.pth"
torch.save(net, model_out_path)
print("Checkpoint saved to {}".format(model_out_path))
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(  # input shape (1, 28, 28)
            nn.Conv2d(1, 16, 5, 1, 2),  # output shape (16, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2),  # output shape (16, 14, 14)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),  # output shape (32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),  # output shape (32, 7, 7)
        )
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.linear(x)
        return output
import torch.nn as nn
from collections import OrderedDict  # OrderedDict is a dict subclass that remembers insertion order

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(1, 16, 5, 1, 2)),
            ('ReLU1', nn.ReLU()),
            ('pool1', nn.MaxPool2d(2)),
        ]))
        self.conv2 = nn.Sequential(OrderedDict([
            ('conv2', nn.Conv2d(16, 32, 5, 1, 2)),
            ('ReLU2', nn.ReLU()),
            ('pool2', nn.MaxPool2d(2)),
        ]))
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.linear(x)
        return output
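A practical difference between method one and method two: a plain nn.Sequential addresses its layers by integer index only, while OrderedDict (like add_module below) also registers each layer under a name. A quick sketch of my own against the class above:

net = Net()
print(net.conv1[0])     # index access works for any nn.Sequential
print(net.conv1.conv1)  # named access also works here, because the Conv2d was registered as 'conv1'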
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential()
        self.conv1.add_module('conv1', nn.Conv2d(1, 16, 5, 1, 2))
        self.conv1.add_module('ReLU1', nn.ReLU())
        self.conv1.add_module('pool1', nn.MaxPool2d(2))
        self.conv2 = nn.Sequential()
        self.conv2.add_module('conv2', nn.Conv2d(16, 32, 5, 1, 2))
        self.conv2.add_module('ReLU2', nn.ReLU())
        self.conv2.add_module('pool2', nn.MaxPool2d(2))
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.linear(x)
        return output
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 5, 1, 2)
        self.conv2 = nn.Conv2d(16, 32, 5, 1, 2)
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(x.size(0), -1)  # flatten before the linear layer
        output = self.linear(x)
        return output
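All four methods build the same architecture; a quick shape check with a dummy MNIST-sized batch (my own smoke test, not from the original post):

import torch
net = Net()
x = torch.randn(1, 1, 28, 28)
print(net(x).shape)  # torch.Size([1, 10])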
from torch import nn

class VGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()  # inherit the parent class's attributes and methods
        # build each sub-network module in forward-pass order
        ## Thirteen conv layers; each conv block holds a conv layer, an activation layer and,
        ## where VGG16 downsamples, a pooling layer, all stored in an nn.Sequential() container.
        # [1,3,224,224]
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),  # inplace=True overwrites the input in place to save memory
        )
        # [1,64,224,224]
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,64,112,112]
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,128,112,112]
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 128, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,128,56,56]
        self.conv5 = nn.Sequential(
            nn.Conv2d(128, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,256,56,56]
        self.conv6 = nn.Sequential(
            nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,256,56,56]
        self.conv7 = nn.Sequential(
            nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,256,28,28]
        self.conv8 = nn.Sequential(
            nn.Conv2d(256, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True)
        )
        # [1,512,28,28]
        self.conv9 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True)
        )
        # [1,512,28,28]
        self.conv10 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,512,14,14]
        self.conv11 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,512,14,14]
        self.conv12 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,512,14,14] --> [1,512,7,7]
        self.conv13 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # Three fully connected layers, with an activation and a dropout layer between them
        self.classifier = nn.Sequential(
            # [1*512*7*7]
            nn.Linear(1 * 512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # 4096
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # 4096 --> num_classes
            nn.Linear(4096, num_classes)
        )

    # forward pass
    def forward(self, x):
        # thirteen conv layers
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        # flatten the feature map into a vector: [1,512,7,7] --> 1*512*7*7
        x = x.view(x.size(0), -1)
        # three fully connected layers
        output = self.classifier(x)
        return output

## quick test
import torch
vgg16 = VGG16(21)
print(vgg16)
input_ = torch.randn(1, 3, 224, 224)
output = vgg16(input_)
print(output.shape)
print(output)
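For comparison, torchvision ships a reference VGG16 implementation (newer versions add an adaptive average-pool before the classifier); loading it is one line:

from torchvision import models
tv_vgg16 = models.vgg16()  # random init; pass the pretrained/weights argument of your torchvision version for ImageNet weights
print(tv_vgg16)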
Core idea: replace the fully connected layers with convolutional layers. This usually takes some careful design so that the final output has shape [1, num_classes, 1, 1]. Take VGG16 as an example: its final feature map has shape [1, 512, 7, 7]. To reduce it to a 1x1 feature map, convolve with a 7x7 kernel; using num_classes such kernels yields a feature map of shape [1, num_classes, 1, 1], and an activation applied before the output gives the class scores.
# fully convolutional layer
self.conv14 = nn.Sequential(
    nn.Conv2d(512, num_classes, (7, 7), (1, 1)),
    nn.ReLU(inplace=True)
)
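A quick shape check of this idea (my own sketch): a 7x7 kernel with no padding collapses the 7x7 feature map to 1x1.

import torch
from torch import nn

num_classes = 21  # example value
fc_as_conv = nn.Conv2d(512, num_classes, (7, 7), (1, 1))
feat = torch.randn(1, 512, 7, 7)  # VGG16's final feature-map shape
print(fc_as_conv(feat).shape)     # torch.Size([1, 21, 1, 1])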
from torch import nn

class VGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()  # inherit the parent class's attributes and methods
        # build each sub-network module in forward-pass order
        ## Thirteen conv layers; each conv block holds a conv layer, an activation layer and,
        ## where VGG16 downsamples, a pooling layer, all stored in an nn.Sequential() container.
        # [1,3,224,224]
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),  # inplace=True overwrites the input in place to save memory
        )
        # [1,64,224,224]
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,64,112,112]
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,128,112,112]
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 128, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,128,56,56]
        self.conv5 = nn.Sequential(
            nn.Conv2d(128, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,256,56,56]
        self.conv6 = nn.Sequential(
            nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,256,56,56]
        self.conv7 = nn.Sequential(
            nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,256,28,28]
        self.conv8 = nn.Sequential(
            nn.Conv2d(256, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True)
        )
        # [1,512,28,28]
        self.conv9 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True)
        )
        # [1,512,28,28]
        self.conv10 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,512,14,14]
        self.conv11 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,512,14,14]
        self.conv12 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,512,14,14] --> [1,512,7,7]
        self.conv13 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # fully convolutional layer
        self.conv14 = nn.Sequential(
            nn.Conv2d(512, num_classes, (7, 7), (1, 1)),
            nn.ReLU(inplace=True)
        )

    # forward pass
    def forward(self, x):
        # thirteen conv layers, then the fully convolutional layer
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        x = self.conv14(x)
        # flatten the feature map into a vector: [1,num_classes,1,1] --> num_classes
        output = x.view(x.size(0), -1)
        return output

## quick test
import torch
vgg16 = VGG16(21)
print(vgg16)
input_ = torch.randn(1, 3, 224, 224)
output = vgg16(input_)
print(output.shape)
print(output)
In the forward function, simply append the outputs of the feature layers you want to keep to a list. The example here is from SSD: features collects the several feature maps needed later, so that they can be trained on at multiple scales.
def forward(self, x):
    features = []
    # apply vgg up to conv4_3
    for i in range(23):
        x = self.vgg[i](x)
    s = self.l2_norm(x)  # Conv4_3 L2 normalization
    features.append(s)
    # apply vgg up to fc7
    for i in range(23, len(self.vgg)):
        x = self.vgg[i](x)
    features.append(x)
    # extra layers: keep the output of every second layer
    for k, v in enumerate(self.extras):
        x = F.relu(v(x), inplace=True)
        if k % 2 == 1:
            features.append(x)
    return tuple(features)
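If you would rather not edit forward at all, PyTorch's register_forward_hook can capture intermediate outputs from outside the model. A minimal sketch using the VGG16 class defined above (the chosen layers are arbitrary examples):

import torch
feature_maps = {}

def save_output(name):
    # return a hook that stores the module's output under the given name
    def hook(module, inputs, output):
        feature_maps[name] = output
    return hook

model = VGG16(21)
model.conv7.register_forward_hook(save_output('conv7'))
model.conv13.register_forward_hook(save_output('conv13'))
_ = model(torch.randn(1, 3, 224, 224))
print({k: v.shape for k, v in feature_maps.items()})  # conv7: [1,256,28,28], conv13: [1,512,7,7]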