A previous article walked through the VGG16 architecture; this time we implement a plain sequential VGGNet with the PyTorch framework.
The VGG network built here is split into 4 blocks, and every convolution is followed by a BN layer and a ReLU layer, which stabilizes training and adds non-linearity. The details are in the code below. I have also added a per-layer summary view: uncomment the commented-out code to print the layer-by-layer information shown in the second screenshot.
import torch
# convolution and other layer modules
import torch.nn as nn
# functional API, used here for log_softmax
import torch.nn.functional as F
from torchsummary import summary
class VGGbase(nn.Module):
    # initializer
    def __init__(self):
        # initialize the parent class
        super(VGGbase, self).__init__()
        # define the operators
        # input: 32 * 32 * 3
        # 3 * 28 * 28 (after the random crop used in data augmentation: 32 --> 28; keeping 32 also works)
        # nn.Sequential: define the conv / BN / activation layers together in one block
        # (the pooling layers are defined separately)
        self.conv1 = nn.Sequential(
            # in channels: 3, out channels: 64, filter: 3*3, stride: 1, padding: 1
            # 28*28*3 ---> (28-3+2*1)/1 + 1 = 28 ---> 28*28*64
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        # 28*28*64 ---> (28-2)/2 + 1 = 14 ---> 14*14*64
        self.max_pooling1 = nn.MaxPool2d(kernel_size=2, stride=2)
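        # Note: all the shape comments in this file follow the standard
        # output-size rule for Conv2d and MaxPool2d:
        # out = floor((in + 2*padding - kernel) / stride) + 1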
        # 14*14*64 ---> (14-3+2*1)/1 + 1 = 14 ---> 14*14*128
        self.conv2_1 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        # 14*14*128 ---> (14-3+2*1)/1 + 1 = 14 ---> 14*14*128
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        # 14*14*128 ---> (14-2)/2 + 1 = 7 ---> 7*7*128
        self.max_pooling2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # 7*7*128 ---> (7-3+2*1)/1 + 1 = 7 ---> 7*7*256
        self.conv3_1 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        # 7*7*256 ---> (7-3+2*1)/1 + 1 = 7 ---> 7*7*256
        self.conv3_2 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        # 7*7*256 ---> floor((7-2+2*1)/2) + 1 = 4 ---> 4*4*256
        # (padding=1 here so the odd input size does not lose a pixel)
        self.max_pooling3 = nn.MaxPool2d(kernel_size=2,
                                         stride=2,
                                         padding=1)
        # 4*4*256 ---> (4-3+2*1)/1 + 1 = 4 ---> 4*4*512
        self.conv4_1 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU()
        )
        # 4*4*512 ---> (4-3+2*1)/1 + 1 = 4 ---> 4*4*512
        self.conv4_2 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU()
        )
        # 4*4*512 ---> (4-2)/2 + 1 = 2 ---> 2*2*512
        self.max_pooling4 = nn.MaxPool2d(kernel_size=2,
                                         stride=2)
        # input to the FC layer: batchsize(-1) * 512 * 2 * 2 --> batchsize * (512 * 4)
        # 10-way classification, so the FC layer outputs 10
        self.fc = nn.Linear(512 * 4, 10)
    # chain the blocks together over the input tensor (the "sequential" network)
    def forward(self, x):
        # x --> batchsize * 3 * 28 * 28
        batchsize = x.size(0)
        out = self.conv1(x)
        out = self.max_pooling1(out)
        out = self.conv2_1(out)
        out = self.conv2_2(out)
        out = self.max_pooling2(out)
        out = self.conv3_1(out)
        out = self.conv3_2(out)
        out = self.max_pooling3(out)
        out = self.conv4_1(out)
        out = self.conv4_2(out)
        out = self.max_pooling4(out)
        # flatten the tensor; -1 is inferred from the remaining dims (512 * 4 here)
        # batchsize * c * h * w --> batchsize * n
        out = out.view(batchsize, -1)
        out = self.fc(out)  # batchsize * 10
        # log-probabilities over the 10 classes
        out = F.log_softmax(out, dim=1)
        return out
def VGGNet():
    # uncomment the lines below to print the per-layer summary shown in the second screenshot
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # vgg = VGGbase()
    # net = vgg.to(device)
    # summary(net, (3, 28, 28))
    return VGGbase()


# if __name__ == '__main__':
#     VGGNet()
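As a quick sanity check, here is a minimal sketch that feeds one dummy 28*28 RGB image through the network and confirms the output shape (the random tensor is just a stand-in for real data):

# Minimal smoke test with a dummy batch of one 28x28 RGB image.
net = VGGNet()
x = torch.randn(1, 3, 28, 28)
out = net(x)
print(out.shape)  # torch.Size([1, 10]) -- log-probabilities over the 10 classes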
The least intuitive part is the FC layer: before a tensor enters the FC layer it has to be flattened. In the summary printout, every layer's shape starts with -1; that -1 is actually the batch_size, which gets filled in automatically once it is known.
After the last pooling layer the output is 512*2*2. view reshapes it to batch_size * (512*2*2), i.e. one flat vector per sample, which can then be passed into the FC layer and mapped onto the 10 output classes, as the snippet below demonstrates.
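The same reshape in isolation, with a hypothetical batch size of 8:

pooled = torch.randn(8, 512, 2, 2)      # output of the last pooling layer
flat = pooled.view(pooled.size(0), -1)  # -1 is inferred as 512*2*2 = 2048
print(flat.shape)                       # torch.Size([8, 2048])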
So building a plain sequential network in PyTorch really isn't that complicated.
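One last note on training: because forward already ends with F.log_softmax, the matching loss is nn.NLLLoss; nn.CrossEntropyLoss would apply log_softmax a second time. A minimal sketch with dummy data (the batch size of 8 and random labels are just illustrative):

net = VGGNet()
criterion = nn.NLLLoss()              # expects log-probabilities as input
inputs = torch.randn(8, 3, 28, 28)    # dummy batch
labels = torch.randint(0, 10, (8,))   # dummy class indices
loss = criterion(net(inputs), labels)
loss.backward()
print(loss.item())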