In 2014, researchers from the Visual Geometry Group at the University of Oxford, together with Google DeepMind, developed a new deep convolutional neural network, VGGNet, which took second place in the classification task of ILSVRC 2014 (first place went to GoogLeNet, proposed the same year). Paper: Very Deep Convolutional Networks for Large-Scale Image Recognition. The paper studies how the depth of a convolutional neural network affects recognition accuracy on large-scale image datasets. Its main contribution is building networks of various depths entirely from very small (3×3) convolution kernels and evaluating them, showing that depths of 16-19 weight layers achieve good recognition accuracy. These are the VGG-16 and VGG-19 models that are now commonly used to extract image features.
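Both models ship with torchvision, so they can be used as off-the-shelf feature extractors. A minimal sketch, not from the original post (it assumes torchvision >= 0.13 for the weights argument):

    import torch
    from torchvision import models

    # Randomly initialized VGG-16; pass pretrained ImageNet weights instead if available.
    vgg16 = models.vgg16(weights=None)

    # The convolutional stack (vgg16.features) is the part usually reused as a
    # generic feature extractor; the fully connected head is discarded or replaced.
    with torch.no_grad():
        feats = vgg16.features(torch.randn(1, 3, 224, 224))
    print(feats.shape)  # torch.Size([1, 512, 7, 7])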
VGGNet can be viewed as a deeper AlexNet: the whole network is a stack of convolutional layers followed by fully connected layers. Unlike AlexNet, VGG uses only small 3×3 convolution kernels.
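One reason for this choice, argued in the VGG paper: two stacked 3×3 convolutions cover the same 5×5 receptive field as a single 5×5 convolution, with fewer parameters and an extra non-linearity in between. A quick illustrative check (not from the original post):

    import torch
    from torch import nn

    x = torch.randn(1, 1, 5, 5)

    # Two 3x3 convolutions (no padding) collapse a 5x5 patch to a single value,
    # i.e. together they see a 5x5 receptive field ...
    stacked = nn.Sequential(nn.Conv2d(1, 1, kernel_size=3), nn.Conv2d(1, 1, kernel_size=3))
    single = nn.Conv2d(1, 1, kernel_size=5)

    print(stacked(x).shape)  # torch.Size([1, 1, 1, 1])
    print(single(x).shape)   # torch.Size([1, 1, 1, 1])
    # ... but with 2 * 3 * 3 = 18 weights per channel pair instead of 5 * 5 = 25.

The rest of this section builds a small VGG-style network in this spirit for single-channel inputs (the 256 * 7 * 7 classifier input implies 28×28 images, e.g. MNIST-style data). PyTorch prints its module structure as follows; the implementation comes after the printout.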
VGGNet(
  (vgg1): VGGBlock(
    (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (vgg2): VGGBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (vgg3): VGGBlock(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=12544, out_features=1024, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=1024, out_features=512, bias=True)
    (3): ReLU(inplace=True)
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
import torch
from torch import nn


class VGGBlock(nn.Module):
    # A VGG-style block: two or three 3x3 convolutions, optionally followed by pooling.
    def __init__(self, in_channel, out_channel, num_conv):
        super(VGGBlock, self).__init__()
        self.num_conv = num_conv
        # All submodules are registered regardless of num_conv, which is why the printed
        # structure above shows conv3/relu3 (and maxpool1) in every block even when
        # forward() does not use them.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, padding=1)

    def forward(self, x):
        x = self.relu1(self.conv1(x))
        x = self.relu2(self.conv2(x))
        if self.num_conv == 3:
            # Three-conv block: the third convolution takes the place of the pooling
            # step, so the spatial size is preserved.
            x = self.relu3(self.conv3(x))
        else:
            # Two-conv block: end with a stride-2 max pooling that halves the spatial size.
            x = self.maxpool1(x)
        return x
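A quick shape check of a single block (illustrative only, not in the original post; it assumes the same 28×28 single-channel input as the full network below):

    blk2 = VGGBlock(1, 64, 2)                       # two convs, then pooling
    print(blk2(torch.randn(1, 1, 28, 28)).shape)    # torch.Size([1, 64, 14, 14])

    blk3 = VGGBlock(128, 256, 3)                    # three convs, no pooling
    print(blk3(torch.randn(1, 128, 7, 7)).shape)    # torch.Size([1, 256, 7, 7])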
class VGGNet(nn.Module):
    # A small VGG-style network: three VGG blocks followed by a fully connected head.
    def __init__(self, num_classes):
        super(VGGNet, self).__init__()
        # For a 1x28x28 input: vgg1 and vgg2 each halve the spatial size (28 -> 14 -> 7),
        # while vgg3 keeps it at 7x7, giving 256 * 7 * 7 = 12544 features for the classifier.
        self.vgg1 = VGGBlock(1, 64, 2)
        self.vgg2 = VGGBlock(64, 128, 2)
        self.vgg3 = VGGBlock(128, 256, 3)
        self.classifier = nn.Sequential(
            nn.Linear(256 * 7 * 7, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.vgg1(x)
        x = self.vgg2(x)
        x = self.vgg3(x)
        x = x.reshape(x.shape[0], -1)  # flatten to (batch, 256 * 7 * 7)
        x = self.classifier(x)
        return x
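A minimal end-to-end check (again assuming 28×28 single-channel inputs; the 10-class head matches the printout above):

    model = VGGNet(num_classes=10)
    print(model)                      # reproduces the module structure shown earlier

    x = torch.randn(4, 1, 28, 28)     # dummy batch of 4 grayscale images
    logits = model(x)
    print(logits.shape)               # torch.Size([4, 10])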