文章目录
前言
一、步骤解析
二、代码实现
三、验证
利用PyTorch对VGG16的代码实现过程进行解析,并对实现结果进行验证。
输入宽度、高度、通道数分别为224、224、3的图片,输出宽度、高度、通道数分别为224、224、64的图片。该过程经历两次卷积,两次非线性激活。
nn.Conv2d(3, 64, kernel_size=3, padding=1), # 输入3通道, 输出64通道, 卷积核为3×3,填充为1
nn.ReLU(inplace=True),
nn.Conv2d(64, 64, kernel_size=3, padding=1), # 输入、输出通道数不变, 卷积核为3×3,填充为1
nn.ReLU(inplace=True),
输入宽度、高度、通道数分别为224、224、64的图片,输出宽度、高度、通道数分别为112、112、128的图片。该过程经历一次最大池化,两次卷积和两次非线性激活。
nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 缩小图像尺寸
nn.Conv2d(64, 128, kernel_size=3, padding=1), # 输出通道数变为原来2倍
nn.ReLU(inplace=True),
nn.Conv2d(128, 128, kernel_size=3, padding=1), # 输入、输出通道数不变
nn.ReLU(inplace=True),
输入宽度、高度、通道数分别为112、112、128的图片,输出宽度、高度、通道数分别为56、56、256的图片。该过程经历一次最大池化,三次卷积和三次非线性激活。
nn.MaxPool2d(kernel_size=2, stride=2), # 缩小图像尺寸
nn.Conv2d(128, 256, kernel_size=3, padding=1), # 通道数变为原来的2倍
nn.ReLU(inplace=True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
输入宽度、高度、通道数分别为56、56、256的图片,输出宽度、高度、通道数分别为28、28、512的图片。该过程经历一次最大池化,三次卷积和三次非线性激活。
nn.MaxPool2d(kernel_size=2, stride=2), # 缩小输入图像尺寸
nn.Conv2d(256, 512, kernel_size=3, padding=1), # 通道数变为原来的2倍
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
输入宽度、高度、通道数分别为28、28、512的图片,输出宽度、高度、通道数分别为14、14、512的图片。该过程经历一次最大池化,三次卷积和三次非线性激活。
nn.MaxPool2d(kernel_size=2, stride=2), # 缩小输入图像尺寸
nn.Conv2d(512, 512, kernel_size=3, padding=1), # 输入、输出通道数保持不变
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
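输入宽度、高度、通道数分别为14、14、512的图片,输出宽度、高度、通道数分别为7、7、512的图片。该过程经历一次最大池化。此时每张图片展平后共有7×7×512=25088个特征,这正是后面第一个全连接层的输入维度。(完整代码中在该池化层之后还有一层nn.AdaptiveAvgPool2d(output_size=(7, 7)):对于224×224的输入,它不改变特征图尺寸;当输入尺寸不同时,它保证输出仍为7×7,从而使全连接层的输入维度保持为25088。)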
nn.MaxPool2d(kernel_size=2, stride=2), # 缩小图像尺寸
分类的过程:先将7×7×512的特征图展平为25088维向量,再依次经过三个全连接层(前两个全连接层之后各接一次非线性激活和一次Dropout),最终输出1000个类别的得分。
nn.Flatten(), # 展平
nn.Linear(25088, 4096), # 通过线性层,将25088个特征转化为4096个特征。
nn.ReLU(inplace=True),
nn.Dropout(p=0.5, inplace=True), # 防止出现过拟合的现象
nn.Linear(4096, 4096),
nn.ReLU(inplace=True),
nn.Dropout(p=0.5, inplace=True),
nn.Linear(4096, 1000), # 将4096个特征转化为1000个特征
import torch
from torch import nn
from torch.nn import Flatten
class Net(nn.Module):
    """VGG16 image classifier (13 convolutions + 3 fully connected layers).

    The network has two stages:

    * ``features`` -- five groups of 3x3 convolutions (each followed by an
      in-place ReLU), every group closed by a 2x2 max-pool that halves the
      spatial size, then an adaptive average pool to 7x7.
    * ``classifier`` -- flatten, then three fully connected layers with
      ReLU and dropout after the first two.

    For a 224x224 input the five max-pools already produce a 7x7 map
    (224 / 2**5 == 7), so the adaptive pool leaves it unchanged there; it
    only matters for other input sizes, where it keeps the flattened
    length at 512 * 7 * 7 == 25088.

    Args:
        num_classes: Number of output scores per sample. Defaults to 1000.
        dropout: Drop probability used by both dropout layers. Defaults
            to 0.5.
    """

    # Output channels of each 3x3 convolution, in order; "M" marks a
    # 2x2 / stride-2 max-pool.
    _LAYOUT = (
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    )

    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()

        # Build the extractor from the layout table. Modules are appended in
        # the same order as a hand-written list, so Sequential indices (and
        # therefore state_dict keys such as "features.0.weight") are stable.
        layers = []
        in_channels = 3  # RGB input
        for entry in self._LAYOUT:
            if entry == "M":
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            # padding=1 with a 3x3 kernel keeps height and width unchanged.
            layers.append(nn.Conv2d(in_channels, entry, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            in_channels = entry
        layers.append(nn.AdaptiveAvgPool2d(output_size=(7, 7)))
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_channels * 7 * 7, 4096),  # 512 * 7 * 7 == 25088
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout, inplace=True),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout, inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the feature extractor, then the classifier.

        Args:
            x: Input batch with 3 channels (the first convolution expects
                3 input channels).

        Returns:
            The output of the final linear layer, ``num_classes`` values
            per sample.
        """
        x = self.features(x)
        x = self.classifier(x)
        return x
# Instantiate the VGG16 network defined by Net; no pretrained weights are loaded here.
model = Net()
Net(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace=True)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace=True)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace=True)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace=True)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(31): AdaptiveAvgPool2d(output_size=(7, 7))
)
(classifier): Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=25088, out_features=4096, bias=True)
(2): ReLU(inplace=True)
(3): Dropout(p=0.5, inplace=True)
(4): Linear(in_features=4096, out_features=4096, bias=True)
(5): ReLU(inplace=True)
(6): Dropout(p=0.5, inplace=True)
(7): Linear(in_features=4096, out_features=1000, bias=True)
)
)
构造输入input = torch.ones((64, 3, 224, 224)),即一个批量大小为64、通道数为3、高度和宽度均为224的全1张量,让其经过model模型。每个样本最终输出1000个值,因此输出的形状应为[64, 1000]。
# Sanity check: feed a dummy batch of 64 all-ones images (3 channels, 224x224)
# through the model and print the shape of the result.
input = torch.ones(64, 3, 224, 224)
output = model(input)
# The article's expected output is torch.Size([64, 1000]).
print(output.size())
torch.Size([64, 1000])
欢迎交流、沟通、批评、指正!