#VGG网络
如上图所示,VGG网络由5段卷积层(VGG16共13个卷积层)、5层池化层、3层全连接层和softmax输出层组成。其中所有隐层之间采用ReLU激活函数。最常用的配置是VGG16。
VGG网络亮点:通过堆叠多个3×3的卷积核来替代大尺度卷积核,以此减少所需参数。
VGG网络论文中提到:可以通过堆叠两个3×3的卷积核替代5×5的卷积核,堆叠三个3×3的卷积核替代7×7的卷积核。这样可以拥有相同的感受野。
对于VGG16卷积神经网络而言,其13层卷积层和5层池化层负责进行特征提取,最后的3层全连接层负责完成分类任务。
如图所示,第3层1×1的feature map大小对应第二层感受野2×2的区域,第二层2×2的feature map对应第一层输入的5×5区域
感受野计算公式:F(i) = (F(i+1) − 1) × Stride + Ksize
F(i)为第i层感受野
Stride为第i层的步距(stride)
Ksize为卷积核或池化核尺寸
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-YhGzMzBV-1668132550076)(images/VGG/5.png)]
如图:第三层Feature map感受野F(3)=1
Pool1: F(2)=(F(3)-1)×Stride+Ksize=(1-1)×2+2=2
Conv1: F(1)=(F(2)-1)×Stride+Ksize=(2-1)×2+3=5
特征提取:
1.输入224×224×3的RGB图像,经过64个通道为3×3的卷积核,步长为1,padding=1填充,卷积两次,再经ReLU激活,输出的尺寸大小为(224-3+2)/1+1=224 (224,224,64)
2.经max pooling(最大池化层),滤波器为2×2,步长为2,图像尺寸减半,池化后的尺寸变为(112,112,64)
3.经128个通道的3×3的卷积核,两次卷积,ReLU激活,输出尺寸:(112-3+2)/1+1=112 (112,112,128)
4.经max pooling(最大池化层),滤波器为2×2,步长为2,图像尺寸减半,池化后的尺寸变为(56,56,128)
5.经过256个通道的3×3的卷积核,三次卷积,ReLU激活,输出尺寸(56-3+2)/1+1=56,输出:(56,56,256)
6.经过max pool(最大池化层),滤波器为2×2,步长为2,图像尺寸减半,池化后的尺寸变为(28,28,256)
7.经过512个通道的3×3的卷积核,三次卷积,ReLU激活,输出尺寸(28-3+2)/1+1=28,输出:(28,28,512)
8.经过max pool(最大池化层),滤波器为2×2,步长为2,图像尺寸减半,池化后的尺寸变为(14,14,512)
9.经过512个通道的3×3的卷积核,三次卷积,ReLU激活,输出尺寸(14-3+2)/1+1=14,输出:(14,14,512)
10.经过max pool(最大池化层),滤波器为2×2,步长为2,图像尺寸减半,池化后的尺寸变为(7,7,512)
11.然后经过flatten(),将数据拉平成向量,变成一维 512×7×7=25088
分类:
1.经过两层1×1×4096,一层1×1×1000进行全连接+ReLU(共三层)
2.经过softmax输出1000个预测结果。
import torch.nn as nn
import torch
# Download URLs for torchvision's pretrained VGG weights (one per depth variant).
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
}
# VGG configuration dict; each value is a list describing the feature extractor:
# an int is the output-channel count of a 3x3 conv layer (followed by ReLU),
# and the token 'M' marks a 2x2 stride-2 max-pool (consumed by make_features).
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
class VGG(nn.Module):
    """VGG network: a convolutional feature extractor followed by the
    standard three-layer fully-connected classifier head.

    Args:
        features: nn.Sequential of conv/pool layers; for 224x224 input it
            must produce an (N, 512, 7, 7) tensor (see ``make_features``).
        num_classes: size of the final output layer (default 1000).
        init_weight: when True, re-initialize all conv/linear weights with
            Xavier-uniform and zero the biases.
    """

    def __init__(self, features, num_classes=1000, init_weight=False):
        super().__init__()
        self.features = features
        # FC head: 512*7*7 -> 4096 -> 4096 -> num_classes,
        # with in-place ReLU and Dropout(0.5) between the linear layers.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )
        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        # Xavier-uniform weights and zero biases for every conv/linear layer.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        # (N, 3, 224, 224) -> (N, 512, 7, 7)
        out = self.features(x)
        # Flatten everything except the batch dimension -> (N, 25088).
        out = torch.flatten(out, start_dim=1)
        # (N, 25088) -> (N, num_classes); raw logits, no softmax here.
        return self.classifier(out)
def make_features(cfg: list):
    """Translate a VGG config list into an nn.Sequential feature extractor.

    Each integer entry becomes a 3x3 same-padding convolution with that many
    output channels, followed by an in-place ReLU; the token 'M' becomes a
    2x2 stride-2 max-pool. Input is assumed to have 3 (RGB) channels.
    """
    modules = []
    channels = 3  # running input-channel count, starting from RGB
    for item in cfg:
        if item == "M":
            # 2x2 filter, stride 2: halves the spatial resolution.
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            modules.append(nn.Conv2d(channels, item, kernel_size=3, padding=1))
            modules.append(nn.ReLU(True))
            channels = item
    return nn.Sequential(*modules)
def vgg(model_name="vgg16", **kwargs):
    """Build a VGG variant by name ('vgg11', 'vgg13', 'vgg16' or 'vgg19').

    Extra keyword arguments (e.g. ``num_classes``, ``init_weight``) are
    forwarded to the ``VGG`` constructor.

    Raises:
        AssertionError: if ``model_name`` is not a key of ``cfgs``.
    """
    # Fixed message typo: 'model number' -> 'model name'.
    assert model_name in cfgs, "Warning: model name {} not in cfgs dict!".format(model_name)
    cfg = cfgs[model_name]
    model = VGG(make_features(cfg), **kwargs)
    return model
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from model import vgg
def main():
    """Train VGG16 on the 5-class flower dataset and save the best weights."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))
    # Image preprocessing: random crop + horizontal flip for training,
    # plain resize for validation; both normalized channel-wise to [-1, 1].
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../"))  # get data root path
    # print(data_root)  # project root directory, one level above this script
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    print(image_path)  # e.g. <project root>/data_set/flower_data
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    # Training set lives in data_set/flower_data/train, one folder per class.
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)
    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx  # class-name -> index mapping from folder names
    # Invert to index -> class-name so predictions can be mapped back to labels.
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file (read back by the prediction script)
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)
    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    # Training loader: shuffled mini-batches.
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=nw)
    # Validation loader: fixed order, no shuffling.
    val_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"), transform=data_transform["val"])
    val_num = len(val_dataset)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    epoches = 1
    model_name = "vgg16"
    model = vgg(model_name=model_name, num_classes=5, init_weight=True)
    model.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    best_accurancy = 0.0  # best validation accuracy seen so far
    save_path = './{}Net.pth'.format(model_name)
    train_step = len(train_dataloader)  # batches per epoch, for loss averaging
    for epoch in range(epoches):
        # train
        model.train()
        running_loss = 0.0
        train_bar = tqdm(train_dataloader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = model(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()
            # accumulate loss for the per-epoch average reported below
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epoches, loss)
        # val
        model.eval()
        acc = 0.0  # number of correctly classified validation images
        with torch.no_grad():
            val_bar = tqdm(val_dataloader, file=sys.stdout)
            for val_data in val_bar:
                images, labels = val_data
                outputs = model(images.to(device))
                # argmax over class logits -> predicted class index
                predict_y = torch.max(outputs, dim=1)[1]
                acc += (predict_y == labels.to(device)).sum().item()
        val_accurate = acc / val_num
        print('[epoch %d] train loss:%.3f val accurancy:%.3f' % (epoch + 1, running_loss / train_step, val_accurate))
        # Keep only the checkpoint with the best validation accuracy.
        if val_accurate > best_accurancy:
            best_accurancy = val_accurate
            torch.save(model.state_dict(), save_path)
    print('Finish Training')
if __name__ == '__main__':
    main()
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import vgg
def main():
    """Classify ../tulip.jpg with the trained VGG16 and display the result.

    Loads the class-index mapping written by the training script
    ('class_indices.json') and the best checkpoint ('./vgg16Net.pth'),
    prints the probability of every class, and shows the image with the
    top prediction as its title.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Same deterministic preprocessing as the validation pipeline in training.
    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # load image
    img_path = "../tulip.jpg"
    # Fixed message typo in the three asserts below: 'dose' -> 'does'.
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)
    # read class_indict (index -> class-name, written by train.py)
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)
    # create model
    model = vgg(model_name="vgg16", num_classes=5).to(device)
    # load model weights
    weights_path = "./vgg16Net.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class: squeeze the batch dim, softmax logits to probabilities
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()
    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                               predict[predict_cla].numpy())
    plt.title(print_res)
    # Print the probability for every class, not just the winner.
    for i in range(len(predict)):
        print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
                                                predict[i].numpy()))
    plt.show()
if __name__ == '__main__':
    main()