目录
前言
VGG网络详解及感受野计算
使用pytorch搭建VGG网络
model.py
代码部分
测试部分
train.py
代码部分
predict.py
这个是按照B站up主的教程学习这方面知识的时候自己做的笔记和总结,可能有点乱,主要是按照我自己的记录习惯
参考内容来自:
2.表格是原论文中的
3.扩展:感受野
4.VGG详解,表中没有给出卷积参数和最大下采样参数
该项目的目录如下
|-imageprocessing
|-VGGNet
|-class_indices.json
|-model.py
|-predict.py
|-train.py
|-
只写了VGG网络结构的表中ABDE四个部分
import torch.nn as nn
import torch
# Official torchvision pretrained-weight download URLs, keyed by model name.
# Not downloaded automatically here; use them manually if transfer learning is wanted.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
}
# 1. The classification network
class VGG(nn.Module):
    """VGG classifier: a convolutional backbone followed by a 3-layer FC head."""

    def __init__(self, features, num_classes=1000, init_weights=False):
        # features:     pre-built convolutional feature extractor (see make_features)
        # num_classes:  number of output categories
        # init_weights: when True, re-initialize all conv/linear parameters
        super(VGG, self).__init__()
        self.features = features
        # Fully connected head; its input is the flattened 512x7x7 feature map.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),              # dropout to reduce overfitting
            nn.Linear(512 * 7 * 7, 2048),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(True),
            nn.Linear(2048, num_classes),   # output layer
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        # N x 3 x 224 x 224 -> N x 512 x 7 x 7
        feat = self.features(x)
        # Flatten everything except the batch dimension (dim 0).
        flat = torch.flatten(feat, start_dim=1)
        # N x 512*7*7 -> N x num_classes
        return self.classifier(flat)

    def _initialize_weights(self):
        """Xavier-initialize every conv/linear weight and zero its bias."""
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.xavier_uniform_(layer.weight)
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                nn.init.constant_(layer.bias, 0)
# 2. Feature-extraction network builder
def make_features(cfg: list):
    """Build the convolutional backbone from a configuration list.

    Each integer entry adds a 3x3 conv (stride 1, padding 1) with that many
    output channels followed by an in-place ReLU; the string "M" adds a
    2x2 max-pool with stride 2.
    """
    modules = []
    channels = 3  # RGB input image, so 3 input channels
    for item in cfg:
        if item == "M":
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            modules.append(nn.Conv2d(channels, item, kernel_size=3, padding=1))
            modules.append(nn.ReLU(True))
            channels = item
    # Unpack the layer list as positional args to build a Sequential container.
    return nn.Sequential(*modules)
# cfgs: one backbone configuration per VGG variant, matching columns A/B/D/E
# of the table in the original paper. Integers are conv output channels;
# the string 'M' marks a 2x2 max-pool (downsampling) layer.
#   vgg11 -> config A (11 weight layers)
#   vgg13 -> config B (13 weight layers)
#   vgg16 -> config D (16 weight layers)
#   vgg19 -> config E (19 weight layers)
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
# 3. Factory function: instantiate a VGG model from a named configuration.
def vgg(model_name="vgg16", **kwargs):
    """Build a VGG model.

    Args:
        model_name: key into ``cfgs`` ('vgg11', 'vgg13', 'vgg16' or 'vgg19').
        **kwargs: forwarded to the ``VGG`` constructor
            (e.g. num_classes, init_weights).

    Raises:
        ValueError: if ``model_name`` is not a known configuration.
    """
    # FIX: the original used ``assert`` (stripped under -O) with the
    # misworded message "model number"; validate explicitly instead.
    if model_name not in cfgs:
        raise ValueError("Warning: model name {} not in cfgs dict!".format(model_name))
    cfg = cfgs[model_name]
    model = VGG(make_features(cfg), **kwargs)
    return model
# Instantiate the model once to have a look -- this line is only a quick test.
vgg_model = vgg(model_name = 'vgg13')
打一下断点
实例化类
这个模型比较大,up主说需训练几个小时,所以没有带训练,时间比较长,效果一般
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from model import vgg
def main():
    """Train a VGG classifier on the flower dataset and save the best checkpoint."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Augmentation for training; plain resize + normalize for validation.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # e.g. {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # Persist the index -> class-name mapping for use at prediction time.
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    # FIX: nw was computed and printed but num_workers was hard-coded to 0,
    # so the log message lied. On Windows, set nw = 0 if workers cause issues.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    # Build the network; model_name picks the VGG configuration, and the extra
    # keyword arguments are forwarded to the VGG constructor via **kwargs.
    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # ---- training phase ----
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # ---- validation phase ----
        net.eval()
        acc = 0.0  # accumulated count of correct predictions this epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        # Keep only the best-performing checkpoint on the validation set.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import vgg
def main():
    """Load the trained VGG16, classify a single image, and display the result."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # FIX: force 3 channels so grayscale/RGBA/palette images don't break the
    # 3-channel Normalize transform.
    # [N, C, H, W]
    img = data_transform(img.convert('RGB'))
    # expand batch dimension: [C, H, W] -> [1, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read the index -> class-name mapping written by train.py
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model and load the trained weights
    model = vgg(model_name="vgg16", num_classes=5).to(device)
    weights_path = "./vgg16Net.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    model.eval()
    with torch.no_grad():
        # predict class probabilities for the single image
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

        print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                                   predict[predict_cla].numpy())
        plt.title(print_res)
        # also print the probability of every class
        for i in range(len(predict)):
            print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
                                                    predict[i].numpy()))
        plt.show()


if __name__ == '__main__':
    main()