一.VGG 网络参数如下:
VGG网络使用的输入是3x224x224的图像。
二.VGG 网络搭建如下(学习于B 站UP主:霹雳吧啦Wz,良心推荐):
1.阅读代码之前了解下conv2d的计算,其实nn.Conv2d、nn.MaxPool2d的输出尺寸的计算都是使用以下公式:
VGG16的输入是3x224x224,进入全连接层的输入是512x7x7,各个层次的输入输出,建议手算一遍。
import torch
import torch.nn as nn
# Layer configurations for each VGG variant.
# Integer entries are the output-channel counts of 3x3 Conv2d layers
# (stride 1, padding 1, so spatial size is preserved); each 'M' entry
# is a 2x2 max-pool with stride 2 that halves the spatial size.
cfgs = {
'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
}
def make_features(cfg: list):
    """Build the VGG convolutional feature extractor from a config list.

    Each integer entry adds a 3x3 Conv2d (stride 1, padding 1) followed by
    an in-place ReLU; each 'M' entry adds a 2x2 max-pool with stride 2.

    Args:
        cfg: one of the lists from the ``cfgs`` dict.

    Returns:
        nn.Sequential containing the assembled layers (the ``*`` unpacks
        the list into separate positional arguments).
    """
    modules = []
    in_channels = 3  # RGB input
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            modules.append(nn.Conv2d(in_channels, entry, 3, 1, 1))
            modules.append(nn.ReLU(True))
            in_channels = entry
    return nn.Sequential(*modules)
class Vggnet(nn.Module):
    """VGG-style network: conv feature extractor plus a 3-layer MLP head.

    The hidden width is 2048 instead of the paper's 4096 to save GPU
    memory. Note: the attribute is deliberately spelled ``classfier``
    (sic) so that state_dict keys stay compatible with already-saved
    checkpoints.

    Args:
        feature: module producing a 512x7x7 map from a 3x224x224 input.
        num_classes: size of the final classification layer.
    """

    def __init__(self, feature, num_classes=1000):
        super(Vggnet, self).__init__()
        self.feature = feature
        self.classfier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(512*7*7, 2048),  # originally 4096
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(2048, 2048),  # originally 4096
            nn.ReLU(True),
            nn.Linear(2048, num_classes)
        )

    def forward(self, x):
        feature_map = self.feature(x)
        # collapse everything but the batch dimension before the MLP head
        flat = torch.flatten(feature_map, start_dim=1)
        return self.classfier(flat)
def vgg(model_name='vgg16', num_classes=5, **kwargs):
    """Build a VGG model by name.

    Args:
        model_name: one of 'vgg11', 'vgg13', 'vgg16', 'vgg19'.
        num_classes: number of output classes (default 5, for the
            flower dataset used in this post).
        **kwargs: forwarded to Vggnet. (Bug fix: the original accepted
            **kwargs but never passed them on.)

    Raises:
        ValueError: if model_name is not a key of ``cfgs``.
    """
    try:
        cfg = cfgs[model_name]
    except KeyError:
        # Bug fix: a failed dict lookup raises KeyError, not ImportError,
        # so the original handler was dead code. Raise instead of exiting
        # so callers can handle the error.
        raise ValueError(
            "Warning: model name {} not in cfgs dict!".format(model_name)
        ) from None
    return Vggnet(make_features(cfg), num_classes=num_classes, **kwargs)


net = vgg('vgg16')
二.训练脚本如下(学习于B 站UP主:霹雳吧啦Wz,良心推荐):
看下面代码之前先介绍几个知识点:
1.torch.max(input, dim, keepdim=False, *, out=None) -> (Tensor, LongTensor)
建议参考博文:https://blog.csdn.net/qq_40210586/article/details/103874000
或pytorch官网文档:https://pytorch.org/docs/stable/generated/torch.max.html
import json
import os
import torch
import torch.utils.data as data
from torch import nn, optim
from torchvision import datasets, transforms
from tqdm import tqdm #关于进度条的,搜集篇博文大概了解下就行。
from model import net
# ---------------------------------------------------------------------------
# Training script: trains the VGG16 defined in model.py on the 5-class
# flower dataset, validates each epoch, and keeps the best weights.
# ---------------------------------------------------------------------------
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Running on the device of '{}'.".format(device))

# Random crop/flip augmentation for training; plain resize for validation.
transform = {
    "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
    "val": transforms.Compose([transforms.Resize((224, 224)),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
}

image_path = "/home/xulei/数据集大本营/5_flower_data/flower_data"
assert os.path.exists(image_path), "file '{}' does not exists.".format(image_path)
trainset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                transform=transform["train"])
train_num = len(trainset)

# Invert the class-name -> index mapping and dump it to JSON so the
# prediction script can translate indices back to class names.
flower_list = trainset.class_to_idx
cla_dict = dict((value, key) for key, value in flower_list.items())
json_str = json.dumps(cla_dict, indent=4)
with open('class_idices.json', 'w') as json_file:
    json_file.write(json_str)

# For simple networks (or a strong GPU) a larger batch speeds up training.
batchsize = 16
# Number of dataloader worker processes, capped at 8; in practice the gain
# was small, possibly because of the modest machine used here.
nw = min(os.cpu_count(), batchsize if batchsize > 1 else 0, 8)
print("using {} dataloader workers every process".format(nw))
trainloader = data.DataLoader(trainset, batchsize, shuffle=True, num_workers=nw)
valset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                              transform=transform["val"])
val_num = len(valset)
valloader = data.DataLoader(valset, batch_size=batchsize, shuffle=False, num_workers=nw)
print("using {} images for trainning, {} images for validation.".format(train_num, val_num))

model = net.to(device)
# model = net.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00004)
# learningrate = 0.0001
epoches = 20
# NOTE(review): the filename says AlexNet but the model is VGG; the
# prediction script loads this same path, so they must stay in sync.
save_path = './AlexNet.pth'
best_acc = 0.0
# optimizer = optim.SGD(model.parameters(), learningrate, momentum=0.9)
train_steps = len(trainloader)

for epoch in range(epoches):
    total = 0  # unused (leftover from a commented-out accuracy computation)
    runningloss = 0.0
    # runningcorrect = 0.0
    model.train()  # training mode: enables Dropout in the classifier head
    train_bar = tqdm(trainloader)  # progress bar
    for step, data in enumerate(train_bar):
        img, label = data
        outputs = model(img.to(device))
        loss = criterion(outputs, label.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        runningloss += loss.item()
        # runningcorrect += (predicted == label.to(device)).sum()  # same result as the eq() form below; dropped
        predicted = torch.max(outputs.data, dim=1)[1]
        total += label.size(0)
        # runningcorrect += torch.eq(predicted, label.to(device)).sum().item()
        # The two statements above are not used further; kept commented out.
        train_bar.desc = "train epoch[{}/{}] loss:{:.3f} ".format(epoch + 1, epoches, loss)
    model.eval()  # evaluation mode: disables Dropout for validation
    acc = 0.0
    with torch.no_grad():
        val_bar = tqdm(valloader)
        for val_data in val_bar:
            val_images, val_labels = val_data
            outputs = model(val_images.to(device))  # changed `net` to `model` here (2 Mar)
            predict_y = torch.max(outputs, dim=1)[1]
            acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
    val_accurate = acc / val_num
    train_loss = runningloss / train_steps
    print('\n[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
          (epoch + 1, train_loss, val_accurate))
    # Save the weights of the best-performing epoch so far.
    if val_accurate > best_acc:
        best_acc = val_accurate
        torch.save(net.state_dict(), save_path)
        torch.save(model, './save')
print("Finshed Training")
# model = torch.load('./path')
# checkpoint = model.state_dict()
三.测试脚本:
说明一下,我用的是Ubuntu系统,代码中与windows下运行区别可能在与文件路径的格式上。
import torch
import os
import json
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms
from model import net
def main():
    """Classify one image with the trained VGG and display the result.

    Reads the class-index mapping written by the training script, loads
    the best saved weights, and shows the image with the predicted class
    and its probability in the plot title.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # A test image downloaded from the web, kept outside the dataset dirs.
    img_path = "/home/xulei/数据集大本营/5_flower_data/predict/sunflowers004.jpg"
    assert os.path.exists(img_path), "file '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    img = transform(img)
    img = torch.unsqueeze(img, dim=0)  # add batch dimension: [C,H,W] -> [1,C,H,W]

    # index -> class-name mapping produced by the training script
    json_path = './class_idices.json'
    assert os.path.exists(json_path), "file '{}' does not exist.".format(json_path)
    with open(json_path, 'r') as json_file:  # fix: close the file (was left open)
        class_indict = json.load(json_file)

    model = net.to(device)
    # The training script saves under this (AlexNet-named) path; keep in sync.
    weights_path = './AlexNet.pth'
    # Bug fix: the original re-checked img_path here instead of weights_path.
    assert os.path.exists(weights_path), "file '{}' does not exist.".format(weights_path)
    # map_location lets CUDA-saved weights load on a CPU-only machine.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.eval()  # disable Dropout for inference
    with torch.no_grad():
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()
    print_res = "class: {} prob: {:.3f}".format(class_indict[str(predict_cla)],
                                                predict[predict_cla].numpy())
    plt.title(print_res)
    plt.show()


if __name__ == "__main__":
    main()
四.数据集:
有以上的代码部分,还是不能运行。因为好多网络使用的都是3x224x224的数据集,我这里目前使用的是花分类数据集,后面的学习都可以用这个数据集,还可以将训练效果进行横向对比。
——————————————————————————————————————
下载地址:
链接: https://pan.baidu.com/s/181cUGryBlzds__PI8rGheg
提取码: sf4h
——————————————————————————————————————
关于,数据集的配置,请参考,B站良心UP主,霹雳吧啦WZ:
以下是连接:https://www.bilibili.com/video/BV1p7411T7Pc
这个UP主太厉害了,我都不舍得推荐给你们的那种,好看的话,记着回来给我点个赞。
五.我的运行结果:
batchsize=16,建议先设得特别大试一下,显存爆满之后会报错,然后慢慢调到一个自己显卡可以接受的大小。
如果是ubuntu系统下,可以通过下面的命令查看GPU运行情况:watch -n 1 -d nvidia-smi
参照UP主将网络改小了:4096->2048
vgg16 (512*7*7, 2048), (2048, 2048), (2048, num_classes=5) epoches=20 optimizer = optim.Adam(model.parameters(), lr=0.00004) criterion = nn.CrossEntropyLoss()
————————————————————————————————————————
训练最好结果:
[epoch 18] train_loss: 0.929 val_accuracy: 0.648
————————————————————————————————————————
总共20轮,平均每轮耗时2分10秒,GPU使用率99%,我的GPU是GTX 1050Ti的,显存4G,性能一般,大型代码难跑。建议入门的GTX 1080Ti,显存6G。显存大,batchsize就可以设置更大,训练速度就会更快。
我的预测的准确率不高,可能是因为我后来调试时将epoches调小的原因吧,建议大家可以一次训练的轮次大一点,然后将代码中权重保存的部分注释掉,再去调试,这样先前保存的权重就不会被覆盖了。预测效果:
谢谢您观看我的博文,如有不足之处,欢迎指正。