深度学习——(4)VGG16 图像分类

深度学习——(4)VGG16 图像分类

文章目录

  • 深度学习——(4)VGG16 图像分类
    • 1. model
    • 2. train
    • 3. predict

上次使用ResNet对图像进行分类。因为现在的数据都属于同一大类的图像,想要对它们进行更加细致的分类,所以还在学习其他的分类模型。最近用的是VGG16:本文站在巨人的肩膀上,参考了前人的博客,并在已有模型的基础上做了微调。过程中确实学到了不少,就当作自己的学习记录,后面会继续学习并尝试其他的模型。

1. model

各个VGG模型在ImageNet上预训练权重的下载地址:

# Download URLs (download.pytorch.org) of the VGG checkpoints, keyed by model
# name. The surrounding article describes them as ImageNet-trained weights.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
}
# -*- coding: utf-8 -*-
import torch.nn as nn
import torch
import torchvision.models as models
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class VGG(nn.Module):
    """VGG-style classifier: a convolutional feature stack plus an MLP head.

    Args:
        features: Module that produces the convolutional feature map.
            ``forward`` flattens its output and feeds it to a head whose
            first layer expects ``512 * 7 * 7`` inputs.
        num_classes: Number of outputs of the final linear layer.
        init_weights: Despite its name, this flag does NOT initialise any
            weights; it only selects the head layout. A truthy value builds
            the 4096 -> 4096 -> ``num_classes`` head (used when the ImageNet
            weights are going to be loaded). A falsy value (the default)
            builds the deeper 4096 -> 1024 -> 256 -> 64 -> 16 ->
            ``num_classes`` head.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super().__init__()
        self.features = features
        # NOTE: the order of the layers below fixes the ``classifier.<idx>``
        # keys of the state dict; do not reorder them or saved checkpoints
        # will stop loading.
        if init_weights:
            head = [
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(p=0.1),
                nn.Linear(4096, 4096),
                nn.ReLU(True),
                nn.Dropout(p=0.1),
                nn.Linear(4096, num_classes),
            ]
        else:
            head = [
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(p=0.1),
                nn.Linear(4096, 1024),
                nn.ReLU(True),
                nn.Dropout(p=0.1),
                nn.Linear(1024, 256),
                nn.ReLU(True),
                nn.Dropout(p=0.1),
                nn.Linear(256, 64),
                nn.ReLU(True),
                nn.Linear(64, 16),
                nn.ReLU(True),
                nn.Linear(16, num_classes),
            ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run the feature stack, flatten per sample, then apply the head."""
        # Per the original author's notes: N x 3 x 224 x 224 in,
        # N x 512 x 7 x 7 after the feature stack.
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)
        return self.classifier(x)

    def _initialize_weights(self):
        """Xavier-initialise every Conv2d/Linear weight and zero their biases.

        This method is not called by ``__init__``; invoke it explicitly when
        training from scratch.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_uniform_(m.weight)
                # Layers created with ``bias=False`` have ``bias is None``.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


def make_features(cfg: list):
    """Build the convolutional stack described by ``cfg``.

    Each entry of ``cfg`` is either the string ``"M"``, which appends a 2x2
    max-pool with stride 2, or an output-channel count, which appends a 3x3
    convolution (padding 1) followed by an in-place ReLU. The first
    convolution takes 3 input channels; every later one takes the previous
    convolution's output channels.

    Returns:
        An ``nn.Sequential`` holding the layers in ``cfg`` order.
    """
    modules = []
    channels_in = 3
    for entry in cfg:
        if entry != "M":
            modules.append(nn.Conv2d(channels_in, entry, kernel_size=3, padding=1))
            modules.append(nn.ReLU(True))
            channels_in = entry
            continue
        modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*modules)


# Layer layout of each VGG variant, consumed by make_features(): an integer is
# the output-channel count of a 3x3 convolution, 'M' marks a max-pool layer.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg(model_name="vgg16", **kwargs):
    """Construct a ``VGG`` network for the named layout.

    Args:
        model_name: Key into ``cfgs`` selecting the convolutional layout.
        **kwargs: Forwarded unchanged to the ``VGG`` constructor.

    Raises:
        AssertionError: If ``model_name`` is not a key of ``cfgs``.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    feature_stack = make_features(cfgs[model_name])
    return VGG(feature_stack, **kwargs)

2. train

# -*- coding: utf-8 -*-
import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm

from model import vgg
from pandas.core.frame import DataFrame

# Per-epoch metric rows appended by main(); kept at module level so the entry
# point can still read them after main() returns.
# NOTE: a `global` statement at module level has no effect.
global log
log=[]
def _build_classifier(num_classes):
    """Return a new, randomly initialised classification head.

    The layer sequence mirrors the head that ``vgg(..., num_classes=...)``
    builds by default, so a checkpoint saved from a network using this head
    can be loaded back into such a model.
    """
    return nn.Sequential(
        nn.Linear(512 * 7 * 7, 4096),
        nn.ReLU(True),
        nn.Dropout(p=0.1),
        nn.Linear(4096, 1024),
        nn.ReLU(True),
        nn.Dropout(p=0.1),
        nn.Linear(1024, 256),
        nn.ReLU(True),
        nn.Dropout(p=0.1),
        nn.Linear(256, 64),
        nn.ReLU(True),
        nn.Linear(64, 16),
        nn.ReLU(True),
        nn.Linear(16, num_classes))


def main():
    """Fine-tune VGG16 on an ImageFolder dataset and log per-epoch metrics.

    The convolutional features are frozen and only the classification head is
    trained. If a checkpoint already exists at ``save_path`` training resumes
    from it (head included); otherwise the ImageNet weights at ``pre_path``
    are loaded and a fresh head is attached. After every epoch one row
    ``[epoch, train_loss, train_acc, val_loss, val_acc]`` is appended to the
    module-level ``log`` list, and the weights are saved whenever validation
    accuracy improves on the best value so far.

    Raises:
        AssertionError: If ``image_path`` does not exist.
    """
    # ---- settings ----
    # Dataset root; "train" and "val" below it hold one sub-folder per class.
    image_path = 'D:/Python/VGG16-CAM/data/'
    model_name = "vgg16"

    # ImageNet weights, used only when no fine-tuned checkpoint exists yet.
    pre_path = 'D:/Python/VAE/weights/vgg16-397923af.pth'
    # Where the fine-tuned weights are stored.
    save_path = 'D:/Python/VGG16-CAM/{}Net.pth'.format(model_name)

    num_classes = 5
    batch_size = 64
    epochs = 1000

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Resize, convert to tensor and normalise with the ImageNet mean/std.
    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    data_transform = {
        "train": transforms.Compose([transforms.Resize([224, 224]),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     normalize]),
        "val": transforms.Compose([transforms.Resize([224, 224]),
                                   transforms.ToTensor(),
                                   normalize])}

    # ---- data ----
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # Persist the index -> class-name mapping, e.g. {"0": "grade0", ...}, so
    # that the prediction script can translate indices back to names.
    cla_dict = {idx: name for name, idx in train_dataset.class_to_idx.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(cla_dict, indent=4))

    # os.cpu_count() may return None, which min() cannot compare with ints.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num, val_num))

    # ---- model ----
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    if os.path.exists(save_path):
        # Resume: keep the trained head from the checkpoint and freeze only
        # the convolutional features.
        net = vgg(model_name=model_name, num_classes=num_classes)
        net.load_state_dict(torch.load(save_path, map_location=device))
        for parameter in net.features.parameters():
            parameter.requires_grad = False
    else:
        # First run: rebuild the ImageNet layout so its weights load, freeze
        # everything, then attach a fresh (trainable) head.
        net = vgg(model_name=model_name, num_classes=1000, init_weights=True)
        net.load_state_dict(torch.load(pre_path, map_location=device))
        for parameter in net.parameters():
            parameter.requires_grad = False
        net.classifier = _build_classifier(num_classes)

    net.to(device)

    # The classes are imbalanced, so each one gets its own loss weight. The
    # weight tensor must live on the same device as the logits.
    class_weight = torch.tensor([0.06756, 0.1426, 0.106, 0.225, 0.458]).to(device)
    loss_function = nn.CrossEntropyLoss(class_weight)
    trainable = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(trainable, lr=0.0001)

    # A checkpoint is only written once validation accuracy exceeds this.
    best_acc = 0.5
    train_steps = len(train_loader)
    val_steps = len(validate_loader)
    for epoch in range(epochs):
        # ---- train ----
        net.train()
        train_correct = 0   # correctly classified training samples this epoch
        train_loss = 0.0    # sum of batch losses this epoch
        train_bar = tqdm(train_loader, file=sys.stdout)
        for images, labels in train_bar:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = net(images)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            train_correct += torch.eq(outputs.argmax(dim=1), labels).sum().item()
            train_loss += loss.item()
            # Show the running batch loss in the progress bar.
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss.item())

        # ---- validate ----
        net.eval()
        val_loss = 0.0
        val_correct = 0
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_images, val_labels in val_bar:
                val_images = val_images.to(device)
                val_labels = val_labels.to(device)
                outputs = net(val_images)

                val_loss += loss_function(outputs, val_labels).item()
                val_correct += torch.eq(outputs.argmax(dim=1), val_labels).sum().item()

        val_accurate = val_correct / val_num
        train_accurate = train_correct / train_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, train_loss / train_steps, val_accurate))

        # Keep only the best-performing weights.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

        # Record this epoch's train/val loss and accuracy.
        log.append([epoch, train_loss / train_steps, train_accurate,
                    val_loss / val_steps, val_accurate])
    print('Finished Training')
    
if __name__ == '__main__':
    csv_path = "D:/Python/VGG16-CAM/VGG_predict.csv"
    try:
        main()
    finally:
        # Write whatever main() has appended to `log`, even when training
        # stops early because of an exception or Ctrl-C.
        data = DataFrame(data=log, columns=['epoch', 'train_loss', 'train_acc', 'val_loss', 'val_acc'])
        data.to_csv(csv_path, index_label="index_label")

上面把log定义为模块级的全局变量,是为了在程序中断后仍然能够保留之前已经得到的loss等指标。最初的想法是把1000个epoch中得到的acc和loss全部记录下来,但程序总是跑到一半就因为各种原因被kill。接下来准备尝试加入early-stop:当loss的变化小于一定范围时提前终止训练。early-stop部分后期更新。

3. predict

# -*- coding: utf-8 -*-
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import vgg

def main():
    """Classify one image with the fine-tuned VGG16 and plot the result.

    Loads the image at ``img_path``, the index -> class-name mapping from
    ``class_indices.json`` and the weights at ``weights_path``; prints the
    probability of every class and shows the image titled with the top
    prediction.

    Raises:
        AssertionError: If the image, the JSON mapping or the weights file
            does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Inference preprocessing has to match what the network was trained with:
    # 224x224 resize and ImageNet mean/std normalisation.
    data_transform = transforms.Compose(
        [transforms.Resize([224, 224]),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "D:/Python/VGG16-CAM/data/val/grade1/test_15524.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three channels so grayscale / RGBA files fit the 3-channel
    # normalisation and the network input.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # add the batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read the index -> class-name mapping
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = vgg(model_name="vgg16", num_classes=5).to(device)
    # load model weights
    weights_path = "D:/Python/VGG16-CAM/vgg16Net.pth"
    print(model)
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    model.load_state_dict(torch.load(weights_path, map_location=device))

    model.eval()
    with torch.no_grad():
        # Drop only the batch dimension, then turn logits into probabilities.
        output = torch.squeeze(model(img.to(device)), dim=0).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).item()

    # JSON object keys are strings, hence the str() around the class index.
    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob.item()))
    plt.show()


if __name__ == '__main__':
    main()

就这样,希望对大家有帮助~

你可能感兴趣的:(深度学习,深度学习,python,图像分类,VGG16)