cv_baseline——Alexnet

Table of Contents

  • cv_baseline——Alexnet
    • 1. Inference
    • 2. Visualization code
    • 3. train_alexnet

1. Inference

Below is a fairly clear walkthrough of inference and of the visualization of its result.

The script is built around three functions:

1. Loading the class labels: load_class_names

2. Loading and preprocessing the image: process_img

3. Building and loading the model: get_model

Alexnet_inference.py

import os
import json
import time

import matplotlib.pyplot as plt
import torch
import torchvision.models as models
import torchvision.transforms as transforms
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
from PIL import Image
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the class labels from a JSON file (English names) and a txt file (Chinese names)
def load_class_names(p_clsnames,p_clsnames_cn):
    """
    :param p_clsnames:
    :param p_clsnames_cn:
    :return:
    """
    with open(p_clsnames,"r") as f:
        class_names = json.load(f)
    with open(p_clsnames_cn, encoding='UTF-8') as f:
        class_names_cn = f.readlines()
    return class_names,class_names_cn


# Image loading and preprocessing
def process_img(path_img):

    # hard-coded ImageNet mean/std for normalization
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]
    inference_transform = transforms.Compose([transforms.Resize(256),
                                              transforms.CenterCrop((224,224)),
                                              transforms.ToTensor(),
                                              transforms.Normalize(norm_mean,norm_std)])

    img_rgb = Image.open(path_img).convert('RGB')

    img_tensor = inference_transform(img_rgb)
    img_tensor.unsqueeze_(0)
    img_tensor = img_tensor.to(device)
    return img_rgb,img_tensor

# Model loading
def get_model(path_dict,vis_model=True):
    model = models.alexnet()
    pretrained_state_dict = torch.load(path_dict)
    model.load_state_dict(pretrained_state_dict)
    model.eval()
    model.to(device)


    if vis_model:
        from torchsummary import summary
        summary(model, input_size=(3, 224, 224), device="cuda" if torch.cuda.is_available() else "cpu")
    return model

# The main routine below has 5 parts: 1. load the image; 2. load the model; 3. run inference; 4. map index to class name; 5. visualize the classification result
if __name__ == "__main__":
    path_state_dict = os.path.join(BASE_DIR,"..","data","alexnet-owt-4df8aa71.pth")
    path_image = os.path.join(BASE_DIR,"..","data","Golden Retriever from baidu.jpg")
    path_classnames = os.path.join(BASE_DIR, "..", "data", "imagenet1000.json")
    path_classnames_cn = os.path.join(BASE_DIR, "..", "data", "imagenet_classnames.txt")
    print(os.path.abspath(__file__))

    # load clsnames

    cls_n, cls_cn = load_class_names(path_classnames,path_classnames_cn)

    # 1/5 load img
    img_rgb,img_tensor = process_img(path_image)

    # 2/5 load model
    alexnet_model = get_model(path_state_dict,True)

    # 3/5 inference tensor --> vector
    with torch.no_grad():
        time_begin = time.time()
        output = alexnet_model(img_tensor)
        time_end = time.time()

    # 4/5 index to class  names
    _,pred_int = torch.max(output.data,1)
    _,top5_idx = torch.topk(output.data,5,dim=1)

    pred_idx = int(pred_int.cpu().numpy())
    pre_str,pred_cn = cls_n[pred_idx], cls_cn[pred_idx]
    print("img: {} is: {}\n{}".format(os.path.basename(path_image), pre_str, pred_cn))
    print("time consuming:{:.2f}s".format(time_begin - time_end))

    # 5/5 visualization
    plt.imshow(img_rgb)
    plt.title("predict:{}".format(pre_str))
    top5_num = top5_idx.cpu().numpy().squeeze()
    text_str = [cls_n[t] for t in top5_num]
    for idx in range(len(top5_num)):
        plt.text(5, 15+idx*30, "top {}:{}".format(idx+1, text_str[idx]), bbox=dict(fc='yellow'))
    plt.show()
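The script only prints the raw top-5 indices. If confidence scores are also wanted, here is a small sketch that could be appended after step 4/5; it reuses output and cls_n from the script above, and the extra import F is not part of the original code:

import torch.nn.functional as F

# convert logits to probabilities so the top-5 entries read as confidences
probs = F.softmax(output, dim=1)
top5_prob, top5_class = torch.topk(probs, 5, dim=1)
for p, c in zip(top5_prob.squeeze(0).tolist(), top5_class.squeeze(0).tolist()):
    print("{:>7.2%}  {}".format(p, cls_n[c]))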

2. Visualization code

The visualization code has two main parts:

1. Convolution kernel visualization

2. Feature map visualization

alexnet_visualization.py

import os
import torch
import torch.nn as nn
from PIL import Image
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
import torchvision.utils as vutils
import torchvision.models as models
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if __name__ == "__main__":

    log_dir = os.path.join(BASE_DIR, "..", "results")
    # 1. Convolution kernel visualization starts here
    # ----------------------------------- kernel visualization -----------------------------------
    writer = SummaryWriter(log_dir=log_dir, filename_suffix="_kernel")

    # m1
    # alexnet = models.alexnet(pretrained=True)

    # m2
    path_state_dict = os.path.join(BASE_DIR, "..", "data", "alexnet-owt-4df8aa71.pth")
    alexnet = models.alexnet()
    pretrained_state_dict = torch.load(path_state_dict)
    alexnet.load_state_dict(pretrained_state_dict)

    kernel_num = -1
    vis_max = 1
    for sub_module in alexnet.modules():
        if not isinstance(sub_module, nn.Conv2d):
            continue
        kernel_num += 1
        if kernel_num > vis_max:
            break

        kernels = sub_module.weight
        c_out, c_int, k_h, k_w = tuple(kernels.shape)

        # split the kernels channel by channel
        for o_idx in range(c_out):
            kernel_idx = kernels[o_idx, :, :, :].unsqueeze(1)  # kernels[o_idx] is (c_int, h, w), e.g. (3, h, w) for conv1; make_grid expects BCHW, so unsqueeze(1) adds a channel dim -> (c_int, 1, h, w)
            kernel_grid = vutils.make_grid(kernel_idx, normalize=True, scale_each=True, nrow=c_int)
            writer.add_image('{}_Convlayer_split_in_channel'.format(kernel_num), kernel_grid, global_step=o_idx)

        kernel_all = kernels.view(-1, 3, k_h, k_w)  # 3, h, w
        kernel_grid = vutils.make_grid(kernel_all, normalize=True, scale_each=True, nrow=8)  # c, h, w
        writer.add_image('{}_all'.format(kernel_num), kernel_grid, global_step=620)  # global_step is the x-axis position in TensorBoard

        print("{}_convlayer shape:{}".format(kernel_num, tuple(kernels.shape)))

    # 2. Feature map visualization starts here
    # ----------------------------------- feature map visualization -----------------------------------
    writer = SummaryWriter(log_dir=log_dir, filename_suffix="_feature map")

    # data
    path_img = os.path.join(BASE_DIR, "..", "data", "tiger cat.jpg")  # your path to image
    normMean = [0.49139968, 0.48215827, 0.44653124]
    normStd = [0.24703233, 0.24348505, 0.26158768]
    norm_transform = transforms.Normalize(normMean, normStd)
    img_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        norm_transform
    ])

    img_pil = Image.open(path_img).convert('RGB')
    img_tensor = img_transforms(img_pil)
    img_tensor.unsqueeze_(0)  # chw --> bchw

    # model (the alexnet loaded above is reused here)
    # alexnet = models.alexnet(pretrained=True)

    # forward
    convlayer1 = alexnet.features[0]
    fmap_1 = convlayer1(img_tensor)

    # rearrange so make_grid treats each of the 64 channels as one grayscale image
    fmap_1.transpose_(0, 1)  # bchw=(1, 64, 55, 55) --> (64, 1, 55, 55)
    fmap_1_grid = vutils.make_grid(fmap_1, normalize=True, scale_each=True, nrow=8)

    writer.add_image('feature map in conv1', fmap_1_grid, global_step=620)
    writer.close()

Finally, all of these visualization results are written to event files under the results directory. View them in TensorBoard with:

tensorboard --logdir=./results
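The script above only visualizes conv1 by calling alexnet.features[0] directly. A forward hook is a more general way to grab the feature map of any layer; below is a minimal sketch that assumes the same alexnet, img_tensor, log_dir and imports as in alexnet_visualization.py (the layer index 3 is just an example; it is the second conv layer in torchvision's AlexNet):

    # capture the output of an arbitrary layer with a forward hook
    feature_maps = {}

    def hook_fn(module, inputs, outputs):
        # store a detached copy of the layer output for visualization
        feature_maps["out"] = outputs.detach()

    handle = alexnet.features[3].register_forward_hook(hook_fn)
    with torch.no_grad():
        alexnet(img_tensor)
    handle.remove()

    fmap = feature_maps["out"].transpose(0, 1)   # (1, C, H, W) --> (C, 1, H, W)
    fmap_grid = vutils.make_grid(fmap, normalize=True, scale_each=True, nrow=8)
    writer = SummaryWriter(log_dir=log_dir, filename_suffix="_hook")
    writer.add_image('feature map via hook', fmap_grid, global_step=0)
    writer.close()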

3. train_alexnet

Structure of the training code:

1. Build the DataLoader

2. Build the model

3. Build the loss function

4. Build the optimizer

5. Run the training loop

The dataset code is shown first.

mydatasets.py

# -*- coding: utf-8 -*-
"""
# @brief      : Dataset definition
"""
import numpy as np
import torch
import os
import random
from PIL import Image
from torch.utils.data import Dataset

random.seed(1)
rmb_label = {"1": 0, "100": 1}  # leftover from an earlier RMB classification example; not used by CatDogDataset


class CatDogDataset(Dataset):
    def __init__(self, data_dir, mode="train", split_n=0.9, rng_seed=620, transform=None):
        """
        rmb面额分类任务的Dataset
        :param data_dir: str, 数据集所在路径
        :param transform: torch.transform,数据预处理
        """
        self.mode = mode
        self.data_dir = data_dir
        self.rng_seed = rng_seed
        self.split_n = split_n
        self.data_info = self._get_img_info()  # data_info stores every image path and its label; the DataLoader reads samples from it by index
        self.transform = transform

    def __getitem__(self, index):
        path_img, label = self.data_info[index]
        img = Image.open(path_img).convert('RGB')     # 0~255

        if self.transform is not None:
            img = self.transform(img)   # apply the transform here: conversion to tensor, etc.

        return img, label

    def __len__(self):
        if len(self.data_info) == 0:
            raise Exception("\ndata_dir:{} is a empty dir! Please checkout your path to images!".format(self.data_dir))
        return len(self.data_info)

    def _get_img_info(self):
        '''
        Returns the absolute path of every image together with its label.
        '''
        img_names = os.listdir(self.data_dir)
        img_names = list(filter(lambda x: x.endswith('.jpg'), img_names))

        # The random seed must be fixed here; otherwise the dataset order changes when retraining with different hyperparameters, and the comparison is no longer controlled
        random.seed(self.rng_seed)
        random.shuffle(img_names)

        img_labels = [0 if n.startswith('cat') else 1 for n in img_names]

        split_idx = int(len(img_labels) * self.split_n)  # 25000* 0.9 = 22500
        # split_idx = int(100 * self.split_n)
        if self.mode == "train":
            img_set = img_names[:split_idx]     # 90% of the dataset for training
            # img_set = img_names[:22500]     # hard-coded 90% of the dataset for training
            label_set = img_labels[:split_idx]
        elif self.mode == "valid":
            img_set = img_names[split_idx:]
            label_set = img_labels[split_idx:]
        else:
            raise Exception("self.mode 无法识别,仅支持(train, valid)")


        # join with data_dir to get absolute paths
        path_img_set = [os.path.join(self.data_dir, n) for n in img_set]
        data_info = [(n, l) for n, l in zip(path_img_set, label_set)]

        return data_info
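A quick standalone sanity check of the dataset class (a sketch; the relative data path and the Kaggle-style cat*.jpg / dog*.jpg file naming are assumptions carried over from the code above, and CatDogDataset is imported the same way train.py does below):

import torchvision.transforms as transforms
from A_alexnet.tools.my_dataset import CatDogDataset

check_transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
train_set = CatDogDataset(data_dir="../data/train", mode="train", transform=check_transform)
valid_set = CatDogDataset(data_dir="../data/train", mode="valid", transform=check_transform)

print(len(train_set), len(valid_set))   # expect a 90/10 split, e.g. 22500 / 2500 for the full 25000-image set
img, label = train_set[0]
print(img.shape, label)                 # torch.Size([3, 224, 224]) and 0 (cat) or 1 (dog)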

train.py

# -*- coding: utf-8 -*-
"""
# @file name  : train_alexnet.py
# @author     : TingsongYu https://github.com/TingsongYu
# @date       : 2020-02-14
# @brief      : AlexNet training
"""

import os
import numpy as np
import torch.nn as nn
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.optim as optim
from matplotlib import pyplot as plt
import torchvision.models as models
from A_alexnet.tools.my_dataset import CatDogDataset

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def get_model(path_state_dict, vis_model=False):
    """
    创建模型,加载参数
    :param path_state_dict:
    :return:
    """
    model = models.alexnet()
    pretrained_state_dict = torch.load(path_state_dict)
    model.load_state_dict(pretrained_state_dict)

    if vis_model:
        from torchsummary import summary
        summary(model, input_size=(3, 224, 224), device="cpu")

    model.to(device)
    return model


if __name__ == "__main__":

    # config
    data_dir = os.path.join(BASE_DIR, "..", "data", "train")
    path_state_dict = os.path.join(BASE_DIR, "..", "data", "alexnet-owt-4df8aa71.pth")
    num_classes = 2

    MAX_EPOCH = 3       # adjust as needed
    BATCH_SIZE = 128    # adjust as needed
    LR = 0.001          # adjust as needed
    log_interval = 1    # adjust as needed
    val_interval = 1    # adjust as needed
    classes = 2
    start_epoch = -1
    lr_decay_step = 1   # adjust as needed

    # ============================ step 1/5 data ============================
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    train_transform = transforms.Compose([
        # Resize(256) vs Resize((256, 256)): with a single int the short side is resized to 256 and the
        # long side is scaled proportionally (so it is not necessarily 256); with (256, 256) both sides become 256
        transforms.Resize(256),
        # then crop a (256, 256) square from the center
        transforms.CenterCrop(256),
        # then randomly crop a 224x224 patch from that
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    normalizes = transforms.Normalize(norm_mean, norm_std)
    valid_transform = transforms.Compose([
        transforms.Resize((256, 256)),
        # TenCrop produces 10 crops: the four corners and the center, plus their horizontally flipped versions
        transforms.TenCrop(224, vertical_flip=False),
        # At this point there is no batch dimension yet (the DataLoader adds it later);
        # the Lambda below takes the 10 PIL crops and stacks them into one 4-D tensor of shape [10, C, H, W],
        # so a batch coming out of the DataLoader has shape [B, 10, C, H, W]
        transforms.Lambda(lambda crops: torch.stack([normalizes(transforms.ToTensor()(crop)) for crop in crops])),
    ])

    # build the Dataset instances
    train_data = CatDogDataset(data_dir=data_dir, mode="train", transform=train_transform)
    valid_data = CatDogDataset(data_dir=data_dir, mode="valid", transform=valid_transform)

    # build the DataLoaders
    train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
    valid_loader = DataLoader(dataset=valid_data, batch_size=4)

    # ============================ step 2/5 model ============================
    alexnet_model = get_model(path_state_dict, False)

    num_ftrs = alexnet_model.classifier._modules["6"].in_features
    alexnet_model.classifier._modules["6"] = nn.Linear(num_ftrs, num_classes)

    alexnet_model.to(device)
    # ============================ step 3/5 loss function ============================
    criterion = nn.CrossEntropyLoss()
    # ============================ step 4/5 optimizer ============================
    # optionally freeze the conv layers
    flag = 0
    # flag = 1
    if flag:
        fc_params_id = list(map(id, alexnet_model.classifier.parameters()))  # memory addresses of the classifier (fc) parameters
        # filter out the parameters that belong to the fc layers, leaving only the conv layers
        base_params = filter(lambda p: id(p) not in fc_params_id, alexnet_model.parameters())
        optimizer = optim.SGD([
            {'params': base_params, 'lr': LR * 0.1},  # set this lr to 0 to stop updating the conv layers entirely, i.e. freeze them
            {'params': alexnet_model.classifier.parameters(), 'lr': LR}], momentum=0.9)

    else:
        optimizer = optim.SGD(alexnet_model.parameters(), lr=LR, momentum=0.9)  # choose the optimizer

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_decay_step, gamma=0.1)  # learning-rate decay schedule
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)

    # ============================ step 5/5 training ============================
    train_curve = list()
    valid_curve = list()

    for epoch in range(start_epoch + 1, MAX_EPOCH):

        loss_mean = 0.
        correct = 0.
        total = 0.

        alexnet_model.train()
        for i, data in enumerate(train_loader):

            # if i > 1:
            #     break

            # forward
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = alexnet_model(inputs)

            # backward
            optimizer.zero_grad()
            loss = criterion(outputs, labels)
            loss.backward()

            # update weights
            optimizer.step()

            # track classification accuracy
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).squeeze().cpu().sum().numpy()

            # print training info
            loss_mean += loss.item()
            train_curve.append(loss.item())
            if (i+1) % log_interval == 0:
                loss_mean = loss_mean / log_interval
                print("Training:Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                    epoch, MAX_EPOCH, i+1, len(train_loader), loss_mean, correct / total))
                loss_mean = 0.

        scheduler.step()  # update the learning rate

        # validate the model
        if (epoch+1) % val_interval == 0:

            correct_val = 0.
            total_val = 0.
            loss_val = 0.
            alexnet_model.eval()
            with torch.no_grad():
                for j, data in enumerate(valid_loader):
                    inputs, labels = data
                    inputs, labels = inputs.to(device), labels.to(device)

                    bs, ncrops, c, h, w = inputs.size()     # [4, 10, 3, 224, 224]
                    outputs = alexnet_model(inputs.view(-1, c, h, w))
                    # outputs now has shape [40, 2]: 4 images x 10 crops each
                    # the view below reshapes it to [4, 10, 2]; taking the mean over the crop dimension
                    # gives outputs_avg with shape [4, 2] (see the small mean() example at the end if this is unclear)
                    outputs_avg = outputs.view(bs, ncrops, -1).mean(1)

                    loss = criterion(outputs_avg, labels)

                    _, predicted = torch.max(outputs_avg.data, 1)
                    total_val += labels.size(0)
                    correct_val += (predicted == labels).squeeze().cpu().sum().numpy()

                    loss_val += loss.item()

                loss_val_mean = loss_val/len(valid_loader)
                valid_curve.append(loss_val_mean)
                print("Valid:\t Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                    epoch, MAX_EPOCH, j+1, len(valid_loader), loss_val_mean, correct_val / total_val))
            alexnet_model.train()

    train_x = range(len(train_curve))
    train_y = train_curve

    train_iters = len(train_loader)
    valid_x = np.arange(1, len(valid_curve)+1) * train_iters*val_interval  # valid_curve stores one loss per epoch, so the x positions are converted to iteration counts
    valid_y = valid_curve

    plt.plot(train_x, train_y, label='Train')
    plt.plot(valid_x, valid_y, label='Valid')

    plt.legend(loc='upper right')
    plt.ylabel('loss value')
    plt.xlabel('Iteration')
    plt.show()
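The training loop above never saves the fine-tuned weights. Here is a minimal sketch of saving and later rebuilding the two-class model, appended at the end of the main block (the output file name is a placeholder, not part of the original code):

    # save only the state_dict of the fine-tuned two-class model (placeholder file name)
    path_finetuned = os.path.join(BASE_DIR, "..", "results", "alexnet_catdog.pth")
    torch.save(alexnet_model.state_dict(), path_finetuned)

    # to reload later: rebuild the same architecture (classifier[6] replaced by a 2-class head), then load
    model_reloaded = models.alexnet()
    model_reloaded.classifier[6] = nn.Linear(model_reloaded.classifier[6].in_features, num_classes)
    model_reloaded.load_state_dict(torch.load(path_finetuned, map_location=device))
    model_reloaded.to(device).eval()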



Below is the classifier part of the AlexNet network definition; the layer being replaced is the one at index 6 of the classifier (the final Linear layer):

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
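Equivalent to the _modules["6"] assignment used in train.py, the final layer can also be replaced by indexing the Sequential directly:

alexnet_model.classifier[6] = nn.Linear(alexnet_model.classifier[6].in_features, num_classes)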

A quick test of mean() (averaging along axis 0, the same operation used above to average over the crops):

a = np.array([[1, 3],
              [2, 4],
              [3, 5]])
b = a.mean(0)
print(b)
# b = [2. 4.]
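The same idea with torch tensors and the [bs, ncrops, num_classes] shapes used during validation above (toy random numbers, just to show the reshape-and-average step):

import torch

bs, ncrops, num_classes = 4, 10, 2
outputs = torch.randn(bs * ncrops, num_classes)        # what the model returns for 4 images x 10 crops
outputs_avg = outputs.view(bs, ncrops, -1).mean(1)     # average the 10 crop predictions per image
print(outputs_avg.shape)                               # torch.Size([4, 2])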
