pytorch-grad-cam源代码阅读和调试(上)

笔者使用代码及相关文件下载链接: 【源代码文件】pytorch-grad-cam源代码阅读和调试
源代码链接: jacobgil/pytorch-grad-cam
pytorch-grad-cam源代码阅读和调试(上)
pytorch-grad-cam源代码阅读和调试(中)
pytorch-grad-cam源代码阅读和调试(下)

算法手绘图展示:
pytorch-grad-cam源代码阅读和调试(上)_第1张图片

pytorch-grad-cam源代码阅读和调试(上)_第2张图片

代码实验展示(配套详细注释):

import torch
import argparse
import cv2
import numpy as np
import torch
from torch.autograd import Function
from torchvision import models, transforms

class FeatureExtractor():
    """Run a module's direct children in order and capture selected outputs.

    For every direct child whose name is listed in ``target_layers`` the
    child's output is collected, and a tensor hook is registered on it so
    that the gradient reaching that output during a later backward pass is
    appended to ``self.gradients``.
    """

    def __init__(self, model, target_layers):
        # Example from the demo script: FeatureExtractor(model.layer4, ["2"])
        self.gradients = []
        self.target_layers = target_layers  # names of direct children to capture
        self.model = model  # the sub-module whose children are iterated

    def save_gradient(self, grad):
        """Tensor hook: store the gradient of a captured activation."""
        self.gradients.append(grad)

    def __call__(self, x):
        """Feed ``x`` through the children one after another.

        Returns:
            A pair ``(captured, x)`` where ``captured`` is the list of
            outputs of the targeted children (in forward order) and ``x``
            is the output of the last child.
        """
        # Gradients recorded for a previous forward pass are discarded.
        self.gradients = []
        captured = []
        for child_name, child in self.model._modules.items():
            x = child(x)
            if child_name not in self.target_layers:
                continue
            # The hook fires during backward() and records d(loss)/d(x).
            x.register_hook(self.save_gradient)
            captured.append(x)
        return captured, x

class ModelOutputs():
    """Forward pass through ``model`` that also taps one of its sub-modules.

    Calling an instance returns:
    1. the activations captured inside ``feature_module``, and
    2. the final network output.
    The gradients of the captured activations are available through
    ``get_gradients()`` once a backward pass has been run.
    """

    def __init__(self, model, feature_module, target_layers):
        # Example from the demo script: ModelOutputs(model, model.layer4, ["2"])
        self.model = model
        self.feature_module = feature_module
        self.feature_extractor = FeatureExtractor(self.feature_module, target_layers)

    def get_gradients(self):
        """Return the list of gradients recorded by the feature extractor."""
        return self.feature_extractor.gradients

    def __call__(self, x):
        """Run ``x`` through the direct children of ``self.model`` in order.

        The child that *is* ``self.feature_module`` is executed through the
        feature extractor so its targeted activations are captured. The
        output of any child whose name contains ``"avgpool"`` is flattened
        to ``(batch, -1)`` before being passed on.

        Returns:
            ``(target_activations, x)``: the captured activations and the
            output of the last child.

        Raises:
            ValueError: if ``self.feature_module`` is not a direct child of
                ``self.model`` (previously this surfaced as an
                ``UnboundLocalError`` at the return statement).
        """
        target_activations = None
        for name, module in self.model._modules.items():
            if module is self.feature_module:
                target_activations, x = self.feature_extractor(x)
            elif "avgpool" in name.lower():
                x = module(x)
                # Collapse everything but the batch dimension for the
                # children that follow the pooling layer.
                x = x.view(x.size(0), -1)
            else:
                x = module(x)

        if target_activations is None:
            raise ValueError(
                "feature_module is not a direct child of model; "
                "no activations were captured")
        return target_activations, x

def preprocess_image(img):
    """Turn an image array into a normalized tensor with a batch dimension.

    The array is converted with ``transforms.ToTensor``, normalized per
    channel with the mean/std constants below, and a leading dimension of
    size 1 is added.
    NOTE(review): the demo script passes an (H, W, RGB) float32 array in
    [0, 1]; other layouts are not checked here.
    """
    pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # copy() hands the transform a fresh array rather than the caller's
    # object (the demo script passes a channel-reversed view).
    tensor = pipeline(img.copy())
    return tensor.unsqueeze(0)

def show_cam_on_image(img, mask):
    """Overlay ``mask`` as a JET heat map on ``img`` and return a uint8 image.

    ``mask`` is scaled by 255 and colour-mapped, the heat map is scaled back
    by 1/255 and added to ``img``, and the sum is divided by its maximum
    before being converted to ``np.uint8``.
    NOTE(review): both inputs are presumably in [0, 1] - confirm at callers.
    """
    mask_u8 = np.uint8(255 * mask)
    colored = cv2.applyColorMap(mask_u8, cv2.COLORMAP_JET)
    blended = np.float32(colored) / 255 + np.float32(img)
    # Rescale so the brightest value of the blend maps to 255.
    blended = blended / np.max(blended)
    return np.uint8(255 * blended)

class GradCam:
    """Grad-CAM heat map for one class, taken at a chosen intermediate layer.

    The map is the ReLU of the captured activation channels, each weighted
    by the spatial mean of the class score's gradient with respect to that
    channel. Only the first sample of the batch is used.
    """

    def __init__(self, model, feature_module, target_layer_names, use_cuda):
        # Example from the demo script:
        #   GradCam(model=model, feature_module=model.layer4,
        #           target_layer_names=["2"], use_cuda=args.use_cuda)
        self.model = model
        self.feature_module = feature_module
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        self.extractor = ModelOutputs(self.model, self.feature_module, target_layer_names)

    def forward(self, input_img):
        """Plain forward pass through the model.

        ``__call__`` does not use this method; the banner print is a
        debugging marker kept from the original code to make that visible.
        """
        print("林麻子".center(50,'-'))
        return self.model(input_img)

    def __call__(self, input_img, target_category=None):
        """Compute the class activation map for ``input_img``.

        Args:
            input_img: input tensor; its last two dimensions give the size
                the map is resized to. Only batch index 0 is used.
            target_category: class index to explain. ``None`` selects the
                index of the largest value in the network output.

        Returns:
            A 2-D float32 array with the spatial size of ``input_img``,
            min-max scaled to [0, 1]. A constant map is returned as all
            zeros instead of NaNs.
        """
        if self.cuda:
            input_img = input_img.cuda()

        # features: list of captured activations; output: class scores.
        features, output = self.extractor(input_img)
        if target_category is None:
            target_category = np.argmax(output.detach().cpu().numpy())

        # The score of the chosen class, written as <one_hot, output> so
        # backward() yields d(score)/d(activation).
        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0, target_category] = 1
        one_hot = torch.from_numpy(one_hot).requires_grad_(False)
        if self.cuda:
            one_hot = one_hot.cuda()
        loss = torch.sum(one_hot * output)

        self.feature_module.zero_grad()
        self.model.zero_grad()
        loss.backward()

        # With a single target layer this is the only recorded gradient.
        # NOTE(review): with several target layers the hooks fire in
        # backward order, so index 0 should pair with features[-1] - verify
        # before relying on multi-layer use.
        grads_val = self.extractor.get_gradients()[0].detach().cpu().numpy()
        target = features[-1].detach().cpu().numpy()[0, :]  # (channels, h, w)

        # One weight per channel: the gradient averaged over both spatial axes.
        weights = np.mean(grads_val, axis=(2, 3))[0, :]

        # Weighted sum over channels, followed by ReLU.
        cam = np.tensordot(weights, target, axes=1).astype(np.float32, copy=False)
        cam = np.maximum(cam, 0)

        # cv2.resize expects the target size as (width, height).
        width, height = int(input_img.shape[3]), int(input_img.shape[2])
        cam = cv2.resize(cam, (width, height))

        # Min-max normalisation; skip the division when the map is constant
        # to avoid 0/0 producing NaNs.
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            cam = cam / peak
        return cam


class GuidedBackpropReLU(Function):
    """ReLU with a guided-backpropagation backward pass.

    Forward is an ordinary ReLU (``clamp(min=0)``, equivalent to
    ``input * (input > 0)``). Backward lets a gradient through only where
    the forward input was positive AND the incoming gradient is positive.
    """

    @staticmethod
    def forward(ctx, input_img):
        """Return ``max(input_img, 0)`` and remember the input for backward."""
        # Only the input is needed to rebuild the mask in backward(); the
        # output is not saved so no extra activation is kept alive.
        ctx.save_for_backward(input_img)
        return torch.clamp(input_img, min=0.0)

    @staticmethod
    def backward(ctx, grad_output):
        """Mask ``grad_output`` by positive input and positive gradient."""
        (input_img,) = ctx.saved_tensors
        positive_input = (input_img > 0).type_as(grad_output)
        positive_grad = (grad_output > 0).type_as(grad_output)
        return grad_output * positive_input * positive_grad


class _GuidedBackpropReLUModule(torch.nn.Module):
    """``nn.Module`` wrapper so GuidedBackpropReLU can live in a module tree.

    Storing the bare ``GuidedBackpropReLU.apply`` function in ``_modules``
    breaks anything that walks the tree afterwards (``eval()``,
    ``zero_grad()``, wrapping the same model a second time).
    """

    def forward(self, input_img):
        return GuidedBackpropReLU.apply(input_img)


class GuidedBackpropReLUModel:
    """Gradient of one class score with respect to the input image.

    On construction every sub-module whose class is named ``ReLU`` is
    replaced in place by a guided-backprop ReLU, so the passed-in model is
    modified.
    """

    def __init__(self, model, use_cuda):
        # Example from the demo script:
        #   GuidedBackpropReLUModel(model=model, use_cuda=args.use_cuda)
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        def recursive_relu_apply(module_top):
            """Depth-first replacement of ReLU children under ``module_top``."""
            for idx, module in module_top._modules.items():
                if module is None:
                    continue
                recursive_relu_apply(module)
                # Matched by class name, as in the original code.
                if module.__class__.__name__ == 'ReLU':
                    module_top._modules[idx] = _GuidedBackpropReLUModule()

        # replace ReLU with GuidedBackpropReLU
        recursive_relu_apply(self.model)

    def __call__(self, input_img, target_category=None):
        """Return d(score[target_category]) / d(input_img) for batch item 0.

        Args:
            input_img: 4-D input tensor; it is not modified.
            target_category: class index; ``None`` selects the index of the
                largest value in the model output.

        Returns:
            A numpy array holding the gradient for the first sample, i.e.
            the input's shape without its leading batch dimension.
        """
        if self.cuda:
            input_img = input_img.cuda()

        # Work on a private leaf copy: the caller's tensor is left untouched
        # and gradients cannot accumulate across repeated calls.
        input_img = input_img.detach().clone().requires_grad_(True)
        output = self.model(input_img)
        if target_category is None:
            target_category = np.argmax(output.detach().cpu().numpy())

        # Select the class score as <one_hot, output>; the one-hot vector
        # itself needs no gradient.
        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0, target_category] = 1
        one_hot = torch.from_numpy(one_hot).requires_grad_(False)
        if self.cuda:
            one_hot = one_hot.cuda()

        loss = torch.sum(one_hot * output)
        loss.backward()

        img_grad = input_img.grad.cpu().numpy()
        return img_grad[0, :, :, :]

def get_args():
    """Parse the command-line options of the demo script.

    ``--use-cuda`` is honoured only when ``torch.cuda.is_available()``
    reports a usable device; the effective setting is printed. Returns the
    parsed namespace with ``use_cuda`` and ``image_path``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--use-cuda', action='store_true', default=False,
                     help='Use NVIDIA GPU acceleration')
    cli.add_argument('--image-path', type=str, default='./examples/both.png',
                     help='Input image path')
    parsed = cli.parse_args()

    # Fall back to CPU when CUDA was requested but is not available.
    parsed.use_cuda = parsed.use_cuda and torch.cuda.is_available()
    print("Using GPU for acceleration" if parsed.use_cuda
          else "Using CPU for computation")
    return parsed

def deprocess_image(img):
    """Convert an arbitrary-range array into a displayable uint8 image.

    The array is standardized (zero mean, unit std with a 1e-5 guard),
    mapped through ``y = 0.1 * x + 0.5``, clipped to [0, 1] and scaled to
    [0, 255].
    See https://github.com/jacobgil/keras-grad-cam/blob/master/grad-cam.py#L65
    """
    centered = img - np.mean(img)
    # The epsilon keeps a constant image from dividing by zero.
    standardized = centered / (np.std(centered) + 1e-5)
    squashed = np.clip(standardized * 0.1 + 0.5, 0, 1)
    return np.uint8(squashed * 255)

if __name__ == '__main__':
    # Usage: python grad_cam.py [--image-path PATH] [--use-cuda]
    # 1. Loads an image with OpenCV.
    # 2. Preprocesses it for the ResNet-50 model and converts it to a tensor.
    # 3. Runs Grad-CAM for the selected category and writes cam.jpg.
    # 4. Runs guided backpropagation and writes gb.jpg and cam_gb.jpg.

    args = get_args()
    model = models.resnet50(pretrained=True)
    grad_cam = GradCam(model=model, feature_module=model.layer4,
                       target_layer_names=["2"], use_cuda=args.use_cuda)

    # Flag 1 forces a 3-channel BGR image. imread() returns None instead of
    # raising when the file is missing or cannot be decoded.
    bgr_img = cv2.imread(args.image_path, 1)
    if bgr_img is None:
        raise IOError("Could not read image: {}".format(args.image_path))
    bgr_img = np.float32(bgr_img) / 255  # float32 in [0, 1]
    # OpenCV loads as BGR; the model input is built from the RGB view.
    rgb_img = bgr_img[:, :, ::-1]
    input_img = preprocess_image(rgb_img)

    # If None, returns the map for the highest scoring category.
    # Otherwise, targets the requested category.
    target_category = None
    grayscale_cam = grad_cam(input_img, target_category=target_category)

    # GradCam already resizes the map to the input tensor's spatial size,
    # so this is normally a no-op; cv2.resize takes (width, height).
    grayscale_cam = cv2.resize(grayscale_cam, (bgr_img.shape[1], bgr_img.shape[0]))

    # The colour map and cv2.imwrite both use BGR, so the overlay is built
    # on the BGR image; using the RGB view swapped red and blue in cam.jpg.
    cam = show_cam_on_image(bgr_img, grayscale_cam)
    cv2.imwrite("cam.jpg", cam)

    # -----------------------------------------------------------------------------------

    # NOTE: this replaces the ReLU modules of `model` in place, so it must
    # come after the Grad-CAM pass above.
    gb_model = GuidedBackpropReLUModel(model=model, use_cuda=args.use_cuda)
    gb = gb_model(input_img, target_category=target_category)  # (C, H, W) gradient
    gb = gb.transpose((1, 2, 0))  # -> (H, W, C)

    # Repeat the single-channel map over three channels to mask the gradient.
    cam_mask = cv2.merge([grayscale_cam, grayscale_cam, grayscale_cam])
    cam_gb = deprocess_image(cam_mask*gb)
    # NOTE(review): the gradient channels follow the RGB model input while
    # imwrite assumes BGR - confirm whether a channel flip is wanted here.
    cv2.imwrite('cam_gb.jpg', cam_gb)

    gb = deprocess_image(gb)
    cv2.imwrite('gb.jpg', gb)


# 运行程序: python gradcam.py --image-path 1.jpg
# 运行程序: python gradcam.py --image-path ./examples/both.png

运行效果截图展示(cmd下输入命令运行程序: python gradcam.py --image-path 1.jpg):

你可能感兴趣的:(opencv-python,4.4.0.46,学习笔记,NumPy学习笔记)