论文:Learning Deep Features for Discriminative Localization
CAM全称Class Activation Mapping,既类别激活映射图,也被称为类别热力图、显著性图等。是一张和原始图片等同大小图,该图片上每个位置的像素取值范围从0到1,一般用0到255的灰度图表示。可以理解为对预测输出的贡献分布,分数越高的地方表示原始图片对应区域对网络的响应越高、贡献越大。
#使用预训练的VGG网络来演示这种方法
from keras.applications.vgg16 import VGG16
K.clear_session()
model = VGG16(weights='imagenet')
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
import numpy as np
img_path = '/Users/fchollet/Downloads/creative_commons_elephant.jpg'
img = image.load_img(img_path, target_size=(224, 224)) # 大小为224*224的Python图像库图像
x = image.img_to_array(img) # 形状为(224, 224, 3)的float32格式Numpy数组
x = np.expand_dims(x, axis=0) # 添加一个维度,将数组转化为(1, 224, 224, 3)的形状批量
x = preprocess_input(x) #按批量进行预处理(按通道颜色进行标准化)
preds = model.predict(x)
print('Predicted:', decode_predictions(preds, top=3)[0])
#使用Grad-CAM算法展示那些部分最像非洲象
african_elephant_output = model.output[:, 386] # 预测向量中的非洲象元素
last_conv_layer = model.get_layer('block5_conv3') # block5_conv3层的输出特征图,它是VGG16的最后一个卷积层
grads = K.gradients(african_elephant_output, last_conv_layer.output)[0] # 非洲象类别相对于block5_conv3输出特征图的梯度
pooled_grads = K.mean(grads, axis=(0, 1, 2)) # 形状是(512, )的向量,每个元素是特定特征图通道的梯度平均大小
iterate = K.function([model.input], [pooled_grads, last_conv_layer.output[0]]) # 这个函数允许我们获取刚刚定义量的值:对于给定样本图像,pooled_grads和block5_conv3层的输出特征图
pooled_grads_value, conv_layer_output_value = iterate([x]) # 给我们两个大象样本图像,这两个量都是Numpy数组
for i in range(512):
conv_layer_output_value[:, :, i] *= pooled_grads_value[i] # 将特征图数组的每个通道乘以这个通道对大象类别重要程度
heatmap = np.mean(conv_layer_output_value, axis=-1) # 得到的特征图的逐通道的平均值即为类激活的热力图
heatmap = np.maximum(heatmap, 0)
heatmap /= np.max(heatmap)
plt.matshow(heatmap)
plt.show()
用OpenCV来生成一张图像,将原始图像叠加在刚刚得到的热力图上
import cv2
img = cv2.imread(img_path) # 用cv2加载原始图像
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0])) # 将热力图的大小调整为与原始图像相同
heatmap = np.uint8(255 * heatmap) # 将热力图转换为RGB格式
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) # 将热力图应用于原始图像
superimposed_img = heatmap * 0.4 + img # 这里的0.4是热力图强度因子
cv2.imwrite('elephant_cam.jpg', superimposed_img) # 将图像保存到硬盘
# simple implementation of CAM in PyTorch for the networks such as ResNet, DenseNet, SqueezeNet, Inception
# last update by BZ, June 30, 2021
import io
from PIL import Image
from torchvision import models, transforms
from torch.autograd import Variable
from torch.nn import functional as F
import numpy as np
import cv2
import json
# input image
LABELS_file = 'imagenet-simple-labels.json'
image_file = 'test.jpg'
# networks such as googlenet, resnet, densenet already use global average pooling at the end, so CAM could be used directly.
model_id = 1
if model_id == 1:
net = models.squeezenet1_1(pretrained=True)
finalconv_name = 'features' # this is the last conv layer of the network
elif model_id == 2:
net = models.resnet18(pretrained=True)
finalconv_name = 'layer4'
elif model_id == 3:
net = models.densenet161(pretrained=True)
finalconv_name = 'features'
net.eval()
# hook the feature extractor
features_blobs = []
def hook_feature(module, input, output):
features_blobs.append(output.data.cpu().numpy())
net._modules.get(finalconv_name).register_forward_hook(hook_feature)
# get the softmax weight
params = list(net.parameters())
weight_softmax = np.squeeze(params[-2].data.numpy())
def returnCAM(feature_conv, weight_softmax, class_idx):
# generate the class activation maps upsample to 256x256
size_upsample = (256, 256)
bz, nc, h, w = feature_conv.shape
output_cam = []
for idx in class_idx:
cam = weight_softmax[idx].dot(feature_conv.reshape((nc, h*w)))
cam = cam.reshape(h, w)
cam = cam - np.min(cam)
cam_img = cam / np.max(cam)
cam_img = np.uint8(255 * cam_img)
output_cam.append(cv2.resize(cam_img, size_upsample))
return output_cam
normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
preprocess = transforms.Compose([
transforms.Resize((224,224)),
transforms.ToTensor(),
normalize
])
# load test image
img_pil = Image.open(image_file)
img_tensor = preprocess(img_pil)
img_variable = Variable(img_tensor.unsqueeze(0))
logit = net(img_variable)
# load the imagenet category list
with open(LABELS_file) as f:
classes = json.load(f)
h_x = F.softmax(logit, dim=1).data.squeeze()
probs, idx = h_x.sort(0, True)
probs = probs.numpy()
idx = idx.numpy()
# output the prediction
for i in range(0, 5):
print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))
# generate class activation mapping for the top1 prediction
CAMs = returnCAM(features_blobs[0], weight_softmax, [idx[0]])
# render the CAM and output
print('output CAM.jpg for the top1 prediction: %s'%classes[idx[0]])
img = cv2.imread('test.jpg')
height, width, _ = img.shape
heatmap = cv2.applyColorMap(cv2.resize(CAMs[0],(width, height)), cv2.COLORMAP_JET)
result = heatmap * 0.3 + img * 0.5
cv2.imwrite('CAM.jpg', result)
万字长文:特征可视化技术(CAM)
特征可视化技术——CAM