我在研究生阶段主要做的是深度学习情感识别的可解释性研究,期间做了一点决策机制的可视化工作,主要从类激活、卷积输出特征、卷积核三方面入手,找出面部中对情感识别贡献最大的区域。以下是我工作中所用到的一些技术,都是自己在研究中磕磕绊绊摸索出来的,希望可以对后来的师弟师妹们有一些启发,欢迎交流。
深度学习的可解释性目前仍处于起步阶段,可以将其定义为试图以一种人类可以理解的方式来解释模型的决策过程。大多数的深度学习模型可以通过突出显示输入图像中与预测结果相关的区域来解释,这些基于热力图的解释提供了关于这些显著区域如何促进模型做出相关决策的信息。这种方法的目标是确定每个输入特征对目标输出的贡献。在图像分类任务中,最终分类层之前的特征映射的每个通道都对应于一个特定的类。因此,将内部特征甚至输入图像与输出标签的神经元相关联是合理的。这种可视化通过突出输入图像中的“重要”位置或者强烈影响输出的内部特征,为预测结果提供一种直观的归因解释。常用的基于可视化的解释性方法有类激活映射(Class Activation Mapping, CAM)[96]方法、特征映射可视化方法[97]和梯度加权类激活映射(Gradient-weighted Class Activation Mapping, Grad-CAM)[63]方法。
类激活映射方法通过生成类激活图来可视化卷积神经网络的关注区域,它通过定位分类器所关注的部分来确定一个样本是否属于一类,这个过程的输出称为类激活映射。类激活映射的思想是利用一种特定的卷积神经网络架构来产生可视化的热力图,类激活映射方法定位了模型认为对分类很重要的类特定的图像区域。
1.用到的package
from keras.models import load_model
import cv2
import numpy as np
from sklearn import metrics
from keras.layers import Layer
from keras.layers import Activation
from keras.layers import GlobalMaxPooling2D
from keras.layers import GlobalAveragePooling2D
import keras.backend as K
import matplotlib.pyplot as plt
from matplotlib.image import imread
2.CAM函数
说明:model是我自己用Keras函数式API(Functional API)搭建的模型。
# 类激活图函数
# 说明:keras函数式编程,model是实例化后的模型
def CAM(img_path, model, save_path, layer_name='conv2d_19', input_size=(224, 224)):
    """Save a Grad-CAM heat map overlaid on the image at ``img_path``.

    Despite its name, this function follows the gradient-weighted variant
    (Grad-CAM): the feature maps of one convolutional layer are weighted by
    the spatially averaged gradient of the predicted class score, averaged
    over channels, rectified, normalised and blended with the source image.

    Args:
        img_path: Path of the image to explain (read by OpenCV as 3-channel).
        model: Instantiated Keras model; its input is fed a batch of shape
            ``(1, height, width, 3)`` scaled to ``[0, 1]``.
        save_path: Destination file of the overlay image.
        layer_name: Name of the convolutional layer whose output is
            visualised. Defaults to the layer used originally.
        input_size: ``(height, width)`` expected by the model input.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    # ----------------------------- data preparation -------------------------
    original = cv2.imread(img_path)
    if original is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: {}".format(img_path))

    height, width = input_size
    net_input = original
    if original.shape[:2] != (height, width):
        # cv2.resize expects the target size as (width, height).
        net_input = cv2.resize(original, (width, height))
    net_input = net_input.astype('float32') / 255
    net_input = net_input[np.newaxis, ...]

    preds = model.predict(net_input)
    pred_label = np.argmax(preds[0])

    # ----------------------------- Grad-CAM ---------------------------------
    class_score = model.output[:, pred_label]
    conv_layer = model.get_layer(layer_name)
    grads = K.gradients(class_score, conv_layer.output)[0]
    pooled_grads = K.mean(grads, axis=(0, 1, 2))  # one weight per channel
    fetch = K.function([model.input], [pooled_grads, conv_layer.output[0]])
    pooled_grads_value, conv_layer_output_value = fetch([net_input])

    # Broadcasting weights every channel at once, whatever the channel count.
    heatmap = np.mean(conv_layer_output_value * pooled_grads_value, axis=-1)
    heatmap = np.maximum(heatmap, 0)
    peak = heatmap.max()
    if peak > 0:
        # Skip the normalisation for an all-zero map to avoid 0/0 -> NaN.
        heatmap = heatmap / peak

    # ----------------------------- overlay ----------------------------------
    heatmap = cv2.resize(heatmap, (original.shape[1], original.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    superimposed_img = heatmap * 0.5 + original
    # Saturate explicitly so the file content does not depend on how the
    # encoder treats out-of-range floating point pixels.
    superimposed_img = np.clip(np.rint(superimposed_img), 0, 255).astype('uint8')
    cv2.imwrite(save_path, superimposed_img)
卷积神经网络往往是一种包含多层卷积层的前馈神经网络。在卷积神经网络的低层卷积中,提取到的特征往往是低级的语义特征,如图像的边缘、轮廓、角点等简单的结构;而在高层的卷积中,提取到的特征往往是图像类别等高级语义特征。图像在卷积神经网络的前向传播过程中,每一层都会产生由不同卷积核运算得到的二维数据堆叠而成的三维特征块,我们称之为特征图。
# -*- coding: utf-8 -*-
# https://www.jianshu.com/p/3f533a387c74
# 03011_0;03084_1;
import os
# import face_recognition
import tensorflow
# Select the GPU that this process is allowed to see.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # enumerate GPUs in PCI bus order, starting at 0
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # expose device index 0 only (the earlier note said "third GPU", which does not match this value)
# Let GPU memory be allocated on demand.
config = tensorflow.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): `sess` is not referenced again in the visible code; presumably
# it has to be registered with the Keras backend for `allow_growth` to apply
# to the model - confirm.
sess = tensorflow.compat.v1.Session(config=config)
# The visualisation code uses graph-mode backend calls, so eager mode is turned off.
tensorflow.compat.v1.disable_eager_execution()
from keras import Model
from keras.models import load_model
from keras.utils import plot_model
import cv2
import numpy as np
from sklearn import metrics
from keras.layers import Layer
from keras.layers import Activation
from keras.layers import GlobalMaxPooling2D
from keras.layers import GlobalAveragePooling2D
import keras.backend as K
import os
import matplotlib.pyplot as plt
from matplotlib.image import imread
from keras import models
# 自己定义的层
class GlobalMixPooling2D(Layer):
    """Global pooling layer that blends max pooling and average pooling.

    One trainable scalar (weight name ``initial_weight``, initialised to
    zero) is passed through a sigmoid to obtain a mixing coefficient ``a``;
    the layer outputs ``a * global_max(x) + (1 - a) * global_avg(x)``.
    """

    def __init__(self, **kwargs):
        super(GlobalMixPooling2D, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the single mixing weight."""
        self.kernel = self.add_weight(
            name='initial_weight',
            shape=(1,),
            initializer='zeros',
            trainable=True,
        )
        super(GlobalMixPooling2D, self).build(input_shape)

    def call(self, x):
        """Return the sigmoid-weighted mix of the two global poolings of ``x``."""
        pooled_max = GlobalMaxPooling2D()(x)
        pooled_avg = GlobalAveragePooling2D()(x)
        mix = Activation('sigmoid')(self.kernel)
        weighted_max = mix * pooled_max
        weighted_avg = (1.0 - mix) * pooled_avg
        return weighted_max + weighted_avg

    def compute_output_shape(self, input_shape):
        """Return ``(batch, channels)``, reading channels from axis 3 of the input shape."""
        batch_dim, channel_dim = input_shape[0], input_shape[3]
        return (batch_dim, channel_dim)
# 敏感度定义
def sensitivity(y_true, y_pred):
    """Return the sensitivity (recall) TP / (TP + FN) as a backend tensor.

    Both the product ``y_true * y_pred`` and ``y_true`` are clipped to
    ``[0, 1]`` and rounded before being summed; ``K.epsilon()`` is added to
    the denominator so that it is never exactly zero.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # Positives that were not counted as hits are the false negatives.
    false_negatives = actual_positives - true_positives
    return true_positives / (true_positives + false_negatives + K.epsilon())
# Script: tile the feature maps of one layer into a grid image and save it.
img_path = r"03084_1.jpg"
save_path = r"../03084/03084_multiply_2.jpg"
layer_name = r"multiply_2"

model = load_model(
    "model.h5",
    custom_objects={'sensitivity': sensitivity, 'GlobalMixPooling2D': GlobalMixPooling2D},
)
model.summary()

# ----------------------------- data preparation -----------------------------
img = cv2.imread(img_path)  # read as a 3-channel image
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("cannot read image: {}".format(img_path))
img = cv2.resize(img, (224, 224))
img = img.astype('float32') / 255
img = img.reshape(1, 224, 224, 3)
print(img.shape)

# ----------------------------- layer activations ----------------------------
# Output tensor of the layer selected by `layer_name`.
layer_outputs = model.get_layer(layer_name).output
# Auxiliary model that maps the original input to that layer's output.
activation_model = Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict(img)
print(activations.shape)

# ----------------------------- tiling ---------------------------------------
images_per_row = 16
# NOTE(review): only the first half of the channels is plotted; the halving is
# kept from the original script - confirm it is intentional.
n_features = activations.shape[-1] // 2
# Tiles are written as size x size squares, so the maps are assumed square.
size = activations.shape[1]
n_cols = n_features // images_per_row  # number of complete grid rows
if n_cols == 0:
    raise ValueError(
        "layer {!r} provides {} channels to plot, fewer than one row of {}".format(
            layer_name, n_features, images_per_row))

display_grid = np.zeros((size * n_cols, images_per_row * size))
for col in range(n_cols):
    for row in range(images_per_row):
        # Build the tile in a new array so `activations` is left untouched.
        channel_image = activations[0, :, :, col * images_per_row + row] * 64 + 128
        channel_image = np.clip(channel_image, 0, 255).astype('uint8')
        display_grid[col * size:(col + 1) * size,
                     row * size:(row + 1) * size] = channel_image

# ----------------------------- plotting -------------------------------------
scale = 1. / size
plt.figure(figsize=(scale * display_grid.shape[1],
                    scale * display_grid.shape[0]))
plt.grid(False)  # hide grid lines
plt.imshow(display_grid, aspect='auto', cmap='viridis')

# Create the output directory first so that savefig cannot fail on it.
save_dir = os.path.dirname(save_path)
if save_dir and not os.path.isdir(save_dir):
    os.makedirs(save_dir)
plt.savefig(save_path)
# -*- coding: utf-8 -*-
import os
# import face_recognition
import tensorflow
# Select the GPU that this process is allowed to see.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # enumerate GPUs in PCI bus order, starting at 0
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # expose device index 0 only (the earlier note said "third GPU", which does not match this value)
# Let GPU memory be allocated on demand.
config = tensorflow.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): `sess` is not referenced again in the visible code; presumably
# it has to be registered with the Keras backend for `allow_growth` to apply
# to the model - confirm.
sess = tensorflow.compat.v1.Session(config=config)
# The visualisation code uses graph-mode backend calls, so eager mode is turned off.
tensorflow.compat.v1.disable_eager_execution()
from keras import Model
from keras.models import load_model
import cv2
import numpy as np
from sklearn import metrics
from keras.layers import Layer
from keras.layers import Activation
from keras.layers import GlobalMaxPooling2D
from keras.layers import GlobalAveragePooling2D
import keras.backend as K
import os
import matplotlib.pyplot as plt
from matplotlib.image import imread
from keras import models
# 自己定义的层
class GlobalMixPooling2D(Layer):
    """Global pooling layer that blends max pooling and average pooling.

    One trainable scalar (weight name ``initial_weight``, initialised to
    zero) is passed through a sigmoid to obtain a mixing coefficient ``a``;
    the layer outputs ``a * global_max(x) + (1 - a) * global_avg(x)``.
    """

    def __init__(self, **kwargs):
        super(GlobalMixPooling2D, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the single mixing weight."""
        self.kernel = self.add_weight(
            name='initial_weight',
            shape=(1,),
            initializer='zeros',
            trainable=True,
        )
        super(GlobalMixPooling2D, self).build(input_shape)

    def call(self, x):
        """Return the sigmoid-weighted mix of the two global poolings of ``x``."""
        pooled_max = GlobalMaxPooling2D()(x)
        pooled_avg = GlobalAveragePooling2D()(x)
        mix = Activation('sigmoid')(self.kernel)
        weighted_max = mix * pooled_max
        weighted_avg = (1.0 - mix) * pooled_avg
        return weighted_max + weighted_avg

    def compute_output_shape(self, input_shape):
        """Return ``(batch, channels)``, reading channels from axis 3 of the input shape."""
        batch_dim, channel_dim = input_shape[0], input_shape[3]
        return (batch_dim, channel_dim)
# 敏感度定义
def sensitivity(y_true, y_pred):
    """Return the sensitivity (recall) TP / (TP + FN) as a backend tensor.

    Both the product ``y_true * y_pred`` and ``y_true`` are clipped to
    ``[0, 1]`` and rounded before being summed; ``K.epsilon()`` is added to
    the denominator so that it is never exactly zero.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # Positives that were not counted as hits are the false negatives.
    false_negatives = actual_positives - true_positives
    return true_positives / (true_positives + false_negatives + K.epsilon())
# Restore the trained model; the custom metric and the custom layer have to be
# supplied by name so that the saved file can be deserialised.
model = load_model(
    "model.h5",
    custom_objects={
        'sensitivity': sensitivity,
        'GlobalMixPooling2D': GlobalMixPooling2D,
    },
)
model.summary()

# Names of the layers whose filters are visualised: the slice 83:84 selects
# the single layer at index 83 (or nothing if the model has fewer layers).
layer_names = [layer.name for layer in model.layers[83:84]]
# visualize kernels
def deprocess_image(x):
    """Convert an arbitrary numeric array into a displayable ``uint8`` image.

    The values are standardised (zero mean, unit standard deviation, with
    ``1e-5`` added to the deviation to avoid division by zero), rescaled to a
    standard deviation of 0.1 around 0.5, clipped to ``[0, 1]`` and finally
    mapped to ``[0, 255]``.

    The input is never modified; non-floating inputs are converted to
    ``float64`` first so that integer arrays are accepted as well.

    Args:
        x: Array-like of numbers, any shape.

    Returns:
        A new ``uint8`` array with the same shape as ``x``.
    """
    values = np.asarray(x)
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype('float64')

    # Every step below allocates a new array, so the caller's data stays intact.
    values = values - values.mean()
    values = values / (values.std() + 1e-5)
    values = values * 0.1 + 0.5

    # After clipping to [0, 1] the scaled result already lies in [0, 255].
    values = np.clip(values, 0, 1) * 255
    return values.astype('uint8')
def generate_pattern(layer_name, filter_index, size=150, n_iterations=40, step_size=1.0):
    """Synthesise an input image that maximises the response of one filter.

    Starting from a random grey image, gradient ascent is run on the mean
    activation of channel ``filter_index`` of layer ``layer_name`` in the
    module-level ``model``. The gradient is divided by its root mean square
    (plus ``1e-5``) before every update.

    Args:
        layer_name: Name of the layer that contains the filter.
        filter_index: Channel index (last axis) of the filter in that layer.
        size: Height and width of the generated image.
            NOTE(review): presumably this has to agree with the model's
            input size when that size is fixed - confirm.
        n_iterations: Number of gradient-ascent updates.
        step_size: Factor applied to the normalised gradient in each update.

    Returns:
        ``uint8`` array of shape ``(size, size, 3)`` produced by
        ``deprocess_image``.

    Raises:
        ValueError: If the backend reports no gradient of the filter
            response with respect to the model input.
    """
    layer_output = model.get_layer(layer_name).output
    loss = K.mean(layer_output[:, :, :, filter_index])

    grads = K.gradients(loss, model.input)[0]
    if grads is None:
        raise ValueError(
            "no gradient of filter {} in layer {!r} with respect to the model input".format(
                filter_index, layer_name))
    # Normalise the gradient so that the update magnitude stays comparable.
    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)

    iterate = K.function([model.input], [loss, grads])

    # Random start: values in [128, 148) for every pixel.
    input_img_data = np.random.random((1, size, size, 3)) * 20 + 128
    for _ in range(n_iterations):
        _, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step_size

    return deprocess_image(input_img_data[0])
# For every selected layer: render the patterns of its first 64 filters into
# an 8 x 8 grid separated by black margins, save the figure and show it.
for layer_name in layer_names:
    size = 64
    margin = 5
    grid_extent = 8 * size + 7 * margin
    results = np.zeros((grid_extent, grid_extent, 3), dtype='uint8')
    stride = size + margin  # distance between the origins of adjacent tiles

    for i in range(8):
        for j in range(8):
            filter_img = generate_pattern(layer_name, i + (j * 8), size=size)
            # `i` selects the tile position on the first array axis and `j`
            # on the second one.
            first_axis = slice(i * stride, i * stride + size)
            second_axis = slice(j * stride, j * stride + size)
            results[first_axis, second_axis, :] = filter_img

    plt.figure(figsize=(20, 20))
    plt.imshow(results)
    plt.savefig(layer_name)  # the layer name doubles as the output file name
    plt.show()