Keras 实现 Grad-CAM 基于 VGG16模型

Keras 实现 Grad-CAM 基于 VGG16模型以及猫狗数据集

  • 基于keras 实现VGG-16图片分类模型
    • 数据集下载以及预处理
    • VGG模型训练
  • Grad-CAM

基于keras 实现VGG-16图片分类模型

数据集下载以及预处理

猫狗数据集是kaggle上的一个数据集,可以直接上去登录下载,
在CNN中一般要求输入尺寸固定(原始CAM不需要),我们先将下载好的数据集进行裁剪:

import cv2
import os


# Data preprocessing: resize every image under input_dir to a fixed
# 224x224 shape (the VGG16 input size) and write the result to out_dir.
# Files that cannot be decoded are treated as corrupt and deleted from
# the source directory, mirroring the original cleanup behavior.
def image_tailor(input_dir, out_dir):
    dim = (224, 224)                            # target size for the network input
    for root, dirs, files in os.walk(input_dir):
        for file in files:
            filepath = os.path.join(root, file)  # full path of the source image
            try:
                image = cv2.imread(filepath)
                if image is None:
                    # cv2.imread signals failure by returning None, not raising
                    raise IOError('unreadable image')
                resized = cv2.resize(image, dim)
                cv2.imwrite(os.path.join(out_dir, file), resized)
            except Exception:
                # Narrowed from a bare `except:` so Ctrl-C is not swallowed.
                # Corrupt / unreadable image: report it and drop it from the set.
                print(filepath)
                os.remove(filepath)


input_patch = './train' # source directory of the raw data set
out_patch = './CAD_train'  # destination directory for the resized copies
image_tailor(input_patch, out_patch)
print('reshape finished')

这里碰到的一个问题是电脑内存太小:整个训练集一共25000张图片,每张图片 224*224*3*8bit,一次性全部读入内存开销很大,所以后面需要分批处理。

VGG模型训练

第一次获取数据比较麻烦,得从图片中读取,可以读取后保存成其他形式方便下次读取
最开始一次性读入所有文件,由于数据量太大,无法进行预处理(归一化需要float类型),这里将数据分组,然后进行训练:

import cv2
import os
import shutil


def batch_process(file_path, batch_size, validationnum):
    """Split the 25000-image cat/dog training folder into batch folders.

    file_path: directory of files named like 'cat.0.jpg' / 'dog.0.jpg'
    batch_size: images per 'batchN' folder (assumed even: half cats,
        half dogs per batch — TODO confirm, odd sizes never fill a batch)
    validationnum: images left over for validation; remaining .jpg files
        are moved into 'validationN' folders of 1000 images each.
    """
    # NOTE(review): 25000 is hard-coded for the Kaggle cats-vs-dogs set.
    batchnum = (25000 - validationnum) // batch_size
    val_num = validationnum // 1000
    if not os.path.exists(os.path.join(file_path, "batch1")):
        for i in range(1, batchnum + 1):
            os.mkdir(os.path.join(file_path, "batch" + str(i)))
    if not os.path.exists(os.path.join(file_path, "validation")):
        for i in range(1, val_num + 1):
            os.mkdir(os.path.join(file_path, "validation" + str(i)))

    per_class = batch_size / 2  # images of each class that fit in one batch

    def _move_class(prefix):
        # Move files whose name starts with `prefix` (before the first '.')
        # into successive batch folders, `per_class` files per folder;
        # stop once every batch folder has its share of this class.
        moved = 0
        batch_idx = 1
        for fname in os.listdir(file_path):
            if fname.split(sep='.')[0] != prefix:
                continue
            if moved == per_class:
                moved = 0
                batch_idx += 1
                if batch_idx == batchnum + 1:
                    return
            moved += 1
            shutil.move(os.path.join(file_path, fname),
                        os.path.join(file_path, "batch" + str(batch_idx)))

    # The two original copy-pasted loops collapsed into one helper.
    _move_class('dog')
    _move_class('cat')

    # Whatever .jpg files remain become the validation set, 1000 per folder.
    val_idx = 1
    count = 0
    for fname in os.listdir(file_path):
        if fname.split(sep='.')[-1] != 'jpg':
            continue
        if count == 1000:
            count = 0
            val_idx += 1
        count += 1
        shutil.move(os.path.join(file_path, fname),
                    os.path.join(file_path, 'validation' + str(val_idx)))

    print('batch processed')

# Raw string: '\C' is an invalid escape sequence in a plain string literal.
batch_process(r'.\CAD_train_224', 2000, 5000)

下面是分组后的结果
Keras 实现 Grad-CAM 基于 VGG16模型_第1张图片
分组之后也无法直接训练,GPU显存太小,而模型又比较复杂,所以每次读入2000个样本后再分小批训练,每个 mini-batch 只训练8张图片

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras import regularizers
import numpy as np
import os
import cv2
from keras.models import load_model


def get_datas(file_dir):
    """Load every cat/dog image under file_dir in random order.

    Files must be named like 'cat.<n>.jpg' / 'dog.<n>.jpg'.
    Returns (train_image, label_list): a numpy array of BGR images and a
    numpy int array of labels (0 = cat, 1 = dog), shuffled consistently.
    """
    cats = []
    label_cats = []
    dogs = []
    label_dogs = []
    # Collect file paths and assign labels from the file-name prefix.
    for file in os.listdir(file_dir):
        name = file.split(sep='.')
        if name[0] == 'cat':
            cats.append(os.path.join(file_dir, file))
            label_cats.append(0)
        elif name[0] == 'dog':
            dogs.append(os.path.join(file_dir, file))
            label_dogs.append(1)
    print("There are %d cats\nThere are %d dogs" % (len(cats), len(dogs)))

    # Shuffle paths and labels together so they stay aligned.
    image_list = np.hstack((cats, dogs))
    label_list = np.hstack((label_cats, label_dogs))
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)

    image_list = list(temp[:, 0])
    # hstack coerced labels to strings; go string -> float -> int.
    label_list = [int(float(i)) for i in list(temp[:, 1])]

    train_image = []
    for img_path in image_list:
        # BUG FIX: the original read os.path.join(file_dir, file) here —
        # `file` was the stale variable from the listdir loop above, so
        # every element of train_image was the same image. Read the
        # shuffled path instead.
        image = cv2.imread(img_path)
        train_image.append(image)
    train_image = np.array(train_image)
    # One label per loaded image (the original hard-coded 2000, the batch
    # size used by the training script; this generalizes to any folder size).
    label_list = np.array(label_list[:len(image_list)])
    print('data loaded')
    return train_image, label_list


def create_model():
    """Build a VGG16-style classifier into the global `model` (Sequential).

    13 conv layers in 5 blocks (each ending in 2x2 max-pooling), followed
    by 3 fully connected layers with a 2-way softmax (cat vs dog).
    Reads the globals `model` and `weight_decay`; every conv layer carries
    an L2 kernel regularizer.
    """
    # Block1: two 3x3/64 convs, 224*224*64 -> pooled to 112*112*64
    model.add(Conv2D(64, (3, 3),
                     padding='same',
                     input_shape=(224, 224, 3),
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Conv2D(64, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))

    # Block2: two 3x3/128 convs, 112*112*128 -> pooled to 56*56*128
    # BUG FIX: the first conv here used 64 filters and repeated an
    # input_shape argument; VGG16's block2 uses 128 filters, and only the
    # very first layer of a Sequential model takes input_shape.
    model.add(Conv2D(128, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Conv2D(128, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))

    # Block3: three 3x3/256 convs, 56*56*256 -> pooled to 28*28*256
    model.add(Conv2D(256, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Conv2D(256, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Dropout(0.4))
    model.add(Conv2D(256, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))

    # Block4: three 3x3/512 convs, 28*28*512 -> pooled to 14*14*512
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Dropout(0.4))
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))

    # Block5: three 3x3/512 convs, 14*14*512 -> pooled to 7*7*512
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Dropout(0.4))
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))

    # Fully connected head (sizes differ from stock VGG16's 4096/4096/1000;
    # the original comments claimed 1000/50 but the code has always used
    # 512/16, kept here unchanged).
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.3))
    # 2-way output: cat vs dog
    model.add(Dense(2))
    model.add(Activation('softmax'))


# L2 weight-decay coefficient shared by every conv layer in create_model.
weight_decay = 0.0005
batch_num = 10     # number of 2000-image batch folders to cycle through
num_classes = 2    # cat / dog
epochs = 1000

# Resume from a previous checkpoint when one exists, otherwise build a
# fresh network.
try:
    model = load_model('VGG16-model.h5')
    print("model loaded, training continuing...")
except Exception:
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
    # not silently treated as "no checkpoint found".
    model = Sequential()
    create_model()
    print('failed to load model, create a new model')

# model.summary()
# NOTE(review): lr=0.6 is very aggressive for SGD on a deep net; the
# author reports poor convergence below — confirm before reusing.
sgd = SGD(lr=0.6, decay=1e-6, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Out-of-core training loop: each batch folder holds 2000 images, which
# are loaded, scaled to [0, 1] and fitted with mini-batches of 8.
for epoch in range(epochs):
    for batch in range(batch_num):
        print('epoch:' + str(epoch + 1) + '/' + str(epochs) + '-' * 20 + 'batch:' + str(batch + 1) + '/' + str(batch_num))
        (x_train, y_train) = get_datas(r'.\CAD_train_224\batch' + str(batch + 1))
        x_train = x_train.astype('float32')
        x_train /= 255
        y_train = keras.utils.to_categorical(y_train, num_classes)
        model.fit(x_train, y_train, epochs=1, batch_size=8, validation_split=0, verbose=1)
    # Checkpoint once per full pass over all batch folders.
    model.save('VGG16-model.h5')

# Evaluation scaffold (left disabled by the author):
# x_test, y_test = get_datas(r'.\CAD_train_224\validation1')
# x_test = x_test.astype('float32')
# x_test /= 255
# y_test = keras.utils.to_categorical(y_test, num_classes)
# # loss_and_metrics = model.evaluate(x_test, y_test, batch_size=8)
# pre = model.predict(x_test, batch_size=8)

换了很多学习率,跑了几天最后效果也不好,决定还是用别人训练好的模型

Grad-CAM

由于没有调参的经验,自己训练的模型分类效果总是很差,这里直接用keras里根据imagenet训练好的VGG16模型,整个模型500+M,第一次load比较慢,也可以直接去Git上用迅雷下载,然后放到keras文件目录中的model里,这样速度比较快。

from keras.applications.vgg16 import (
    VGG16, preprocess_input, decode_predictions)
from keras.preprocessing import image
from tensorflow.python.framework import ops
import keras.backend as K
import tensorflow as tf
import numpy as np
import keras
import cv2
import heapq


def load_image(path):
    """Read an image file and return it as a (1, 224, 224, 3) array
    preprocessed for VGG16 (mean subtraction / channel ordering)."""
    pil_img = image.load_img(path, target_size=(224, 224))
    arr = image.img_to_array(pil_img)
    batch = np.expand_dims(arr, axis=0)
    return preprocess_input(batch)


def register_gradient():
    # Register the "GuidedBackProp" gradient once per process. This pokes
    # TensorFlow's private gradient registry, so guard against double
    # registration (RegisterGradient raises on duplicates).
    if "GuidedBackProp" not in ops._gradient_registry._registry:
        @ops.RegisterGradient("GuidedBackProp")
        def _GuidedBackProp(op, grad):
            # Guided backprop: pass the gradient through only where both
            # the incoming gradient and the forward activation are positive.
            dtype = op.inputs[0].dtype
            return grad * tf.cast(grad > 0., dtype) * tf.cast(op.inputs[0] > 0., dtype)


def compile_saliency_function(model, activation_layer='block5_conv3'):
    """Build a backend function mapping (input image, learning phase) to
    the gradient of the given layer's strongest channel response w.r.t.
    the input."""
    inp = model.input
    layers_by_name = {layer.name: layer for layer in model.layers[1:]}
    target_output = layers_by_name[activation_layer].output
    channel_max = K.max(target_output, axis=3)
    grad = K.gradients(K.sum(channel_max), inp)[0]
    return K.function([inp, K.learning_phase()], [grad])


def modify_backprop(model, name):
    # Rebuild VGG16 inside a graph context where every ReLU op uses the
    # gradient registered under `name` (e.g. "GuidedBackProp").
    g = tf.get_default_graph()
    with g.gradient_override_map({'Relu': name}):

        # get layers that have an activation
        layer_dict = [layer for layer in model.layers[1:]
                      if hasattr(layer, 'activation')]

        # swap the Keras relu for the raw tf op so the override map applies
        for layer in layer_dict:
            if layer.activation == keras.activations.relu:
                layer.activation = tf.nn.relu

        # re-instantiate a new model inside the context so the modified
        # activations (and thus the overridden gradient) take effect
        new_model = VGG16(weights='imagenet')
    return new_model


def deprocess_image(x):
    '''
    Convert an arbitrary tensor into a displayable uint8 image array.

    Same normalization as in:
    https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py

    BUG FIX: work on a float copy instead of normalizing `x` in place —
    the original mutated the caller's array (so calling it twice on the
    same array, as the script below does, operated on already-normalized
    data) and raised on integer input (in-place float subtraction).
    '''
    x = np.array(x, dtype='float64')
    if np.ndim(x) > 3:
        x = np.squeeze(x)
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1

    # shift to be centered on 0.5, then clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to a 0-255 RGB array, channels-last for display
    x *= 255
    if K.image_data_format() == 'channels_first':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x


def _compute_gradients(tensor, var_list):
    """Return d(tensor)/d(var) for each var in var_list, substituting a
    zero tensor wherever TensorFlow reports no dependency (None grad)."""
    with tf.GradientTape() as tape:
        raw_grads = tape.gradient(tensor, var_list)
    return [g if g is not None else tf.zeros_like(v)
            for v, g in zip(var_list, raw_grads)]

# ---- Guided backpropagation -------------------------------------------
image_path = r'.\imagenet_test\timg.jpg'
preprocessed_input = load_image(image_path)
model = VGG16()  # ImageNet-pretrained VGG16 from keras.applications

register_gradient()
guided_model = modify_backprop(model, 'GuidedBackProp')
saliency_fn = compile_saliency_function(guided_model)
saliency = saliency_fn([preprocessed_input, 0])  # 0 = test/inference phase
gradcam = saliency[0].transpose(1, 2, 3, 0)
a = np.squeeze(gradcam)
cv2.imshow(r'Guided_BP', deprocess_image(a))
cv2.waitKey(0)
cv2.imwrite(r'.\imagenet_test\Guided_BP.jpg', deprocess_image(a))

# ---- Prediction: report the top-3 ImageNet classes --------------------
pred = model.predict(preprocessed_input)
top1_idx, top2_idx, top3_idx= heapq.nlargest(3, range(len(pred[0])), pred[0].take)
top_1 = decode_predictions(pred)[0][0]
top_2 = decode_predictions(pred)[0][1]
top_3 = decode_predictions(pred)[0][2]
print('Predicted class:')
print('%s (%s , %d) with probability %.2f' % (top_1[1], top_1[0], top1_idx, top_1[2]))
print('%s (%s , %d) with probability %.2f' % (top_2[1], top_2[0], top2_idx, top_2[2]))
print('%s (%s , %d) with probability %.2f' % (top_3[1], top_3[0], top3_idx, top_3[2]))
# Score of the winning class; its gradient w.r.t. the last conv block
# drives the Grad-CAM channel weights below.
class_output = model.output[:, top1_idx]

# ---- Grad-CAM heatmap -------------------------------------------------
# NOTE(review): "block5_pool" is the max-pool AFTER the last conv layer;
# Grad-CAM is usually taken at "block5_conv3" — confirm which is intended.
last_conv_layer = model.get_layer("block5_pool")
grads = K.gradients(class_output, last_conv_layer.output)[0]
pooled_grads = K.mean(grads, axis=(0, 1, 2))  # one scalar weight per channel
iterate = K.function([model.input], [pooled_grads, last_conv_layer.output[0]])
pooled_grads_value, conv_layer_output_value = iterate([preprocessed_input])

# Weight each of the 512 feature-map channels by its pooled gradient.
for i in range(512):
    conv_layer_output_value[:, :, i] *= pooled_grads_value[i]

heatmap = np.mean(conv_layer_output_value, axis=-1)
heatmap = np.maximum(heatmap, 0)  # ReLU: keep positive evidence only
heatmap /= np.max(heatmap)        # scale into [0, 1]

img = cv2.imread(image_path)
img = cv2.resize(img, dsize=(224, 224), interpolation=cv2.INTER_NEAREST)
# img = img_to_array(image)
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
heatmap = np.uint8(255 * heatmap)
cv2.imwrite(r'.\imagenet_test\Heatmap.jpg', heatmap)
cv2.imshow('heatmap', heatmap)
cv2.waitKey(0)

# Overlay the colorized heatmap on the input image to get Grad-CAM.
heatmap2color = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
grd_CAM = cv2.addWeighted(img, 0.6, heatmap2color, 0.4, 0)
cv2.imwrite(r'.\imagenet_test\Grd-CAM.jpg', grd_CAM)
cv2.imshow('Grd-CAM', grd_CAM)
cv2.waitKey(0)

# Guided Grad-CAM = guided-backprop saliency x heatmap (element-wise).
heatmap =cv2.imread(r'.\imagenet_test\Heatmap.jpg')
guided_CAM = saliency[0].transpose(1, 2, 3, 0) * heatmap[..., np.newaxis]
guided_CAM = deprocess_image(guided_CAM)
cv2.imwrite(r'.\imagenet_test\Guided-CAM.jpg', guided_CAM)
cv2.imshow('Guided-CAM', guided_CAM)
cv2.waitKey(0)

导向反向传播没有太理解,直接用的别人的代码,CAM主要成果是heatmap,下面是结果,图是随便找的一张
Keras 实现 Grad-CAM 基于 VGG16模型_第2张图片
Keras 实现 Grad-CAM 基于 VGG16模型_第3张图片Keras 实现 Grad-CAM 基于 VGG16模型_第4张图片Keras 实现 Grad-CAM 基于 VGG16模型_第5张图片

你可能感兴趣的:(CNN)