猫狗数据集是kaggle上的一个数据集,可以直接上去登录下载,
在CNN中一般要求输入尺寸固定( 原始CAM不需要),我们先将下载好的数据集进行裁剪:
import cv2
import os
# 数据预处理,把图片数据集的所有图片修剪成固定大小形状
def image_tailor(input_dir, out_dir):
    """Resize every image under input_dir to 224x224 and save it into out_dir.

    Walks input_dir recursively. Files that OpenCV cannot decode are
    reported and deleted from the source tree so they cannot break the
    training pipeline later. Note: cv2.resize to a fixed (224, 224) does
    NOT preserve aspect ratio.

    input_dir: root directory of the downloaded images.
    out_dir:   existing directory that receives the resized copies.
    """
    dim = (224, 224)  # fixed input size required by the CNN
    for root, dirs, files in os.walk(input_dir):
        for file in files:
            filepath = os.path.join(root, file)
            try:
                # cv2.imread returns None (it does not raise) for unreadable
                # files; the subsequent cv2.resize then raises cv2.error,
                # which routes us to the cleanup branch below.
                image = cv2.imread(filepath)
                resized = cv2.resize(image, dim)
                cv2.imwrite(os.path.join(out_dir, file), resized)
            except cv2.error:
                # was a bare `except:` that swallowed every error, including
                # KeyboardInterrupt; narrowed to the decode/resize failure
                print(filepath)
                os.remove(filepath)
    # the trailing cv2.waitKey() was dropped: no window is ever shown here
# Resize the raw Kaggle training set once, into a sibling directory.
source_dir = './train'       # where the downloaded dataset lives
target_dir = './CAD_train'   # where the 224x224 copies are written
image_tailor(source_dir, target_dir)
print('reshape finished')
这里碰到的一个问题是电脑内存太小:整个训练集一共25000张图片,每张图片224*224*3*8bit,
第一次获取数据比较麻烦,得从图片中读取,可以读取后保存成其他形式方便下次读取
最开始一次性读入所有文件,由于数据量太大,无法进行预处理(归一化需要float类型),这里将数据分组,然后进行训练:
import cv2
import os
import shutil
def batch_process(file_path, batch_size, validationnum):
    """Split a flat cat/dog image folder into batch/validation sub-folders.

    Moves files named like "dog.123.jpg" / "cat.456.jpg" into folders
    batch1..batchN (each holding batch_size images, half dogs half cats)
    and validation1..validationM (1000 images each).

    file_path:     directory containing the (already resized) images.
    batch_size:    images per training batch folder.
    validationnum: number of images reserved for validation.

    Assumes the standard Kaggle training set of 25000 images.
    """
    batchnum = (25000 - validationnum) // batch_size  # number of batch folders
    val_num = validationnum // 1000                   # number of validation folders
    if not os.path.exists(os.path.join(file_path, "batch1")):
        for i in range(1, batchnum + 1):
            os.mkdir(os.path.join(file_path, "batch" + str(i)))
    # BUG FIX: the folders are named validation1..validationN, so probe for
    # "validation1" — the old probe for "validation" never matched, making
    # the mkdir calls crash on any second run.
    if not os.path.exists(os.path.join(file_path, "validation1")):
        for i in range(1, val_num + 1):
            os.mkdir(os.path.join(file_path, "validation" + str(i)))
    # images of each class per batch; was `batch_size/2`, a float that never
    # equals the integer counter when batch_size is odd
    per_class = batch_size // 2

    def _fill_batches(class_name):
        # Move per_class images of class_name into each batch folder in turn.
        count = 0
        folder = 1
        for file in os.listdir(file_path):
            if file.split(sep='.')[0] == class_name:
                if count == per_class:
                    count = 0
                    folder += 1
                    if folder == batchnum + 1:
                        break
                count += 1
                shutil.move(os.path.join(file_path, file),
                            os.path.join(file_path, "batch" + str(folder)))

    _fill_batches('dog')
    _fill_batches('cat')
    # Whatever *.jpg remains in the top folder goes to validation, 1000 per
    # folder.
    i = 1
    k = 0
    for file in os.listdir(file_path):
        if file.split(sep='.')[-1] == 'jpg':
            if k == 1000:
                k = 0
                i += 1
                # guard added for symmetry with the batch loops: stop when all
                # validation folders are full instead of "moving" into a
                # non-existent folder (which would rename files into a bogus
                # plain file named validationN+1)
                if i == val_num + 1:
                    break
            k += 1
            shutil.move(os.path.join(file_path, file),
                        os.path.join(file_path, 'validation' + str(i)))
    print('batch processed')
# raw string: '.\C...' is an invalid escape sequence and emits a
# DeprecationWarning on modern Python
batch_process(r'.\CAD_train_224', 2000, 5000)
下面是分组后的结果
分组之后也无法直接训练,GPU显存太小,然后模型又比较复杂,所以每次读入2000个样本后分批训练,每次只训练8个
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras import regularizers
import numpy as np
import os
import cv2
from keras.models import load_model
def get_datas(file_dir):
    """Load every cat/dog image in file_dir and return shuffled data.

    file_dir: folder whose files are named like "cat.123.jpg" / "dog.456.jpg".
    Returns (train_image, label_list): a numpy array of the images in
    shuffled order and the matching numpy label array (cat=0, dog=1).
    """
    cats = []
    label_cats = []
    dogs = []
    label_dogs = []
    # collect full paths and derive the label from the file-name prefix
    for file in os.listdir(file_dir):
        name = file.split(sep='.')
        if name[0] == 'cat':
            cats.append(os.path.join(file_dir, file))
            label_cats.append(0)
        elif name[0] == 'dog':
            dogs.append(os.path.join(file_dir, file))
            label_dogs.append(1)
    print("There are %d cats\nThere are %d dogs" % (len(cats), len(dogs)))
    # shuffle paths and labels together: stack them as two rows, transpose to
    # (path, label) pairs, shuffle the pairs, then split back out
    image_list = np.hstack((cats, dogs))
    label_list = np.hstack((label_cats, label_dogs))
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)
    image_list = list(temp[:, 0])
    # labels were stringified by the object array above; convert back to int
    label_list = [int(float(i)) for i in temp[:, 1]]
    train_image = []
    for path in image_list:
        # BUG FIX: the entries of image_list are already complete paths; the
        # old code read the stale loop variable `file` and re-joined it with
        # file_dir, so every iteration loaded the same (wrong) image.
        train_image.append(cv2.imread(path))
    train_image = np.array(train_image)
    # size the labels to the images actually loaded (was hard-coded [0:2000],
    # which silently mismatched any batch folder of a different size)
    label_list = np.array(label_list[:len(train_image)])
    print('data loaded')
    return train_image, label_list
def create_model():
    """Build a VGG16-style CNN into the module-level `model` Sequential.

    Relies on module-level globals:
      model        -- an empty keras Sequential to append layers onto
      weight_decay -- l2 regularization factor applied to every conv layer

    Input: 224x224x3 images. Output: 2-way softmax (cat / dog).
    """
    # Block1: 224*224*64 -> pool to 112*112*64
    model.add(Conv2D(64, (3, 3),
                     padding='same',
                     input_shape=(224, 224, 3),
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Conv2D(64, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
    # Block2: 112*112*128 -> pool to 56*56*128
    # BUG FIX: the first conv here had 64 filters although its own comment
    # and the VGG16 layout call for 128, plus a spurious input_shape on a
    # non-first layer (silently ignored by Keras). Both corrected.
    model.add(Conv2D(128, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Conv2D(128, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
    # Block3: 56*56*256 -> pool to 28*28*256
    model.add(Conv2D(256, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Conv2D(256, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Dropout(0.4))
    model.add(Conv2D(256, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
    # Block4: 28*28*512 -> pool to 14*14*512
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Dropout(0.4))
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
    # Block5: 14*14*512 -> pool to 7*7*512
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Dropout(0.4))
    model.add(Conv2D(512, (3, 3),
                     padding='same',
                     activation='relu',
                     kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
    # Classifier head (much smaller than real VGG16's 4096-wide FC layers)
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(2))
    model.add(Activation('softmax'))
# l2 weight-decay factor consumed by create_model()
weight_decay = 0.0005
batch_num = 10    # number of 2000-image batch folders to cycle through
num_classes = 2   # cat / dog
epochs = 1000
# Resume from a previous checkpoint if one exists, otherwise build fresh.
try:
    model = load_model('VGG16-model.h5')
    print("model loaded, training continuing...")
except (OSError, IOError, ValueError):
    # was a bare `except:`; narrowed to the errors load_model raises for a
    # missing or incompatible checkpoint file
    model = Sequential()
    create_model()
    print('failed to load model, create a new model')
# model.summary()
# NOTE(review): lr=0.6 is extremely high for SGD on a deep net and is the
# likely cause of the poor convergence described in the text — values around
# 1e-2 or lower are customary. Kept as-is to preserve the original run.
sgd = SGD(lr=0.6, decay=1e-6, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# Outer loop over epochs; each "batch" folder holds 2000 images that are fed
# to model.fit in mini-batches of 8 (all the GPU memory allows). The model is
# checkpointed after every folder so training can resume.
for epoch in range(epochs):
    for batch in range(batch_num):
        print(f'epoch:{epoch + 1}/{epochs}' + '-' * 20 + f'batch:{batch + 1}/{batch_num}')
        folder = r'.\CAD_train_224\batch' + str(batch + 1)
        x_train, y_train = get_datas(folder)
        # scale pixel values into [0, 1]
        x_train = x_train.astype('float32') / 255
        y_train = keras.utils.to_categorical(y_train, num_classes)
        model.fit(x_train, y_train, epochs=1, batch_size=8, validation_split=0, verbose=1)
        model.save('VGG16-model.h5')
# x_test, y_test = get_datas(r'.\CAD_train_224\validation1')
# x_test = x_test.astype('float32')
# x_test /= 255
# y_test = keras.utils.to_categorical(y_test, num_classes)
# # loss_and_metrics = model.evaluate(x_test, y_test, batch_size=8)
# pre = model.predict(x_test, batch_size=8)
# model.save('VGG16-model.h5')
换了很多学习率,跑了几天最后效果也不好,决定还是用别人训练好的模型
由于没有调参的经验,自己训练的模型分类效果总是很差,这里直接用keras里根据imagenet训练好的VGG16模型,整个模型500+M,第一次load比较慢,也可以直接去Git上用迅雷下载,然后放到keras文件目录中的model里,这样速度比较快。
from keras.applications.vgg16 import (
VGG16, preprocess_input, decode_predictions)
from keras.preprocessing import image
from tensorflow.python.framework import ops
import keras.backend as K
import tensorflow as tf
import numpy as np
import keras
import cv2
import heapq
def load_image(path):
    """Read an image file and return it as a preprocessed batch of one,
    shaped (1, 224, 224, 3), ready for VGG16."""
    img = image.load_img(path, target_size=(224, 224))
    arr = image.img_to_array(img)
    batch = np.expand_dims(arr, axis=0)
    return preprocess_input(batch)
def register_gradient():
    """Register the "GuidedBackProp" gradient op with TensorFlow (idempotent).

    Guided backprop lets gradient flow through a ReLU only where BOTH the
    incoming gradient and the layer input are positive.
    """
    # NOTE(review): peeks at TF's private gradient registry so a repeated
    # call does not raise "gradient already registered"
    if "GuidedBackProp" not in ops._gradient_registry._registry:
        @ops.RegisterGradient("GuidedBackProp")
        def _GuidedBackProp(op, grad):
            dtype = op.inputs[0].dtype
            positive_grad = tf.cast(grad > 0., dtype)          # gate on gradient sign
            positive_input = tf.cast(op.inputs[0] > 0., dtype)  # gate on activation sign
            return grad * positive_grad * positive_input
def compile_saliency_function(model, activation_layer='block5_conv3'):
    """Return a K.function mapping (input image, learning phase) to the
    gradient of the chosen layer's channel-wise maximum w.r.t. the input."""
    input_img = model.input
    # skip the input layer itself when indexing layers by name
    layers_by_name = {layer.name: layer for layer in model.layers[1:]}
    target_output = layers_by_name[activation_layer].output
    # strongest channel response at every spatial position
    max_output = K.max(target_output, axis=3)
    saliency = K.gradients(K.sum(max_output), input_img)[0]
    return K.function([input_img, K.learning_phase()], [saliency])
def modify_backprop(model, name):
    """Rebuild VGG16 so every ReLU uses the gradient override `name`
    (e.g. "GuidedBackProp", registered via register_gradient())."""
    g = tf.get_default_graph()
    with g.gradient_override_map({'Relu': name}):
        # swap the Keras relu for tf.nn.relu on every activation-bearing
        # layer so the override map applies when the graph is rebuilt
        for layer in model.layers[1:]:
            if hasattr(layer, 'activation') and layer.activation == keras.activations.relu:
                layer.activation = tf.nn.relu
        # re-instantiate: the new graph is constructed under the override map
        new_model = VGG16(weights='imagenet')
    return new_model
def deprocess_image(x):
    """Convert a gradient/saliency tensor into a displayable uint8 image.

    Same normalization as in:
    https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py

    Returns a new array; the caller's input is never modified.
    """
    # work on a float copy: the original normalized the caller's array in
    # place with -=, /=, *= (and would crash on integer input)
    x = np.array(x, dtype=np.float64)
    if np.ndim(x) > 3:
        x = np.squeeze(x)
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1
    # shift to [0, 1] and clip
    x += 0.5
    x = np.clip(x, 0, 1)
    # convert to RGB range
    x *= 255
    if K.image_data_format() == 'channels_first':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x
def _compute_gradients(tensor, var_list):
    """Gradient of `tensor` w.r.t. each variable in var_list, with None
    gradients replaced by zero tensors of the matching shape."""
    with tf.GradientTape() as tape:
        raw_grads = tape.gradient(tensor, var_list)
    result = []
    for var, grad in zip(var_list, raw_grads):
        result.append(grad if grad is not None else tf.zeros_like(var))
    return result
# ---- Guided backprop saliency for one test image ----
image_path = r'.\imagenet_test\timg.jpg'
preprocessed_input = load_image(image_path)
model = VGG16()
register_gradient()
guided_model = modify_backprop(model, 'GuidedBackProp')
saliency_fn = compile_saliency_function(guided_model)
# learning phase 0 = inference mode
saliency = saliency_fn([preprocessed_input, 0])
# move the batch axis last, then squeeze it away
# (assumes saliency[0] is (1, H, W, C) — TODO confirm against compile_saliency_function)
gradcam = saliency[0].transpose(1, 2, 3, 0)
a = np.squeeze(gradcam)
cv2.imshow(r'Guided_BP', deprocess_image(a))
cv2.waitKey(0)
cv2.imwrite(r'.\imagenet_test\Guided_BP.jpg', deprocess_image(a))
# ---- top-3 ImageNet predictions for the same image ----
pred = model.predict(preprocessed_input)
# indices of the three largest class probabilities
top1_idx, top2_idx, top3_idx= heapq.nlargest(3, range(len(pred[0])), pred[0].take)
top_1 = decode_predictions(pred)[0][0]
top_2 = decode_predictions(pred)[0][1]
top_3 = decode_predictions(pred)[0][2]
print('Predicted class:')
print('%s (%s , %d) with probability %.2f' % (top_1[1], top_1[0], top1_idx, top_1[2]))
print('%s (%s , %d) with probability %.2f' % (top_2[1], top_2[0], top2_idx, top_2[2]))
print('%s (%s , %d) with probability %.2f' % (top_3[1], top_3[0], top3_idx, top_3[2]))
# ---- Grad-CAM: weight the last conv block's feature maps by the gradient
# ---- of the top-1 class score (TF1 graph mode: symbolic ops + K.function)
class_output = model.output[:, top1_idx]
# NOTE(review): "block5_pool" is the pooled 7x7x512 output; Grad-CAM papers
# usually use the conv layer before pooling ("block5_conv3") — confirm intent
last_conv_layer = model.get_layer("block5_pool")
grads = K.gradients(class_output, last_conv_layer.output)[0]
# global-average-pool the gradients: one importance weight per channel
pooled_grads = K.mean(grads, axis=(0, 1, 2))
iterate = K.function([model.input], [pooled_grads, last_conv_layer.output[0]])
pooled_grads_value, conv_layer_output_value = iterate([preprocessed_input])
# scale each of the 512 feature maps by its importance weight
for i in range(512):
    conv_layer_output_value[:, :, i] *= pooled_grads_value[i]
# average over channels, keep positive evidence only, normalize to [0, 1]
heatmap = np.mean(conv_layer_output_value, axis=-1)
heatmap = np.maximum(heatmap, 0)
heatmap /= np.max(heatmap)
img = cv2.imread(image_path)
img = cv2.resize(img, dsize=(224, 224), interpolation=cv2.INTER_NEAREST)
# img = img_to_array(image)
# upsample the heatmap to the image size and save/show it
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
heatmap = np.uint8(255 * heatmap)
cv2.imwrite(r'.\imagenet_test\Heatmap.jpg', heatmap)
cv2.imshow('heatmap', heatmap)
cv2.waitKey(0)
# overlay the colorized heatmap on the input image (60/40 blend)
heatmap2color = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
grd_CAM = cv2.addWeighted(img, 0.6, heatmap2color, 0.4, 0)
cv2.imwrite(r'.\imagenet_test\Grd-CAM.jpg', grd_CAM)
cv2.imshow('Grd-CAM', grd_CAM)
cv2.waitKey(0)
# ---- Guided Grad-CAM: guided backprop saliency gated by the heatmap ----
# re-read the saved heatmap as a 3-channel image so it broadcasts below
heatmap =cv2.imread(r'.\imagenet_test\Heatmap.jpg')
guided_CAM = saliency[0].transpose(1, 2, 3, 0) * heatmap[..., np.newaxis]
guided_CAM = deprocess_image(guided_CAM)
cv2.imwrite(r'.\imagenet_test\Guided-CAM.jpg', guided_CAM)
cv2.imshow('Guided-CAM', guided_CAM)
cv2.waitKey(0)