# coding: utf-8
# ## EDA & pre-processing
# In[1]:
import os , shutil
# Source folder with the full Kaggle training set, and the root of the
# reduced copy that the rest of this script works on.
original_dataset_dir = '/home/lkl/.kaggle/competitions/dogs-vs-cats/train'
base_dir = '/home/lkl/.kaggle/competitions/dogs-vs-cats-small'
# In[2]:
# One folder per split ...
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
# In[3]:
# ... and one sub-folder per class inside each split.
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')

# makedirs(exist_ok=True) creates the missing parents (base_dir and the split
# folder) and makes this cell safe to re-run; plain os.mkdir raised
# FileExistsError as soon as any of the folders already existed.
for new_dir in (train_cats_dir, train_dogs_dir,
                validation_cats_dir, validation_dogs_dir,
                test_cats_dir, test_dogs_dir):
    os.makedirs(new_dir, exist_ok=True)
# In[17]:
# Copy a fixed slice of the original images into every split/class folder.
# Each entry is (file-name prefix, image indices, destination folder); the
# entries are processed in the listed order.
copy_plan = [
    ('cat', range(1000), train_cats_dir),
    ('cat', range(1000, 1500), validation_cats_dir),
    ('cat', range(1500, 2000), test_cats_dir),
    ('dog', range(1000), train_dogs_dir),
    ('dog', range(1000, 1500), validation_dogs_dir),
    ('dog', range(1500, 2000), test_dogs_dir),
]
for prefix, indices, target_dir in copy_plan:
    fnames = ['{}.{}.jpg'.format(prefix, i) for i in indices]
    for fname in fnames:
        src = os.path.join(original_dataset_dir, fname)
        dst = os.path.join(target_dir, fname)
        shutil.copyfile(src, dst)
# In[30]:
# Sanity check: number of files now in the training cats folder.
len(os.listdir(train_cats_dir))
# ## Build the model
# The small split keeps 4000 pictures in total: 1000 training, 500 validation and 500 test images for each of the two classes (cats and dogs).
# In[31]:
from keras import layers
from keras import models
from keras import optimizers
# In[35]:
# Baseline convnet: four Conv2D + MaxPooling2D stages followed by a dense
# classifier with a single sigmoid output (binary cat/dog decision).
# The (150, 150, 3) input shape matches the target_size=(150, 150) that the
# image generators in this script resize every picture to.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
# In[36]:
model.summary()
# In[38]:
# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.RMSprop(lr=1e-4),
    metrics=['acc'],
)
# In[11]:
from keras.preprocessing.image import ImageDataGenerator

# Both generators only rescale pixel values by 1/255 (the trailing dot makes
# the literal a float).
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Shared settings: resize to 150x150, 20 images per batch, binary labels
# taken from the class sub-folder.
flow_options = dict(target_size=(150, 150), batch_size=20, class_mode='binary')
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = test_datagen.flow_from_directory(validation_dir,
                                                        **flow_options)
# In[43]:
# Pull a single batch to inspect its shapes; one batch is enough, so there
# is no need to loop over the generator.
data_batch, labels_batch = next(iter(train_generator))
print('data batch shape :', data_batch.shape)
print('labels batch shape :', labels_batch.shape)
# ## Model Processing
# In[45]:
###开始训练
# Train the baseline model from the directory generators.
history = model.fit_generator(
    train_generator,                       # training data
    steps_per_epoch=100,                   # 100 batches x 20 images = 2000 training images per epoch
    epochs=30,
    validation_data=validation_generator,  # held-out validation data
    validation_steps=50,                   # 50 batches x 20 images = 1000 validation images
)
# Author's observation: validation stops improving around epoch 22 while
# training accuracy reaches 0.99 -- a textbook case of overfitting.
# ## Save & Load model
# In[3]:
model.save('cats_and_dogs_small_1.h5')
# In[6]:
from keras.models import load_model
model = load_model('cats_and_dogs_small_1.h5')
# In[5]:
# The notebook used the IPython magic `pwd` here, which is a NameError in a
# plain Python script; print the working directory (where the .h5 file was
# written) explicitly instead.
print(os.getcwd())
# ## Display learning curves of loss and accuracy
# In[51]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded during training.
recorded = history.history
acc, val_acc = recorded['acc'], recorded['val_acc']
loss, val_loss = recorded['loss'], recorded['val_loss']
epochs = range(1, len(acc) + 1)  # epoch numbers start at 1

# Figure 1: accuracy (dots = training, solid line = validation).
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Figure 2: loss, drawn the same way.
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
# 过拟合的原因主要是参数太多样本太少,可采用drop_out, L2正则化,数据扩增等等方法
# ## Using data augmentation
# In[13]:
# Random transformations applied to each image when it is drawn.
datagen = ImageDataGenerator(
    rotation_range=40,        # random rotation
    width_shift_range=0.2,    # random horizontal shift
    height_shift_range=0.2,   # random vertical shift
    shear_range=0.2,          # random shear
    zoom_range=0.2,           # random zoom
    horizontal_flip=True,     # random left/right flip
    fill_mode='nearest',      # how newly exposed pixels are filled
)
# ## Display augmented training images
# In[29]:
import matplotlib.pyplot as plt
from keras.preprocessing import image

# Full paths of every training cat picture; the first one is previewed.
fnames = [os.path.join(train_cats_dir, entry)
          for entry in os.listdir(train_cats_dir)]
img_path = fnames[0]
img = image.load_img(img_path, target_size=(150, 150))
x = image.img_to_array(img)     # one image as a 3-D array
x = x.reshape((1, *x.shape))    # prepend a batch axis of size 1, as flow() takes batches

# flow() yields augmented batches indefinitely, so stop after four images.
for i, batch in enumerate(datagen.flow(x, batch_size=1), start=1):
    plt.figure(i - 1)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    if i % 4 == 0:
        break
plt.show()
# ## new convnet with dropout
# In[4]:
from keras import models
from keras import layers
from keras import optimizers

# Second convnet: three Conv2D + MaxPooling2D stages, then Dropout(0.5)
# between Flatten and the dense classifier to reduce overfitting.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), input_shape=(150, 150, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.RMSprop(lr=1e-4),
    metrics=['acc'],
)
# ## Training with data-augmentation
# In[5]:
from keras.preprocessing.image import ImageDataGenerator

# Training images are rescaled and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
# Validation data must NOT be augmented: only rescale it.
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary')

history = model.fit_generator(
    train_generator,
    # 100 batches of 32 augmented images per epoch; this draws more images
    # than the training folder holds, so the generator has to cycle.
    steps_per_epoch=100,
    epochs=100,
    validation_data=validation_generator,
    # One full pass over the validation set. The previous hard-coded 50
    # steps x 32 = 1600 samples exceeded the 1000 validation images, so some
    # images were evaluated twice per epoch.
    validation_steps=len(validation_generator))
# ## save model 2
# In[6]:
model.save('cats_and_dogs_small_2.h5')
# In[1]:
from keras.models import load_model
model = load_model('cats_and_dogs_small_2.h5')
# In[17]:
import matplotlib.pyplot as plt

# Learning curves of the augmented + dropout run.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

# (title, training series, training label, validation series, validation label)
curve_specs = (
    ('Training and validation accuracy',
     acc, 'Training acc', val_acc, 'Validation acc'),
    ('Training and validation loss',
     loss, 'Training loss', val_loss, 'Validation loss'),
)
for position, spec in enumerate(curve_specs):
    title, train_series, train_label, val_series, val_label = spec
    if position:
        # Every curve pair after the first goes on its own figure.
        plt.figure()
    plt.plot(epochs, train_series, 'bo', label=train_label)
    plt.plot(epochs, val_series, 'b', label=val_label)
    plt.title(title)
    plt.legend()
plt.show()
#
# In[1]:
from keras.applications import VGG16

# Convolutional base of VGG16 with ImageNet weights. include_top=False leaves
# out the original dense classifier, so the network can take this script's
# own 150x150 RGB inputs.
conv_base = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(150, 150, 3),
)
# In[2]:
conv_base.summary()
# In[12]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

# Paths of the small dataset created earlier in this script.
base_dir = '/home/lkl/.kaggle/competitions/dogs-vs-cats-small/'
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validation', 'test')
)

# Feature extraction only rescales the images; no augmentation.
datagen = ImageDataGenerator(rescale=1./255)
batch_size = 20
def extract_features(directory, sample_count):
    """Run images from *directory* through ``conv_base`` and collect the outputs.

    Images are read with the module-level ``datagen`` in batches of the
    module-level ``batch_size`` and resized to 150x150.

    Args:
        directory: Root folder containing one sub-folder per class.
        sample_count: Number of samples to extract.

    Returns:
        A ``(features, labels)`` pair: ``features`` has shape
        ``(sample_count, 4, 4, 512)`` (the conv base output for each image)
        and ``labels`` has shape ``(sample_count,)`` holding the binary class
        label of each image.

    Note:
        The generator is read until ``sample_count`` samples are collected,
        so asking for more samples than the directory holds is expected to
        yield repeated images -- confirm against the generator in use.
    """
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    labels = np.zeros(shape=(sample_count,))
    if sample_count <= 0:
        # Nothing requested: skip building the generator and predicting.
        return features, labels

    generator = datagen.flow_from_directory(
        directory,
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='binary')

    filled = 0  # number of rows of features/labels written so far
    for input_batch, labels_batch in generator:
        features_batch = conv_base.predict(input_batch)
        # Track the real number of rows instead of assuming i * batch_size:
        # a batch can be shorter than batch_size, and sample_count need not
        # be a multiple of it. The old slice assignment failed with a shape
        # mismatch in both cases.
        take = min(len(features_batch), sample_count - filled)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
        if filled >= sample_count:
            # The generator never stops on its own, so break explicitly.
            break
    return features, labels
# In[3]:
# Extract VGG16 features for every split (2000 / 1000 / 1000 images).
train_features, train_labels = extract_features(train_dir, 2000)
# In[4]:
validation_features, validation_labels = extract_features(validation_dir, 1000)
# In[5]:
test_features, test_labels = extract_features(test_dir, 1000)
# The extracted features have shape (samples, 4, 4, 512). To classify them
# with a densely connected network they are flattened to (samples, 8192).
# In[6]:
flat_dim = 512 * 4 * 4
train_features = train_features.reshape((2000, flat_dim))
validation_features = validation_features.reshape((1000, flat_dim))
test_features = test_features.reshape((1000, flat_dim))
# In[13]:
##這部分是用VGG16進行預先抽取然後再分類(只有少量的全連接層加上分類器),其速度特別快
from keras import models
from keras import layers
from keras import optimizers

# Small dense classifier trained on the pre-extracted VGG16 features. This is
# fast because the convolutional base is not run during training.
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=(4*4*512)))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))  # squashes the output into (0, 1)

# Use the RMSprop class, as the earlier models in this script do; the
# lower-case `optimizers.rmsprop` alias is not available in every Keras
# release.
model.compile(optimizer=optimizers.RMSprop(lr=2e-5),
              loss='binary_crossentropy',
              metrics=['acc'])

history = model.fit(train_features, train_labels,
                    epochs=30,        # 30 passes over the extracted features
                    batch_size=20,    # 20 samples per gradient update
                    validation_data=(validation_features, validation_labels))
# ## 繪制學習曲線
# In[16]:
import matplotlib.pyplot as plt

# Learning curves of the classifier trained on extracted features.
acc, val_acc, loss, val_loss = (
    history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
)
epochs = range(1, len(acc) + 1)

# Accuracy figure.
plt.plot(epochs, acc, 'bo', label='Training_acc')
plt.plot(epochs, val_acc, 'b', label='Validation_acc')
plt.title('Training and validation accuracy')
plt.legend()

# Loss figure.
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
# 上圖可以發現,無論是測試還是訓練集,其準確度都較高達到了0.9,但是觀察loss曲線發現其仍舊存在過擬合現象,故仍可以繼續優化,而過擬合的原因初步是因爲訓練集樣本較少,所以可以考慮使用數據變換進行擴充
# ## 將VGG16不做特徵抽取而直接作爲訓練時神經網絡的一部分
# In[14]:
from keras import models
from keras import layers

# Stack a dense classifier directly on top of the VGG16 convolutional base so
# the whole pipeline can be trained end to end on augmented images.
model = models.Sequential([
    conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.summary()
# Author's observation: this model has about five times the parameters of
# the earlier network and would take far too long to train in full, hence
# the VGG16 weights are frozen below.
# In[24]:
model.trainable_weights
# In[22]:
# Freeze the convolutional base. This has to happen before compile().
conv_base.trainable = False
# After freezing, only the two Dense layers' kernels and biases remain trainable.
len(model.trainable_weights)
# In[23]:
model.summary()
# ## 使用固定的VGG16,採用數據擴充訓練
# In[16]:
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers

train_datagen = ImageDataGenerator(
    rescale=1./255,           # normalise pixel values
    rotation_range=40,        # random rotation
    width_shift_range=0.2,    # random horizontal shift
    height_shift_range=0.2,   # random vertical shift
    shear_range=0.2,          # random shear (slanting) transform
    zoom_range=0.2,           # random zoom
    horizontal_flip=True,     # random horizontal flip
    fill_mode='nearest',      # how pixels exposed by a transform are filled
)
# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    batch_size=20,
    target_size=(150, 150),
    class_mode='binary',
)
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=20,
)
# In[ ]:
# Use the RMSprop class, as the earlier models in this script do; the
# lower-case `optimizers.rmsprop` alias is not available in every Keras
# release.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.RMSprop(lr=2e-5),
    metrics=['acc'])

# The data comes from generators, so the generator variant of fit is used.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,      # 100 batches x 20 images = 2000 training images
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50)      # 50 batches x 20 images = 1000 validation images
# In[32]:
import matplotlib.pyplot as plt

# Learning curves of the frozen-VGG16 + augmentation run.
curves = history.history
acc = curves['acc']
val_acc = curves['val_acc']
loss = curves['loss']
val_loss = curves['val_loss']
epochs = range(1, len(acc) + 1)

# Accuracy: dots for training, solid line for validation.
plt.plot(epochs, acc, 'bo', label='Training_acc')
plt.plot(epochs, val_acc, 'b', label='Validation_acc')
plt.title('Training and validation accuracy')
plt.legend()

# Loss goes on a separate figure.
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
# 此時acc和loss收斂到了較好的水平,過擬合問題基本去除了,如果想進一步去除過擬合問題則考慮從數據集入手,且代價較大
# ## Fine-tuning the model
# 作爲Freeze的補充方法,fine-tuning關注於修改部分的VGG16的結構,通常以頂層爲主,使得其適合自己的數據集
# ![image.png](attachment:image.png)
# In[2]:
conv_base.summary()
# In[9]:
# Fine-tuning: keep every layer before 'block5_conv1' frozen and unfreeze
# that layer together with all layers that follow it.
conv_base.trainable = True
set_trainable = False
for layer in conv_base.layers:
    # The flag flips to True at block5_conv1 and stays True afterwards.
    set_trainable = set_trainable or layer.name == 'block5_conv1'
    layer.trainable = set_trainable
conv_base.summary()
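# Unfreezing starts at block5_conv1, and the layers after it (block5_conv2, block5_conv3, ...) become trainable as well: once set_trainable flips to True in the loop above it stays True for every following layer, while all earlier layers remain frozen.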
# ## Training fine-tuning model
# In[18]:
# Re-compile so the changed `trainable` flags take effect, with a very low
# learning rate to keep the updates to the unfrozen VGG16 layers small.
# Use the RMSprop class, as the earlier models in this script do; the
# lower-case `optimizers.rmsprop` alias is not available in every Keras
# release.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-5),
              metrics=['acc'])

history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50,
)
# In[19]:
import matplotlib.pyplot as plt

# Learning curves of the fine-tuning run.
acc, val_acc = history.history['acc'], history.history['val_acc']
loss, val_loss = history.history['loss'], history.history['val_loss']
epochs = range(1, len(acc) + 1)

# Accuracy on the current figure.
plt.plot(epochs, acc, 'bo', label='Training_acc')
plt.plot(epochs, val_acc, 'b', label='Validation_acc')
plt.title('Training and validation accuracy')
plt.legend()

# Loss on a new figure.
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
# 將該不規則的曲線平滑化 ,前一個元素0.8,後一個元素0.2
# In[22]:
def smooth_curve(points, factor=0.8):
    """Return an exponentially smoothed copy of *points*.

    The first value is kept as is; every later value is replaced by
    ``previous_smoothed * factor + value * (1 - factor)``.

    Args:
        points: Iterable of numbers (e.g. a per-epoch metric).
        factor: Weight given to the previous smoothed value.

    Returns:
        A new list with one smoothed value per input value.
    """
    result = []
    for value in points:
        if not result:
            # Nothing to blend with yet: the first point seeds the curve.
            result.append(value)
            continue
        result.append(result[-1] * factor + value * (1 - factor))
    return result
# Plot the same learning curves after exponential smoothing.
smoothed_acc = smooth_curve(acc)
smoothed_val_acc = smooth_curve(val_acc)
plt.plot(epochs, smoothed_acc, 'bo', label='Smoothed training acc')
plt.plot(epochs, smoothed_val_acc, 'b', label='Smoothed validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()
smoothed_loss = smooth_curve(loss)
smoothed_val_loss = smooth_curve(val_loss)
plt.plot(epochs, smoothed_loss, 'bo', label='Smoothed training loss')
plt.plot(epochs, smoothed_val_loss, 'b', label='Smoothed validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
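# Validation loss can rise while validation accuracy also rises: the plotted loss is the mean of the per-sample losses, whereas accuracy only depends on which side of the decision threshold each prediction falls. A few confidently wrong predictions can therefore raise the mean loss even while more samples are classified correctly.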
# ## Evaluate the model on the test set
# In[23]:
# Final evaluation on the held-out test images (rescaled only, no augmentation).
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',   # binary classification labels
)
# 50 batches x 20 images = the 1000 test images.
evaluation = model.evaluate_generator(test_generator, steps=50)
test_loss, test_acc = evaluation
print('test acc:', test_acc)
# 注意上述的條件只用了2000張圖片(10%的訓練集)就達到了92.5%正確率
# ## 訓練可視化
# In[1]:
from keras.models import load_model

# Reload the dropout/augmentation convnet saved earlier in this script.
model = load_model('cats_and_dogs_small_2.h5')
model.summary()
# In[30]:
# Load a single test picture and turn it into a batch of one.
import numpy as np
from keras.preprocessing import image

img_path = '/home/lkl/.kaggle/competitions/dogs-vs-cats-small/test/cats/cat.1700.jpg'
img = image.load_img(img_path, target_size=(150, 150))
img_tensor = image.img_to_array(img)
# Add a leading batch axis: the model expects a batch of images.
img_tensor = np.expand_dims(img_tensor, axis=0)
# Same 1/255 rescaling the training generators applied.
img_tensor /= 255.
img_tensor.shape
# In[34]:
import matplotlib.pyplot as plt

# Show the 150x150 input picture.
plt.imshow(img_tensor[0])
plt.show()
# In[36]:
from keras import models

# Outputs of the first eight layers of the loaded model.
layer_output = [layer.output for layer in model.layers[:8]]
# One input, eight outputs. The keyword is `outputs` (plural); `output` is
# only a legacy spelling that newer Keras releases reject.
activation_model = models.Model(inputs=model.input, outputs=layer_output)
# In[37]:
# predict() returns one array per requested layer output.
activations = activation_model.predict(img_tensor)
first_layer_activation = activations[0]  # activation of the first layer
# In[38]:
# The last axis holds one feature map per filter of the first Conv2D layer.
print(first_layer_activation.shape)
# 從而輸出維度爲conv2d_1的output shape
# In[62]:
import matplotlib.pyplot as plt

# Draw every channel of the first layer's activation. The previous
# range(0, 31) stopped one short and never showed the last channel.
for i in range(first_layer_activation.shape[-1]):
    plt.matshow(first_layer_activation[0, :, :, i], cmap='viridis')
plt.show()
# ## 可視化特征向量
# In[103]:
# Names of the first six layers (the conv / pooling stages). Later layers are
# left out because their outputs no longer have a spatial grid to display.
layer_names = [layer.name for layer in model.layers[:6]]
images_per_row = 16  # feature maps drawn per row of the grid

# zip() pairs each name with its activation and stops after the six names.
for layer_name, layer_activation in zip(layer_names, activations):
    n_features = layer_activation.shape[-1]  # number of channels (feature maps)
    size = layer_activation.shape[1]         # side length of one feature map
    n_cols = n_features // images_per_row    # number of grid rows needed
    display_grid = np.zeros((size * n_cols, images_per_row * size))

    for col in range(n_cols):
        for row in range(images_per_row):
            # Copy the channel: the in-place normalisation below would
            # otherwise overwrite the stored activations through the view.
            channel_image = layer_activation[
                0, :, :, col * images_per_row + row].copy()
            # Standardise, then map to a displayable 0-255 range.
            channel_image -= channel_image.mean()
            # The epsilon (same value deprocess_image uses) avoids dividing
            # by zero for a channel that is constant for this input.
            channel_image /= channel_image.std() + 1e-5
            channel_image *= 64
            channel_image += 128
            channel_image = np.clip(channel_image, 0, 255).astype('uint8')
            display_grid[col * size:(col + 1) * size,
                         row * size:(row + 1) * size] = channel_image

    # One figure per layer, sized in proportion to the grid.
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1],
                        scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.grid(False)
    plt.imshow(display_grid, aspect='auto', cmap='viridis')
# In[104]:
plt.show()
# 從上可知越高層的卷积,其学到的特征向量就越加抽象
# ## 过滤器可视化
# In[35]:
from keras.applications import VGG16
from keras import backend as K

# VGG16 convolutional base only; no fixed input size is given here.
model = VGG16(weights='imagenet', include_top=False)

layer_name = 'block3_conv1'
filter_index = 0

# Objective: the mean activation of one filter of the chosen layer.
# Gradient ascent on the input image will increase this value.
layer_output = model.get_layer(layer_name).output
loss = K.mean(layer_output[:, :, :, filter_index])
# In[36]:
# Gradient of the objective with respect to the input image.
grads = K.gradients(loss, model.input)[0]
# In[37]:
# Normalise the gradient by its RMS so every ascent step has a comparable
# size; the 1e-5 guards against division by zero.
grads_rms = K.sqrt(K.mean(K.square(grads)))
grads /= (grads_rms + 1e-5)
# In[38]:
# Callable mapping an input image batch to [loss value, gradient value]; it
# has to be called with actual image data to produce numbers.
iterate = K.function([model.input], [loss, grads])
## 前者需要再调用该对象并赋予真正的输入
# In[39]:
def deprocess_image(x):
    """Convert an arbitrary numeric array into a displayable uint8 image.

    The values are standardised, rescaled to a standard deviation of 0.1
    around 0.5, clipped to [0, 1] and finally mapped to integers in
    [0, 255].

    The input is left untouched: the work is done on a copy. Previously the
    caller's array was overwritten in place, and integer arrays failed on
    the in-place float arithmetic.

    Args:
        x: Array-like of numbers, any shape.

    Returns:
        A new ``uint8`` array with the same shape as *x*.
    """
    x = np.array(x)  # np.array copies, so the caller's data is never modified
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(np.float64)
    x -= x.mean()             # centre on zero
    x /= (x.std() + 1e-5)     # unit standard deviation; epsilon avoids dividing by zero
    x *= 0.1                  # standard deviation becomes 0.1
    x += 0.5                  # centre on 0.5 so most values land inside [0, 1]
    x = np.clip(x, 0, 1)      # cut off the values outside [0, 1]
    x *= 255
    return np.clip(x, 0, 255).astype('uint8')
##求当loss最大化时对应的grads
# Start from a grey image with a little noise: values in [128, 148).
input_img_data = np.random.random((1, 150, 150, 3)) * 20 + 128
step = 1  # size of each gradient-ascent step
for i in range(40):
    loss_value, grads_value = iterate([input_img_data])
    # Gradient ASCENT: move the image along the gradient to increase the
    # filter's activation. The previous `+= grads_value + input_img_data`
    # also added the image to itself, doubling it on every iteration, and
    # never used `step`.
    input_img_data += grads_value * step
y = deprocess_image(input_img_data)
# ## 注意
# Numpy数组与C数组一样,变量都类似于指针,对同一块内存区域的引用
# In[40]:
def generate_pattern(layer_name, filter_index, size=150):
    """Build an image that strongly activates one filter of ``model``.

    Starting from a noisy grey image, 40 steps of gradient ascent are run on
    the mean activation of filter *filter_index* in layer *layer_name* of
    the module-level ``model``.

    Args:
        layer_name: Name of the layer that holds the filter.
        filter_index: Index of the filter along the layer's last axis.
        size: Side length in pixels of the square image to generate.

    Returns:
        The result of ``deprocess_image`` applied to the generated
        ``(size, size, 3)`` image.
    """
    # Objective to maximise: mean activation of the selected filter.
    activation = model.get_layer(layer_name).output
    objective = K.mean(activation[:, :, :, filter_index])

    # Gradient of the objective w.r.t. the input image, normalised by its RMS
    # (the 1e-5 avoids a division by zero).
    gradient = K.gradients(objective, model.input)[0]
    gradient /= (K.sqrt(K.mean(K.square(gradient))) + 1e-5)
    ascent_step = K.function([model.input], [objective, gradient])

    # Grey image with a little noise: values in [128, 148).
    canvas = np.random.random((1, size, size, 3)) * 20 + 128
    step = 1
    for _ in range(40):
        _, grad_values = ascent_step([canvas])
        canvas += grad_values * step

    # Drop the batch axis before converting to a displayable image.
    return deprocess_image(canvas[0])
# In[43]:
import matplotlib.pyplot as plt
import numpy as np

# Pattern that maximally activates filter 0 of block3_conv1.
pattern = generate_pattern('block3_conv1', 0)
plt.imshow(pattern)
# In[44]:
plt.show()
# In[87]:
# Scratch experiment: a single gradient step applied to fresh noise, reusing
# the last gradient value from the ascent loop earlier in this script.
noise = np.random.random((1, 150, 150, 3)) * 20 + 128
input_img_data = noise
input_img_data += grads_value
img = deprocess_image(input_img_data)
# In[88]:
img
# In[17]:
plt.show()
# filter同大小的随机噪音
# 由于本身是由对loss做梯度下降得到的结果,所以可对loss做梯度上升,就可以反映filter在原图中对于哪些部分学习的倾向,越亮的区域代表梯度变化的越多,即主要学习的区域,通过使用随机噪声做背景以此可以视作filter的学习倾向.
# In[103]:
##创建block1_conv1整层的可视化
def create_vision(layer_name, size=64, margin=5, grid=8):
    """Draw a grid of filter patterns for one layer.

    Patterns for the first ``grid * grid`` filters of *layer_name* are
    generated with ``generate_pattern`` and tiled into one image, which is
    drawn on a new 20x20 matplotlib figure. Showing the figure is left to
    the caller.

    Args:
        layer_name: Name of the layer whose filters are visualised.
        size: Side length in pixels of each filter pattern.
        margin: Width in pixels of the black gap between neighbouring tiles.
        grid: Number of tiles per row and per column.
    """
    side = grid * size + (grid - 1) * margin
    # Allocate the canvas as uint8. The patterns are already uint8, but
    # assigning them into the default float64 canvas stored them as floats,
    # which is why the result previously had to be cast before imshow.
    results = np.zeros((side, side, 3), dtype='uint8')

    for i in range(grid):
        for j in range(grid):
            filter_img = generate_pattern(layer_name, i + (j * grid), size=size)
            # i selects the block along axis 0, j the block along axis 1.
            row_start = i * (size + margin)
            col_start = j * (size + margin)
            results[row_start:row_start + size,
                    col_start:col_start + size, :] = filter_img

    plt.figure(figsize=(20, 20))
    plt.imshow(results)
# In[92]:
plt.show()
# Filter grids for a selection of layers, from shallow to deep.
# Author's observation: consecutive convolutions inside the same block (with
# no pooling in between) learn filters that do not differ much.
for vis_layer in ('block1_conv2', 'block2_conv1', 'block3_conv1',
                  'block4_conv1', 'block5_conv1', 'block5_conv3'):
    create_vision(vis_layer)
    plt.show()
# ## heatmap to visual
# In[1]:
from keras.applications import VGG16

# Full VGG16 this time, including its dense ImageNet classifier.
model = VGG16(weights='imagenet')
# In[2]:
model.summary()
# The summary shows that the network expects 224x224 inputs.
# In[4]:
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
import numpy as np

img_path = '/home/lkl/Pictures/Selection_002.png'
img = image.load_img(img_path, target_size=(224, 224))
# Add a leading batch axis, then apply the VGG16-specific input
# preprocessing.
x = np.expand_dims(image.img_to_array(img), axis=0)
x = preprocess_input(x)
# In[1]:
preds = model.predict(x)  # output of the final prediction layer
# Labels of the three highest-scoring classes.
top_three = decode_predictions(preds, top=3)[0]
print('Predicted:', top_three)
# In[18]:
preds
# In[19]:
np.argmax(preds)
# 可知最大的是第386类,对应的类名为African_elephant
#
# 于是可以考虑将其第386的分类对应的模型输出保留,再将其覆盖到原图生成热力图上观察其学习倾向,即导向反向传播
#
# ICCV论文:Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization
# 对几种传播的分析:
# ![415974-1e2add2963798ca0.png](attachment:415974-1e2add2963798ca0.png)
# In[20]:
# Grad-CAM for class index 386 (the top prediction found above).
african_elephant_output = model.output[:, 386]
# In[51]:
import keras.backend as K

# Last convolutional layer of VGG16; its output feeds the classifier.
last_conv_layer = model.get_layer('block5_conv3')

# Gradient of the class score w.r.t. that layer's output feature maps.
grads = K.gradients(african_elephant_output, last_conv_layer.output)[0]
# Average over the batch and both spatial axes: one weight per channel.
pooled_grads = K.mean(grads, axis=(0, 1, 2))

# Evaluate the channel weights and the feature maps of the first (only)
# image in the batch for the input x.
iterate = K.function([model.input],
                     [pooled_grads, last_conv_layer.output[0]])
pooled_grads, conv_layer_output_value = iterate([x])

# Weight every channel by how much it influences the class score; the
# per-channel weights broadcast over the last axis.
conv_layer_output_value *= pooled_grads

# Channel-wise mean of the weighted feature maps is the raw heatmap.
heatmap = np.mean(conv_layer_output_value, axis=-1)
# In[56]:
import matplotlib.pyplot as plt

# Keep only positive influence: negative entries are set to 0.
heatmap = np.maximum(heatmap, 0)
# Scale to [0, 1]. Skip the division when the whole map is zero, which would
# otherwise compute 0/0 and fill the heatmap with NaN.
heatmap_peak = np.max(heatmap)
if heatmap_peak > 0:
    heatmap /= heatmap_peak
plt.matshow(heatmap)
plt.show()
# ## 使用opencv将其覆盖在原图上
# In[58]:
import cv2

img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable or missing file by returning None
    # instead of raising; fail here with a clear message rather than on
    # img.shape below.
    raise IOError('Could not read image: {}'.format(img_path))

# Resize the heatmap to the original picture (cv2 takes width, height).
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
heatmap = np.uint8(255 * heatmap)                        # [0, 1] -> [0, 255]
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)   # colourise the heatmap
# 0.4 is the heatmap intensity, kept low so the colours do not hide the picture.
superimposed_img = heatmap * 0.4 + img
cv2.imwrite('/home/lkl/Desktop/elephant_Grad_CAM.jpg', superimposed_img)
# ![elephant_Grad_CAM.jpg](attachment:elephant_Grad_CAM.jpg)
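# Observation: where the coloured region covers the large elephant it leaves out the ear area, which suggests that for class 386 the ears also act as a criterion at the top convolutional layer, and the small elephant's features respond more strongly. Is this related to the kernel size? Normally one would expect the bodies of both elephants to be detected.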
#
# 1.卷积核大小,深度等参数对可视化的影响
#
# 2.更多的可视化方法
#
# 3.可视化是否能够重构学习can
#
# In[ ]: