Cat and Dog Image Classification with Keras

# coding: utf-8

# ## EDA & pre-processing

# In[1]:


import os , shutil
original_dataset_dir = '/home/lkl/.kaggle/competitions/dogs-vs-cats/train'
base_dir = '/home/lkl/.kaggle/competitions/dogs-vs-cats-small'
os.mkdir(base_dir)


# In[2]:


train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
os.mkdir(train_dir)
os.mkdir(validation_dir)
os.mkdir(test_dir)


# In[3]:


train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')
os.mkdir(train_cats_dir)
os.mkdir(train_dogs_dir)
os.mkdir(validation_cats_dir)
os.mkdir(validation_dogs_dir)
os.mkdir(test_cats_dir)
os.mkdir(test_dogs_dir)


# In[17]:


fnames = ['cat.{}.jpg'.format(i) for i in range(1000)]

for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(train_cats_dir, fname)
    shutil.copyfile(src, dst)

fnames = ['cat.{}.jpg'.format(i) for i in range(1000, 1500)]

for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(validation_cats_dir, fname)
    shutil.copyfile(src, dst)

fnames = ['cat.{}.jpg'.format(i) for i in range(1500, 2000)]

for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(test_cats_dir, fname)
    shutil.copyfile(src, dst)


# In[29]:


fnames = ['dog.{}.jpg'.format(i) for i in range(1000)]

for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(train_dogs_dir, fname)
    shutil.copyfile(src, dst)

fnames = ['dog.{}.jpg'.format(i) for i in range(1000, 1500)]

for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(validation_dogs_dir, fname)
    shutil.copyfile(src, dst)

fnames = ['dog.{}.jpg'.format(i) for i in range(1500, 2000)]

for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(test_dogs_dir, fname)
    shutil.copyfile(src, dst)


# In[30]:


len(os.listdir(train_cats_dir))


# ## Build the model

# The small subset contains 2,000 images per class: 1,000 for training, 500 for validation and 500 for testing.
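
# A quick sanity check (optional sketch): count the copied files to confirm the 1,000/500/500 split per class. It only uses the directory variables defined above.

# In[ ]:


# Optional: print the number of images in each sub-directory.
for name, path in [('train cats', train_cats_dir), ('train dogs', train_dogs_dir),
                   ('validation cats', validation_cats_dir), ('validation dogs', validation_dogs_dir),
                   ('test cats', test_cats_dir), ('test dogs', test_dogs_dir)]:
    print(name, ':', len(os.listdir(path)))
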

# In[31]:


from keras import layers
from keras import models 


# In[35]:


model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape = (150, 150, 3)))  
## where does 150x150 come from? It is the target_size that the data generators below resize every image to
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3) ,activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))


# In[36]:


model.summary()


# In[38]:


from keras import optimizers
model.compile(loss = 'binary_crossentropy',
              optimizer = optimizers.RMSprop(lr = 1e-4),
              metrics = ['acc'])
             


# In[11]:


from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(rescale = 1./255)
test_datagen = ImageDataGenerator(rescale = 1./255)  ## the trailing dot makes 1. a float, so the scaling factor is a float
train_generator = train_datagen.flow_from_directory(   ## yields batches of images read from the directory
        train_dir,
        target_size = (150, 150),
        batch_size = 20,
        class_mode = 'binary')
validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size = (150, 150),
        batch_size = 20,
        class_mode = 'binary')


# In[43]:


for data_batch, labels_batch in train_generator:
    print('data batch shape :', data_batch.shape)  ## the generator loops over the images indefinitely, so break after the first batch
    print('labels batch shape :', labels_batch.shape)
    break


# ## Model Processing

# In[45]:


### start training
history = model.fit_generator(
    train_generator,  ## training data generator
    steps_per_epoch = 100,   ## one epoch = 100 batches of 20 samples each (the batch_size set above)
    epochs = 30,  ## 30 epochs
    validation_data = validation_generator,  ## validation data generator
    validation_steps = 50  ## evaluate on 50 validation batches per epoch
        )


# Around epoch 22 the validation accuracy stops improving while the training accuracy reaches 0.99, which is textbook overfitting.
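
# A small sketch to locate the best epoch programmatically; it assumes the `history` object from the training cell above is still in memory.

# In[ ]:


# Find the epoch with the highest validation accuracy (epochs are 1-indexed in the plots below).
import numpy as np
val_acc_history = history.history['val_acc']
best_epoch = int(np.argmax(val_acc_history)) + 1
print('best epoch:', best_epoch, ' val_acc:', val_acc_history[best_epoch - 1])
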

# ## Save  & Load model

# In[3]:


model.save('cats_and_dogs_small_1.h5')


# In[6]:


from keras.models import load_model
model = load_model('cats_and_dogs_small_1.h5')


# In[5]:


pwd


# ## Display learning curves of loss and accuracy

# In[51]:


import matplotlib.pyplot as plt
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)  ## epochs 1 through 30
plt.plot(epochs, acc, 'bo', label = 'Training acc')
plt.plot(epochs, val_acc, 'b', label = 'Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label = 'Training loss')
plt.plot(epochs, val_loss, 'b', label = 'Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()


# Overfitting here comes mainly from having many parameters and few samples; dropout, L2 regularization and data augmentation are the usual remedies.
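
# For reference, a minimal sketch of how an L2 weight penalty could be added to the Dense layer. It is not used in the rest of this notebook, which relies on dropout and data augmentation instead, and the 1e-3 coefficient is an arbitrary illustrative value.

# In[ ]:


# Example only: a Dense layer with L2 weight regularization.
from keras import layers, regularizers
regularized_dense = layers.Dense(512, activation = 'relu',
                                 kernel_regularizer = regularizers.l2(1e-3))
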

# ## Using data augmentation 

# In[13]:


datagen = ImageDataGenerator(
        rotation_range = 40,  ## random rotations up to 40 degrees
        width_shift_range = 0.2,  ## random horizontal shifts up to 20% of the width
        height_shift_range = 0.2,  ## random vertical shifts up to 20% of the height
        shear_range = 0.2,  ## shear transformations (slanted stretching)
        zoom_range = 0.2,  ## random zoom up to 20%
        horizontal_flip = True,  ## random horizontal flips
        fill_mode = 'nearest')  ## how pixels created by the transforms are filled in


# ## Display augmented training images

# In[29]:


import matplotlib.pyplot as plt
from keras.preprocessing import image
fnames = [os.path.join(train_cats_dir, fname) for 
    fname in os.listdir(train_cats_dir)]    ## full path of every training cat image; fname is the loop variable
img_path = fnames[0]
img = image.load_img(img_path, target_size = (150, 150))
x = image.img_to_array(img)  ## a float array of shape (150, 150, 3)
x = x.reshape((1,) + x.shape)  ## reshape to (1, 150, 150, 3); the extra first axis is the batch dimension expected by the generator
i = 0
for batch in datagen.flow(x, batch_size=1):
    plt.figure(i)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    i += 1
    if i % 4 == 0:   ## show 4 augmented versions of the image
        break
plt.show()


# ## new convnet with dropout

# In[4]:


from keras import models
from keras import layers
from keras import optimizers
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), input_shape = (150, 150, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation= 'relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(loss = 'binary_crossentropy',
              optimizer = optimizers.RMSprop(lr = 1e-4),
              metrics = ['acc'])


# ## Training with data-augmentation

# In[5]:


from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(  ## augment the training data on the fly
    rescale = 1./255,
    rotation_range = 40,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,)
test_datagen = ImageDataGenerator(rescale = 1./255)   ## the validation data must not be augmented
train_generator = train_datagen.flow_from_directory(  ## training data generator
    train_dir,
    target_size = (150, 150),
    batch_size = 32,
    class_mode = 'binary')
validation_generator = test_datagen.flow_from_directory(  ## validation data generator
    validation_dir,
    target_size = (150, 150),
    batch_size = 32,
    class_mode ='binary')
history = model.fit_generator(
    train_generator,
    steps_per_epoch = 100,
    epochs = 100,
    validation_data = validation_generator,
    validation_steps = 50)


# ## save model 2 

# In[6]:


model.save('cats_and_dogs_small_2.h5')


# In[1]:


from keras.models import load_model
model = load_model('cats_and_dogs_small_2.h5')


# In[17]:


import matplotlib.pyplot as plt
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label = 'Training acc')
plt.plot(epochs, val_acc, 'b', label = 'Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label = 'Training loss')
plt.plot(epochs, val_loss, 'b', label = 'Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()


# ## Using a pretrained convnet (VGG16) for feature extraction

# In[1]:


from keras.applications import VGG16
conv_base = VGG16(weights = 'imagenet',
                  include_top = False,
                  input_shape = (150, 150, 3))    ## include_top=False drops the ImageNet classifier; input_shape matches our own 150x150x3 images


# In[2]:


conv_base.summary()


# In[12]:


import os
import numpy as np 
from keras.preprocessing.image import ImageDataGenerator
base_dir = '/home/lkl/.kaggle/competitions/dogs-vs-cats-small/'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
datagen = ImageDataGenerator(rescale = 1./255)
batch_size = 20
def extract_features(directory, sample_count):    ## extract VGG16 features for sample_count images from directory
    features = np.zeros(shape = (sample_count, 4, 4, 512))  ## the last pooling layer of conv_base outputs 4x4x512 maps, so the extracted features have the same shape
    labels = np.zeros(shape = (sample_count))  ## one binary label per sample, taken from the class sub-directory
    
    generator = datagen.flow_from_directory(
    directory,  ## root directory containing one sub-directory per class
    target_size = (150, 150),
    batch_size = batch_size,   ## samples per batch
    class_mode = 'binary')
    
    
    i = 0
    for input_batch, labels_batch in generator:   ## the generator yields (images, labels) batches; the labels come from the sub-directory names
        features_batch = conv_base.predict(input_batch)  ## run the batch through VGG16
        features[i * batch_size : (i + 1) * batch_size] = features_batch
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            break
    return features, labels   ## both are plain NumPy arrays
    


# In[3]:


train_features, train_labels = extract_features(train_dir, 2000)


# In[4]:


validation_features, validation_labels = extract_features(validation_dir, 1000)


# In[5]:


test_features, test_labels = extract_features(test_dir, 1000)


# We now have the VGG16-extracted features for every image. To classify them, each (4, 4, 512) feature tensor is flattened and fed into a densely connected classifier that is trained from scratch.
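
# A quick shape check (sketch) before flattening: each feature array should be (samples, 4, 4, 512), giving 4*4*512 = 8192 inputs per sample for the Dense classifier.

# In[ ]:


# Verify the extracted feature shapes before reshaping.
print(train_features.shape)       ## expected: (2000, 4, 4, 512)
print(validation_features.shape)  ## expected: (1000, 4, 4, 512)
print(test_features.shape)        ## expected: (1000, 4, 4, 512)
print('flattened feature length per sample:', 4 * 4 * 512)
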

# In[6]:


train_features = np.reshape(train_features, (2000, 512*4*4 ))
validation_features = np.reshape(validation_features, (1000, 512*4*4))
test_features = np.reshape(test_features, (1000, 512*4*4))


# In[13]:


## Here the features pre-extracted with VGG16 are classified by a small densely connected classifier, so training is very fast.
from keras import models
from keras import layers
from keras import optimizers
model = models.Sequential()
model.add(layers.Dense(256, activation = 'relu', input_dim = 4*4*512))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation = 'sigmoid'))  ## squash the output into [0, 1]
model.compile(optimizer = optimizers.RMSprop(lr = 2e-5),
              loss = 'binary_crossentropy',
              metrics = ['acc'])
history = model.fit(train_features, train_labels,
                    epochs = 30,    ## 30 epochs with batches of 20 samples
                    batch_size = 20,
                    validation_data = (validation_features, validation_labels))


# ## Plotting the learning curves

# In[16]:


import matplotlib.pyplot as plt 
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label = 'Training_acc')
plt.plot(epochs, val_acc, 'b', label = 'Validation_acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo' , label = 'Training loss')
plt.plot(epochs, val_loss, 'b', label = 'Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()


# Both training and validation accuracy reach roughly 0.9, but the loss curves show that the model still overfits, so there is room to improve. The main cause is the small training set, which makes data augmentation a natural next step.

# ## Using VGG16 directly as part of the network instead of as a one-off feature extractor

# In[14]:


from keras import models 
from keras import layers 
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation = 'relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()


# This model has several times more parameters than the previous network and training all of them would take prohibitively long, so we freeze the VGG16 weights.

# In[24]:


model.trainable_weights  


# In[22]:


conv_base.trainable = False  
len(model.trainable_weights)  ## after freezing, only the weights and biases of the Dense layers remain trainable


# In[23]:


model.summary()


# ## Training the frozen VGG16 model with data augmentation

# In[16]:


from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
train_datagen  = ImageDataGenerator(
        rescale = 1./255,  ## rescale pixel values to [0, 1]
        rotation_range = 40,  ## random rotations up to 40 degrees
        width_shift_range  = 0.2,  ## horizontal shifts up to 20% of the width
        height_shift_range = 0.2,  ## vertical shifts up to 20% of the height
        shear_range = 0.2,  ## shear transformations (slanted stretching)
        zoom_range = 0.2,  ## random zoom up to 20%
        horizontal_flip = True,  ## random horizontal flips
        fill_mode = 'nearest'  ## how pixels created by the transforms are filled in
            )
test_datagen = ImageDataGenerator(
        rescale = 1./255)
train_generator = train_datagen.flow_from_directory(
    train_dir,
    batch_size = 20,
    target_size = (150, 150),
    class_mode = 'binary'
)
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size = (150, 150),
    class_mode = 'binary',
    batch_size = 20
)


# In[ ]:


model.compile(
    loss = 'binary_crossentropy',
    optimizer = optimizers.RMSprop(lr = 2e-5),
    metrics = ['acc'])
history = model.fit_generator(  ## the data comes from generators, so use fit_generator
    train_generator,
    steps_per_epoch=100,
    epochs = 30,
    validation_data = validation_generator,
    validation_steps = 50 )   ## 50 batches x 20 images = the full 1,000-image validation set


# In[32]:


import matplotlib.pyplot as plt 
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label = 'Training_acc')
plt.plot(epochs, val_acc, 'b', label = 'Validation_acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo' , label = 'Training loss')
plt.plot(epochs, val_loss, 'b', label = 'Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()


# Accuracy and loss now converge to a good level and the overfitting is largely gone. Squeezing out further gains would mostly require a larger dataset, which is costly.

# ## Fine-tuning the model
# As a complement to freezing, fine-tuning unfreezes some of the top layers of VGG16 so that they can adapt to our own dataset.

# ![image.png](attachment:image.png)

# In[2]:


conv_base.summary()


# In[9]:


conv_base.trainable = True
set_trainable = False
for layer in conv_base.layers:
    if layer.name == 'block5_conv1':
        set_trainable = True
    if set_trainable:
        layer.trainable = True
    else :
        layer.trainable = False
conv_base.summary()


# Once the loop reaches block5_conv1 the set_trainable flag stays True, so block5_conv2 and block5_conv3 (and everything after them) are unfrozen as well, as the check below confirms.
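
# A small sketch to verify which VGG16 layers were actually unfrozen.

# In[ ]:


# Print each layer's trainable flag; everything from block5_conv1 onward should be True.
for layer in conv_base.layers:
    print(layer.name, layer.trainable)
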

# ## Training fine-tuning model

# In[18]:


model.compile(loss = 'binary_crossentropy',
              optimizer = optimizers.RMSprop(lr = 1e-5),
              metrics = ['acc']
              )
history = model.fit_generator(
        train_generator,
        steps_per_epoch = 100,
        epochs = 30,
        validation_data = validation_generator,
        validation_steps = 50
)


# In[19]:


import matplotlib.pyplot as plt 
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label = 'Training_acc')
plt.plot(epochs, val_acc, 'b', label = 'Validation_acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo' , label = 'Training loss')
plt.plot(epochs, val_loss, 'b', label = 'Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()


# Smooth the noisy curves with an exponential moving average: each smoothed point is 0.8 times the previous smoothed value plus 0.2 times the current value.

# In[22]:


def smooth_curve(points, factor = 0.8):
    smoothed_points = []
    for point in points:
        if smoothed_points:
            previous = smoothed_points[-1]
            smoothed_points.append(previous * factor + point * (1 - factor))
        else:
            smoothed_points.append(point)
    return smoothed_points
plt.plot(epochs, smooth_curve(acc), 'bo', label = 'Smoothed training acc') 
plt.plot(epochs, smooth_curve(val_acc), 'b', label = 'Smoothed validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, smooth_curve(loss), 'bo', label = 'Smoothed training loss') 
plt.plot(epochs, smooth_curve(val_loss), 'b', label = 'Smoothed validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()


# The validation loss can rise while validation accuracy improves: the loss is an average of per-sample values and keeps growing with confident mistakes, whereas accuracy only checks whether each prediction lands on the correct side of the threshold. The small numerical sketch below illustrates this.
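
# A tiny numerical illustration (sketch, made-up probabilities): accuracy only cares which side of 0.5 a prediction falls on, while binary cross-entropy also penalizes how confidently wrong a prediction is, so the two can move in opposite directions.

# In[ ]:


# Made-up example: set B has higher accuracy than set A but also higher average loss,
# because its single mistake is a very confident one.
import numpy as np

def binary_crossentropy(y_true, y_pred, eps=1e-7):
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

y_true = np.array([1, 1, 1, 1])
preds_a = np.array([0.6, 0.6, 0.45, 0.45])   ## 2/4 correct, mildly wrong on the rest
preds_b = np.array([0.55, 0.55, 0.55, 0.01]) ## 3/4 correct, but one confident mistake
for name, preds in [('A', preds_a), ('B', preds_b)]:
    acc = np.mean((preds > 0.5) == y_true)
    print(name, 'acc =', acc, 'loss =', round(binary_crossentropy(y_true, preds), 3))
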

# ## Evaluating on the test set

# In[23]:


test_generator = test_datagen.flow_from_directory(
    test_dir,
    batch_size = 20,
    target_size = (150, 150),
    class_mode = 'binary'    ## binary classification
    )
test_loss, test_acc = model.evaluate_generator(test_generator, steps=50)
print('test acc:', test_acc)


# Note that this 92.5% test accuracy was reached with only 2,000 training images, about 10% of the full training set.

# ## Visualizing what the network learned

# In[1]:


from keras.models import load_model
model = load_model('cats_and_dogs_small_2.h5')
model.summary()


# In[30]:


## load a single image
import numpy as np
img_path = '/home/lkl/.kaggle/competitions/dogs-vs-cats-small/test/cats/cat.1700.jpg'
from keras.preprocessing import image
img = image.load_img(img_path, target_size = (150, 150))
img_tensor = image.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis = 0)   ## add a batch dimension, which indexes the image; useful when displaying several images at once
img_tensor /= 255.
img_tensor.shape


# In[34]:


import matplotlib.pyplot as plt
plt.imshow(img_tensor[0])
plt.show() ## display the 150x150 image


# In[36]:


from keras import models
layer_output = [layer.output for layer in model.layers[:8]]  ## outputs of the top 8 layers
activation_model = models.Model(inputs = model.input, outputs = layer_output)  ## one input, eight outputs


# In[37]:


activations = activation_model.predict(img_tensor)
first_layer_activation = activations[0] ## output of the first layer, conv2d_1


# In[38]:


print(first_layer_activation.shape)  ## note the 32 channels, compared with the 3 channels of the input image


# The printed shape matches the output shape of conv2d_1 in model.summary(); the sketch below lists the shapes for all captured layers.
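
# A quick sketch to list the captured layer names and activation shapes side by side, using the `model` and `activations` objects defined above.

# In[ ]:


# Print the name and activation shape of each of the 8 captured layers.
for layer, activation in zip(model.layers[:8], activations):
    print(layer.name, activation.shape)
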

# In[62]:


import matplotlib.pyplot as plt
for i in range(32):
    plt.matshow(first_layer_activation[0, :, :, i], cmap = 'viridis')  ## plot each of the 32 channels
plt.show()


# ## Visualizing every channel of the intermediate activations

# In[103]:


layer_names = []
images_per_row = 16  ## display 16 feature maps per row
for layer in model.layers[:6]:  ## only the first 6 (conv/pooling) layers; Flatten and Dense outputs are not spatial feature maps and would break the grid below
    layer_names.append(layer.name)

for layer_name, layer_activation in zip(layer_names, activations):  ## zip pairs each layer name with its activation tensor
    n_features = layer_activation.shape[-1] ## number of channels (feature maps), i.e. the last axis
    size = layer_activation.shape[1]  ## spatial size of each feature map (height = width)
    n_cols = n_features // images_per_row ## integer division: number of grid rows needed at 16 feature maps per row
    
    display_grid = np.zeros((size * n_cols, images_per_row * size )) ## empty grid that will tile all the feature maps of this layer
    for col in range(n_cols):
        for row in range(images_per_row):
            channel_image = layer_activation[0, :, :, col * images_per_row + row]  ## the feature map for this grid cell
            channel_image -= channel_image.mean() 
            channel_image /= channel_image.std() ## standardize the feature map
            channel_image *= 64
            channel_image += 128  ## rescale so most values land in a displayable 0-255 range
            channel_image  = np.clip(channel_image, 0, 255).astype('uint8')## clip outliers and convert to 8-bit integers to save memory
            display_grid[col * size: (col+1) * size, row * size: (row + 1)*size] = channel_image
    scale = 1./ size 
    plt.figure(figsize=(scale * display_grid.shape[1],
                        scale * display_grid.shape[0])) ## scale the figure size to the grid dimensions
    plt.title(layer_name)
    plt.grid(False)
    plt.imshow(display_grid, aspect='auto', cmap='viridis')
            


# In[104]:


plt.show()


# The deeper the convolutional layer, the more abstract the features it learns.

# ## Visualizing convnet filters

# In[35]:


from keras.applications import VGG16
from keras import backend as K
model = VGG16(weights = 'imagenet',
              include_top = False)
layer_name = 'block3_conv1'
filter_index = 0
layer_output = model.get_layer(layer_name).output
loss = K.mean(layer_output[:, :, :, filter_index])  ## the quantity to maximize: the mean activation of the chosen filter; we will run gradient ascent on the input image to maximize it


# In[36]:


grads = K.gradients(loss , model.input)[0]   ## gradient of the loss with respect to the model input; K.gradients returns a list, so take its first element


# In[37]:


## gradient-normalization trick: divide the gradient by the square root of the mean of its squares (its L2 norm) plus a small epsilon, to smooth the ascent and avoid division by zero
grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)


# In[38]:


iterate = K.function([model.input], [loss, grads])  ## a backend function representing the mapping input -> (loss, gradient);
                                                    ## it is only evaluated later, when called with an actual input array


# In[39]:


def deprocess_image(x):
    x -= x.mean()        ## center on zero
    x /= (x.std() + 1e-5)   ## normalize by the standard deviation (epsilon avoids division by zero)
    x *= 0.1   ## scale the standard deviation to 0.1
    
    x += 0.5   ## shift the mean to 0.5 so most values fall inside [0, 1]
    x = np.clip(x, 0, 1)  ## clip the outliers outside [0, 1]
    x *= 255   
    x = np.clip(x, 0, 255).astype('uint8')  ## convert to uint8 pixel values; TensorFlow expects RGB images either as floats in [0, 1] or integers in [0, 255]
    return x
## gradient ascent: start from gray noise and repeatedly move the input in the direction that increases the loss
input_img_data = np.random.random((1, 150, 150, 3))*20 + 128  ## random values in roughly [128, 148), i.e. gray with a little noise
step = 1
for i in range(40):
    loss_value, grads_value = iterate([input_img_data])
    input_img_data += grads_value * step



y = deprocess_image(input_img_data)


# ## Note
# Like C arrays, NumPy array variables behave like references (pointers) to the same block of memory, so in-place modifications through one variable are visible through every other reference, as the small example below shows.
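
# A minimal illustration (sketch): plain assignment shares memory, while .copy() creates an independent array. This is also why deprocess_image, which modifies its argument in place, changes the caller's array.

# In[ ]:


# In-place operations on a shared NumPy array modify the same underlying memory.
import numpy as np
a = np.ones((2, 2))
b = a            ## b refers to the same memory as a
b += 10          ## in-place update
print(a)         ## a has changed too: all values are 11

c = a.copy()     ## an independent copy
c += 100
print(a)         ## unchanged by modifications to c
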

# In[40]:


def generate_pattern(layer_name, filter_index, size = 150):
    layer_output = model.get_layer(layer_name).output
    loss = K.mean(layer_output[:, :, :, filter_index])
    grads = K.gradients(loss, model.input)[0]
    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5) 
    iterate = K.function([model.input], [loss, grads])
    input_img_data = np.random.random((1, size, size, 3))*20 + 128
    
    step = 1
    for i in range(40):
        loss_values, grad_values = iterate([input_img_data])
        input_img_data += grad_values*step
    img = input_img_data[0]
    return deprocess_image(img)


# In[43]:


import matplotlib.pyplot as plt
import numpy as np
plt.imshow(generate_pattern('block3_conv1', 0))


# In[44]:


plt.show()


# In[87]:


input_img_data = np.random.random((1, 150, 150, 3))*20 + 128

input_img_data +=  grads_value
img = deprocess_image(input_img_data)


# In[88]:


img


# In[17]:


plt.show()


# Random noise with the same shape as the filter's input.

# Because the pattern is produced by running gradient ascent on the filter's response, starting from random noise, the result reveals what the filter tends to look for in an image: brighter regions are where the gradients pushed hardest, i.e. the patterns the filter mainly learns. With random noise as the starting background, the output can be read as the filter's learning preference.

# In[103]:


## visualize all 64 filters of a given layer in an 8x8 grid
def create_vision(layer_name):
    size = 64
    margin = 5
    
    results = np.zeros((8 * size + 7 * margin, 8 * size + 7 * margin, 3))
    for i in range(8):
        for j in range(8):
            filter_img = generate_pattern(layer_name, i + (j * 8), size = size)
            horizontal_start = i * size + i * margin
            horizontal_end = horizontal_start + size
            vertical_start = j * size + j * margin
            vertical_end = vertical_start + size
            results[horizontal_start:horizontal_end, vertical_start:vertical_end, :] = filter_img
    plt.figure(figsize=(20, 20))
    plt.imshow(results.astype('uint8'))   ## results was created as float64, so the uint8 tiles from deprocess_image are upcast on assignment; cast back to uint8 or imshow will not render it correctly


# In[92]:


plt.show()


# In[106]:


create_vision('block1_conv2')
plt.show()


# Comparing the filters shows that consecutive convolutions within the same block (with no pooling in between) learn patterns that differ very little from each other.

# In[112]:


create_vision('block2_conv1')
plt.show()


# In[111]:


create_vision('block3_conv1')
plt.show()


# In[108]:


create_vision('block4_conv1')
plt.show()


# In[113]:


create_vision('block5_conv1')
plt.show()


# In[107]:


create_vision('block5_conv3')
plt.show()


# ## heatmap to visual

# In[1]:


from keras.applications import VGG16
model = VGG16(weights='imagenet')


# In[2]:


model.summary()


# As shown above, the input has to be resized to 224x224.

# In[4]:


from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions

import numpy as np
img_path = '/home/lkl/Pictures/Selection_002.png'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)

x = np.expand_dims(x, axis= 0 )  ## add a batch dimension: (1, 224, 224, 3)

x = preprocess_input(x)  ## VGG16 preprocessing: zero-centers the tensor by subtracting the fixed ImageNet channel means (fixed values, so not tailored to this image)


# In[1]:


preds = model.predict(x)  ## the output of the final prediction layer
print('Predicted:', decode_predictions(preds, top = 3)[0])  ## decode the 3 highest-scoring ImageNet classes in preds


# In[18]:


preds


# In[19]:


np.argmax(preds)  


# The highest-scoring class is index 386, whose label is African_elephant.

# 
# We can therefore keep the model output corresponding to class 386 and overlay its gradient-weighted activation map on the original image as a heatmap, showing which regions the network focused on (guided backpropagation / Grad-CAM).
# 
# ICCV paper: Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization
# An analysis of several propagation variants:
# ![415974-1e2add2963798ca0.png](attachment:415974-1e2add2963798ca0.png)

# In[20]:


african_elephant_output = model.output[:, 386]


# In[51]:


import keras.backend as K
last_conv_layer = model.get_layer('block5_conv3')
grads = K.gradients(african_elephant_output, last_conv_layer.output)[0]  ## gradient of the class-386 score with respect to the output of the last conv layer (the feature maps carrying the richest semantic and class information, fed to the classifier)
                                                                         ## K.gradients returns a list of tensors; element 0 is the one we need
pooled_grads = K.mean(grads, axis=(0,1,2)) ## average the gradient over the spatial dimensions of each feature map, leaving one importance weight per channel

iterate = K.function([model.input] , [pooled_grads, last_conv_layer.output[0]]) ## outputs the pooled gradients and the last conv layer's feature maps; [0] drops the batch dimension (the first image)
pooled_grads, conv_layer_output_value = iterate([x])
for i in range(512):
     conv_layer_output_value[:, :, i] *= pooled_grads[i]    ## the gradient acts as a weight: each channel of the feature maps is scaled by how important it is for the class-386 score
heatmap = np.mean(conv_layer_output_value, axis = -1)  ## average over the channel axis to obtain the class activation heatmap


# In[56]:


import matplotlib.pyplot as plt
heatmap = np.maximum(heatmap, 0)  ## element-wise maximum with 0: set the negative values in the heatmap to zero (a ReLU)
heatmap /= np.max(heatmap)  ## normalize to [0, 1]
plt.matshow(heatmap)
plt.show()


# ## Overlaying the heatmap on the original image with OpenCV

# In[58]:


import cv2
img = cv2.imread(img_path)
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))  ## resize the heatmap to match the original image

heatmap = np.uint8(255 * heatmap)

heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)  ## apply a color map to the heatmap

superimposed_img = heatmap * 0.4 + img  ## 0.4 sets the heatmap intensity so the overlay does not drown out the original image

cv2.imwrite('/home/lkl/Desktop/elephant_Grad_CAM.jpg', superimposed_img)


# ![elephant_Grad_CAM.jpg](attachment:elephant_Grad_CAM.jpg)
# 观察到彩色区域在覆盖大的大象时,刚好避开了耳朵的部位,说明了第384类其对应的最高层卷积时,对于耳朵也成为了评判的标准,且小象对应的特征更强烈,这是否与说明卷积核大小有关?因为就正常而言,应该两个大象的身体都有所检测才对
# 
# 1.卷积核大小,深度等参数对可视化的影响
# 
# 2.更多的可视化方法
# 
# 3.可视化是否能够重构学习can
# 

# In[ ]:




