——欲速则不达,我已经很幸运了,只要珍惜这份幸运就好了,不必患得患失,慢慢来。
import os
import sys
import argparse
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from skimage.segmentation import slic
from lime import lime_image
from pdb import set_trace
from torch.autograd import Variable
args = {
'ckptpath': './checkpoint.pth',
'dataset_dir': './food/'
}
args = argparse.Namespace(**args)
(1)cnn是一系列的卷积层最终得到4**4*512的图像
(2)flatten展平之后,再经过一系列的linear层得到11种的向量
# Model definition——分析这个model的结构:
class Classifier(nn.Module):
def __init__(self):
super(Classifier, self).__init__()
def building_block(indim, outdim):
return [
nn.Conv2d(indim, outdim, 3, 1, 1),
nn.BatchNorm2d(outdim),
nn.ReLU(),
]
def stack_blocks(indim, outdim, block_num):
layers = building_block(indim, outdim)
for i in range(block_num - 1):
layers += building_block(outdim, outdim)
layers.append(nn.MaxPool2d(2, 2, 0))
return layers
cnn_list = []
cnn_list += stack_blocks(3, 128, 3)
cnn_list += stack_blocks(128, 128, 3)
cnn_list += stack_blocks(128, 256, 3)
cnn_list += stack_blocks(256, 512, 1)
cnn_list += stack_blocks(512, 512, 1)
self.cnn = nn.Sequential( * cnn_list) #上面所有的函数,都是为了这个cnn的过程的设计
dnn_list = [
nn.Linear(512 * 4 * 4, 1024),
nn.ReLU(),
nn.Dropout(p = 0.3),
nn.Linear(1024, 11),
]
self.fc = nn.Sequential( * dnn_list)
def forward(self, x):
out = self.cnn(x)
out = out.reshape(out.size()[0], -1)
return self.fc(out)
模型对象的实例化:
# Load trained model
model = Classifier().cuda()
checkpoint = torch.load(args.ckptpath)
model.load_state_dict(checkpoint['model_state_dict'])
# It should display:
# It might take some time, if it is too long, try to reload it.
# Dataset definition
#定义这个dataset了
class FoodDataset(Dataset):
def __init__(self, paths, labels, mode):
# mode: 'train' or 'eval'
self.paths = paths
self.labels = labels
trainTransform = transforms.Compose([
transforms.Resize(size=(128, 128)),
transforms.RandomHorizontalFlip(),
transforms.RandomRotation(15),
transforms.ToTensor(),
])
evalTransform = transforms.Compose([
transforms.Resize(size=(128, 128)),
transforms.ToTensor(),
])
self.transform = trainTransform if mode == 'train' else evalTransform
# pytorch dataset class
def __len__(self):
return len(self.paths)
def __getitem__(self, index):
X = Image.open(self.paths[index])
X = self.transform(X)
Y = self.labels[index]
return X, Y
# help to get images for visualizing
def getbatch(self, indices):
images = []
labels = []
for index in indices:
image, label = self.__getitem__(index)
images.append(image)
labels.append(label)
return torch.stack(images), torch.tensor(labels)
# help to get data path and label
#先分析这个函数,再分析上面的dataset
def get_paths_labels(path):
#定义1个lambda函数
def my_key(name):
return int(name.replace(".jpg",""))+1000000*int(name.split("_")[0])
imgnames = os.listdir(path)
imgnames.sort(key=my_key) #使用这个lambda函数进行sort排序
imgpaths = []
labels = []
for name in imgnames:
imgpaths.append(os.path.join(path, name))
labels.append(int(name.split('_')[0]))
return imgpaths, labels
train_paths, train_labels = get_paths_labels(args.dataset_dir) #没问题,只是key处理了,但是name本身没改变
train_set = FoodDataset(train_paths, train_labels, mode='eval') #可能这里用到的model是已经train好的model
从这个dataset中抽出11张图像进行人工观察:
img_indices = [i for i in range(10)]
images, labels = train_set.getbatch(img_indices)
fig, axs = plt.subplots(1, len(img_indices), figsize=(15, 8))
for i, img in enumerate(images):
axs[i].imshow(img.cpu().permute(1, 2, 0))
# print(labels)
#调用model的eval对整个batch的input进行预测得到1个batch的predicts结果
def predict(input):
# input: numpy array, (batches, height, width, channels)
model.eval()
input = torch.FloatTensor(input).permute(0, 3, 1, 2)
# pytorch tensor, (batches, channels, height, width)
output = model(input.cuda())
return output.detach().cpu().numpy()
#对输入的图像进行分割后标记
def segmentation(input):
# split the image into 200 pieces with the help of segmentaion from skimage
return slic(input, n_segments=200, compactness=1, sigma=1)
#设置画布参数
fig, axs = plt.subplots(1, len(img_indices), figsize=(15, 8))
# fix the random seed to make it reproducible
np.random.seed(16)
for idx, (image, label) in enumerate(zip(images.permute(0, 2, 3, 1).numpy(), labels)):
x = image.astype(np.double)
# numpy array for lime
#调用explainer的explain_instance,传递对应图像x,predict函数,segmentation函数作为它的参数
explainer = lime_image.LimeImageExplainer()
explaination = explainer.explain_instance(image=x, classifier_fn=predict, segmentation_fn=segmentation)
# doc: https://lime-ml.readthedocs.io/en/latest/lime.html?highlight=explain_instance#lime.lime_image.LimeImageExplainer.explain_instance
#调用上面的这个explaination,传递的参数主要是label值 和 num_features种类,其他的就是说是否显示不是重要的地方等。。
lime_img, mask = explaination.get_image_and_mask(
label=label.item(),
positive_only=False,
hide_rest=False,
num_features=11,
min_weight=0.05
)
# turn the result from explainer to the image
# doc: https://lime-ml.readthedocs.io/en/latest/lime.html?highlight=get_image_and_mask#lime.lime_image.ImageExplanation.get_image_and_mask
axs[idx].imshow(lime_img) #axs的第idx位置的图像,就放置这个lime_img了
#show出这些用lime标记的图像咯
plt.show()
plt.close()
(其实就是普通的gradient的方法)
The heatmaps that highlight pixels of the input image that contribute the most in the classification task.
总的来说,就是通过计算每个pixel对于整个loss的gradient,这个gradient就是新的图像的pixel数值
#对图像中的每个pixel的数值进行normalize
def normalize(image):
return (image - image.min()) / (image.max() - image.min())
# return torch.log(image)/torch.log(image.max())
#用于计算saliency的函数
def compute_saliency_maps(x, y, model): #x就是图像, y就是label, model就是分类器
model.eval()
x = x.cuda()
# we want the gradient of the input x
x.requires_grad_()
y_pred = model(x)
loss_func = torch.nn.CrossEntropyLoss()
loss = loss_func(y_pred, y.cuda())
loss.backward()
# saliencies = x.grad.abs().detach().cpu()
saliencies, _ = torch.max(x.grad.data.abs().detach().cpu(),dim=1) #这一步,就是将每个像素的位置的gradient梯度(3个通道中的最大的那个)作为新的图像位置的 像素值
# We need to normalize each image, because their gradients might vary in scale, but we only care about the relation in each image
saliencies = torch.stack([normalize(item) for item in saliencies])
return saliencies
# images, labels = train_set.getbatch(img_indices)
saliencies = compute_saliency_maps(images, labels, model)
# visualize
fig, axs = plt.subplots(2, len(img_indices), figsize=(15, 8))
for row, target in enumerate([images, saliencies]):
for column, img in enumerate(target):
if row==0:
axs[row][column].imshow(img.permute(1, 2, 0).numpy()) #第一行:正常图像显示
# What is permute?
# In pytorch, the meaning of each dimension of image tensor is (channels, height, width)
# In matplotlib, the meaning of each dimension of image tensor is (height, width, channels)
# permute is a tool for permuting dimensions of tensors
# For example, img.permute(1, 2, 0) means that,
# - 0 dimension is the 1 dimension of the original tensor, which is height
# - 1 dimension is the 2 dimension of the original tensor, which is width
# - 2 dimension is the 0 dimension of the original tensor, which is channels
else:
axs[row][column].imshow(img.numpy(), cmap=plt.cm.hot) #第二行:热成像图
plt.show()
plt.close()
Smooth grad
Smooth grad 的方法是,在圖片中隨機地加入 noise,然後得到不同的 heatmap,把這些 heatmap 平均起來就得到一個比較能抵抗 noisy gradient 的結果。
# Smooth grad
#一样的normalize函数
def normalize(image):
return (image - image.min()) / (image.max() - image.min())
#计算出类似于saliency map中的saliencies图像的东西:
def smooth_grad(x, y, model, epoch, param_sigma_multiplier): #总共epoch数,一个常量sigma
model.eval()
#x = x.cuda().unsqueeze(0)
mean = 0
sigma = param_sigma_multiplier / (torch.max(x) - torch.min(x)).item() #sigma就是1个数值
smooth = np.zeros(x.cuda().unsqueeze(0).size()) #一个和x相同大小zero变量
for i in range(epoch):
# call Variable to generate random noise
noise = Variable(x.data.new(x.size()).normal_(mean, sigma**2)) #sigma用作正太分布的标准差参数,抽取noise的抽样,和x一样大
x_mod = (x+noise).unsqueeze(0).cuda()
x_mod.requires_grad_()
y_pred = model(x_mod)
loss_func = torch.nn.CrossEntropyLoss()
loss = loss_func(y_pred, y.cuda().unsqueeze(0))
loss.backward()
# like the method in saliency map
smooth += x_mod.grad.abs().detach().cpu().data.numpy() #smooth用于累计每一个epoch的和
smooth = normalize(smooth / epoch) # don't forget to normalize,取个均值就可以了
# smooth = smooth / epoch
return smooth
# images, labels = train_set.getbatch(img_indices)
smooth = []
for i, l in zip(images, labels):
smooth.append(smooth_grad(i, l, model, 500, 0.4))
smooth = np.stack(smooth)
print(smooth.shape)
fig, axs = plt.subplots(2, len(img_indices), figsize=(15, 8)) #2行喔!
for row, target in enumerate([images, smooth]):
for column, img in enumerate(target):
axs[row][column].imshow(np.transpose(img.reshape(3,128,128), (1,2,0)))
#定义正规化
def normalize(image):
return (image - image.min()) / (image.max() - image.min())
layer_activations = None
#filter的观察函数,返回的是 activation 和 visulization
def filter_explanation(x, model, cnnid, filterid, iteration=100, lr=1):#cnnid是对应的卷积层的id,filterid是对应的过滤器的id
# x: input image
# cnnid, filterid: cnn layer id, which filter
model.eval()
def hook(model, input, output): #定义hook函数,就是将output给到全局的layer_activations
global layer_activations
layer_activations = output
hook_handle = model.cnn[cnnid].register_forward_hook(hook) #hook的handle句柄,下面有解释这行代码的含义
# When the model forward through the layer[cnnid], need to call the hook function first
# The hook function save the output of the layer[cnnid]
# After forwarding, we'll have the loss and the layer activation
# Filter activation: x passing the filter will generate the activation map
model(x.cuda()) # forward
# Based on the filterid given by the function argument, pick up the specific filter's activation map
# We just need to plot it, so we can detach from graph and save as cpu tensor
filter_activations = layer_activations[:, filterid, :, :].detach().cpu()
# Filter visualization: find the image that can activate the filter the most
x = x.cuda()
x.requires_grad_()
# input image gradient
optimizer = Adam([x], lr=lr)
# Use optimizer to modify the input image to amplify filter activation
for iter in range(iteration): #iteration==100
optimizer.zero_grad()
model(x)
objective = -layer_activations[:, filterid, :, :].sum()
# We want to maximize the filter activation's summation
# So we add a negative sign
objective.backward()
# Calculate the partial differential value of filter activation to input image
optimizer.step()
# Modify input image to maximize filter activation
filter_visualizations = x.detach().cpu().squeeze()
# Don't forget to remove the hook
hook_handle.remove()
# The hook will exist after the model register it, so you have to remove it after used
# Just register a new hook if you want to use it
return filter_activations, filter_visualizations
images, labels = train_set.getbatch(img_indices)
#下面的这个函数的参数可以看出,是获取第cnnid==6第6个卷积层的第0个过滤器的activation和visulization
filter_activations, filter_visualizations = filter_explanation(images, model, cnnid=6, filterid=0, iteration=100, lr=0.1)
#以下总共进行了3组图片的绘制,分别是原始图片、activation图片,visulation图片
fig, axs = plt.subplots(3, len(img_indices), figsize=(15, 8))
for i, img in enumerate(images):
axs[0][i].imshow(img.permute(1, 2, 0))
# Plot filter activations
for i, img in enumerate(filter_activations):
axs[1][i].imshow(normalize(img))
# Plot filter visualization
for i, img in enumerate(filter_visualizations):
axs[2][i].imshow(normalize(img.permute(1, 2, 0)))
plt.show()
plt.close()
# 從下面四張圖可以看到,activate 的區域對應到一些物品的邊界,尤其是顏色對比較深的邊界
images, labels = train_set.getbatch(img_indices)
#下面的这个函数的参数可以看出,是获取第cnnid==23第23个卷积层的第0个过滤器的activation和visulization
filter_activations, filter_visualizations = filter_explanation(images, model, cnnid=23, filterid=0, iteration=100, lr=0.1)
# Plot filter activations
fig, axs = plt.subplots(3, len(img_indices), figsize=(15, 8))
for i, img in enumerate(images):
axs[0][i].imshow(img.permute(1, 2, 0))
for i, img in enumerate(filter_activations):
axs[1][i].imshow(normalize(img))
for i, img in enumerate(filter_visualizations):
axs[2][i].imshow(normalize(img.permute(1, 2, 0)))
plt.show()
plt.close()
#什么都别说,5点45去西园吃点清淡的,就出去玩——看电影,或者其他的,好吧!
class IntegratedGradients():
def __init__(self, model): #初始化这个类
self.model = model
self.gradients = None
# Put model in evaluation mode
self.model.eval()
def generate_images_on_linear_path(self, input_image, steps):
# Generate scaled xbar images
xbar_list = [input_image*step/steps for step in range(steps)]
return xbar_list
def generate_gradients(self, input_image, target_class): #计算一张图像的gradient
# We want to get the gradients of the input image
input_image.requires_grad=True
# Forward
model_output = self.model(input_image)
# Zero grads
self.model.zero_grad()
# Target for backprop
one_hot_output = torch.FloatTensor(1, model_output.size()[-1]).zero_().cuda()
one_hot_output[0][target_class] = 1
# Backward
model_output.backward(gradient=one_hot_output)
self.gradients = input_image.grad
# Convert Pytorch variable to numpy array
# [0] to get rid of the first channel (1,3,128,128)
gradients_as_arr = self.gradients.data.cpu().numpy()[0]
return gradients_as_arr
def generate_integrated_gradients(self, input_image, target_class, steps): #计算img_list的图像的gradient的integrate
# Generate xbar images
xbar_list = self.generate_images_on_linear_path(input_image, steps)
# Initialize an iamge composed of zeros
integrated_grads = np.zeros(input_image.size())
for xbar_image in xbar_list:
# Generate gradients from xbar images
single_integrated_grad = self.generate_gradients(xbar_image, target_class)
# Add rescaled grads from xbar images
integrated_grads = integrated_grads + single_integrated_grad/steps
# [0] to get rid of the first channel (1,3,128,128)
return integrated_grads[0]
def normalize(image):
return (image - image.min()) / (image.max() - image.min())
# put the image to cuda
images, labels = train_set.getbatch(img_indices)
images = images.cuda()
IG = IntegratedGradients(model)
integrated_grads = []
for i, img in enumerate(images):
img = img.unsqueeze(0)
integrated_grads.append(IG.generate_integrated_gradients(img, labels[i], 10))
fig, axs = plt.subplots(2, len(img_indices), figsize=(15, 8))
for i, img in enumerate(images): #输出一组正常的图像
axs[0][i].imshow(img.cpu().permute(1, 2, 0))
for i, img in enumerate(integrated_grads): #输出integrate的图像
axs[1][i].imshow(np.moveaxis(normalize(img),0,-1))
plt.show()
plt.close()
exBERT
这个模型可以用于查看注意力头部等信息,这里我就先不管了,后期慢慢摸索吧。。。。
湯姆有 3 個預訓練模型,但他忘記每一個模型是否有微調在閱讀理解的任務上了
通过观察各个token的embedding的位置,分析这个model是否具有阅读理解的fine_tune,
、。。。。我没做出来,有点难,
不过,它的代码就是从bert的每一层中取出embedding结果,再将每个token投射到二维坐标中进行分析
# Sentences for visualization
sentences = []
sentences += ["今天買了蘋果來吃"]
sentences += ["進口蘋果(富士)平均每公斤下跌12.3%"]
sentences += ["蘋果茶真難喝"]
sentences += ["老饕都知道智利的蘋果季節即將到來"]
sentences += ["進口蘋果因防止水分流失故添加人工果糖"]
sentences += ["蘋果即將於下月發振新款iPhone"]
sentences += ["蘋果獲新Face ID專利"]
sentences += ["今天買了蘋果手機"]
sentences += ["蘋果的股價又跌了"]
sentences += ["蘋果押寶指紋辨識技術"]
# Index of word selected for embedding comparison. E.g. For sentence "蘋果茶真難喝", if index is 0, "蘋 is selected"
select_word_index = [4, 2, 0, 8, 2, 0, 0, 4, 0, 0] #设置上面的词汇数组中的"苹果"二字的index位置
#计算向量a 和 向量b的欧式距离
def euclidean_distance(a, b):
# Compute euclidean distance (L2 norm) between two numpy vectors a and b
return np.linalg.norm(a-b)
#计算a向量和b向量的余弦相似度cosine_similarity = (A · B) / (||A|| * ||B||)
def cosine_similarity(a, b):
# Compute cosine similarity between two numpy vectors a and b
return 0
# Metric for comparison. Choose from euclidean_distance, cosine_similarity
#METRIC有2个选择,要么用欧式距离 要么用余弦相似度
METRIC = euclidean_distance
def get_select_embedding(output, tokenized_sentence, select_word_index):
# The layer to visualize, choose from 0 to 12
LAYER = 12
# Get selected layer's hidden state
hidden_state = output.hidden_states[LAYER][0]
# Convert select_word_index in sentence to select_token_index in tokenized sentence
select_token_index = tokenized_sentence.word_to_tokens(select_word_index).start
# Return embedding of selected word
return hidden_state[select_token_index].numpy()
# Tokenize and encode sentences into model's input format
tokenized_sentences = [tokenizer(sentence, return_tensors='pt') for sentence in sentences]
# Input encoded sentences into model and get outputs
with torch.no_grad():
outputs = [model(**tokenized_sentence) for tokenized_sentence in tokenized_sentences]
#得到词汇"苹果"在各个句子中的embedding
# Get embedding of selected word(s) in sentences. "embeddings" has shape (len(sentences), 768), where 768 is the dimension of BERT's hidden state
embeddings = [get_select_embedding(outputs[i], tokenized_sentences[i], select_word_index[i]) for i in range(len(outputs))]
#计算 对应 "苹果"二字的 词汇的距离
# Pairwse comparsion of sentences' embeddings using the metirc defined. "similarity_matrix" has shape [len(sentences), len(sentences)]
similarity_matrix = pairwise_distances(embeddings, metric=METRIC)
#绘制这个词汇的距离
##### Plot the similarity matrix #####
plt.rcParams['figure.figsize'] = [12, 10] # Change figure size of the plot
plt.imshow(similarity_matrix) # Display an image in the plot
plt.colorbar() # Add colorbar to the plot
plt.yticks(ticks=range(len(sentences)), labels=sentences, fontproperties=myfont) # Set tick locations and labels (sentences) of y-axis
plt.title('Comparison of BERT Word Embeddings') # Add title to the plot
for (i,j), label in np.ndenumerate(similarity_matrix): # np.ndenumerate is 2D version of enumerate
plt.text(i, j, '{:.2f}'.format(label), ha='center', va='center') # Add values in similarity_matrix to the corresponding position in the plot
plt.show() # Show the plot