本篇不涉及过多理论知识,以代码为主。运行平台为 Google Colab,编程语言为 Python,深度学习框架为 PyTorch。
本文是一篇图像聚类教程,分为两大部分:
训练部分:准备数据集、定义模型、训练自编码器;
聚类部分:准备数据集、加载模型、降维和聚类。
# Download the training images and the validation images/labels; the code
# below loads them by these file names.
!gdown --id '1BZb2AqOHHaad7Mo82St1qTBaXo_xtcUc' --output trainX.npy
!gdown --id '152NKCpj8S_zuIx3bQy0NN5oqpvBjdPIq' --output valX.npy
!gdown --id '1_hRGsFtm5KEazUg2ZvPZcuNScGF-ANh4' --output valY.npy
# Directory the training loop writes its checkpoints into.
!mkdir checkpoints
# List the working directory to confirm the files are present.
!ls
数据预处理:图片原始像素为 0~255 的整数,先把维度从 (N,H,W,C) 调整为 (N,C,H,W),再线性映射为 -1~1 的浮点数(float32)。
import numpy as np
def preprocess(image_list):
    """Normalize images and move the channel axis forward.

    Args:
        image_list: Images laid out as (N, H, W, C) with pixel values in
            the 0..255 range, e.g. (9000, 32, 32, 3).
    Returns:
        A float32 array laid out as (N, C, H, W) whose values are mapped
        linearly from 0..255 to -1..1.
    """
    pixels = np.array(image_list)
    # (N, H, W, C) -> (N, C, H, W)
    channels_first = pixels.transpose(0, 3, 1, 2)
    # 0 maps to -1 and 255 maps to +1; the arithmetic runs in float64 and
    # is cast down afterwards.
    scaled = channels_first / 255.0 * 2 - 1
    return scaled.astype(np.float32)
from torch.utils.data import Dataset#加载数据集
class Image_Dataset(Dataset):
    """Map-style dataset that serves items straight from an indexable collection."""

    def __init__(self, image_list):
        # Only a reference is kept; nothing is copied or converted here.
        self.image_list = image_list

    def __len__(self):
        """Return how many items the wrapped collection holds."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Return the item stored at position ``idx``."""
        return self.image_list[idx]
from torch.utils.data import DataLoader
# Load the raw training images from disk.
trainX = np.load('trainX.npy')
# Rescale and re-layout them with preprocess() so they can be fed to the model.
trainX_preprocessed = preprocess(trainX)
# Wrap the preprocessed array so that a DataLoader can batch it.
img_dataset = Image_Dataset(trainX_preprocessed)
import random
import torch
def count_parameters(model, only_trainable=False):
    """Return the total number of scalar parameters in ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. an ``nn.Module``).
        only_trainable: When True, count only parameters whose
            ``requires_grad`` flag is set.
    Returns:
        The summed element count of the selected parameters.
    """
    selected = model.parameters()
    if only_trainable:
        selected = (param for param in selected if param.requires_grad)
    return sum(param.numel() for param in selected)
def same_seeds(seed):
    """Seed the Python, NumPy and PyTorch generators and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every generator.
    """
    # cuDNN: turn off auto-tuning and request deterministic kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Python's and NumPy's global generators.
    random.seed(seed)
    np.random.seed(seed)
    # PyTorch generator, plus the CUDA generators when a GPU is present.
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # covers the multi-GPU case
import torch.nn as nn
class AE(nn.Module):
    """Convolutional autoencoder returning both the latent code and the reconstruction.

    Encoder: three stages of Conv2d(3x3, padding 1) -> ReLU -> MaxPool2d(2),
    with channels 3 -> 64 -> 128 -> 256; every pooling halves height and width.
    Decoder: three stride-1 transposed convolutions with kernels 5, 9 and 17
    (each enlarges height and width by kernel - 1), ending in Tanh so the
    output lies in [-1, 1].
    For a 32x32 input the latent is (256, 4, 4) and the reconstruction is
    (3, 32, 32).
    """

    def __init__(self):
        super().__init__()
        # Encoder: the layer order fixes the Sequential indices (0, 3, 6 for
        # the convolutions), which are part of the state_dict keys.
        encoder_layers = []
        for in_channels, out_channels in ((3, 64), (64, 128), (128, 256)):
            encoder_layers += [
                nn.Conv2d(in_channels, out_channels, 3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
            ]
        self.encoder = nn.Sequential(*encoder_layers)
        # Decoder
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 5, stride=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 64, 9, stride=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 3, 17, stride=1),
            nn.Tanh(),
        )

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            A ``(latent, reconstruction)`` tuple.
        """
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return latent, reconstruction
查看 Colab 当前分配的 GPU 信息(型号、显存占用等):
!nvidia-smi
import torch
from torch import optim
# Train the autoencoder to reconstruct its own input (MSE loss).
same_seeds(0)  # fix all random seeds before the model is created

model = AE().cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-5)

model.train()
n_epoch = 100

# Prepare the dataloader (model, loss criterion and optimizer are set up above).
img_dataloader = DataLoader(img_dataset, batch_size=64, shuffle=True)

# Main training loop
for epoch in range(n_epoch):
    # Accumulates the per-batch mean losses, so the value printed below is
    # their sum over the epoch, not an average.
    epoch_loss = 0
    for data in img_dataloader:
        img = data
        img = img.cuda()

        # The model returns (latent, reconstruction); only the reconstruction
        # is compared against the input.
        output1, output = model(img)
        loss = criterion(output, img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Write the periodic checkpoint once, after the last batch of every 10th
    # epoch. Saving inside the batch loop rewrote the same file on every
    # batch and ended with exactly these weights.
    if (epoch+1) % 10 == 0:
        torch.save(model.state_dict(), './checkpoints/checkpoint_{}.pth'.format(epoch+1))

    print('epoch [{}/{}], loss:{:.5f}'.format(epoch+1, n_epoch, epoch_loss))

# Save the final model once training has finished.
torch.save(model.state_dict(), './checkpoints/last_checkpoint.pth')
import numpy as np
def cal_acc(gt, pred):
    """Compute the clustering accuracy for a two-cluster task.

    Args:
        gt: Ground-truth labels as a 1-D array, e.g. shape (9000,).
        pred: Predicted labels as an array of the same shape.
    Returns:
        The accuracy as a scalar in the 0..1 range, taken as the better of
        the given labelling and its inverse.
    """
    n_matching = (gt == pred).sum()
    acc = n_matching / gt.shape[0]
    # Cluster ids are arbitrary in binary unsupervised clustering, so a
    # labelling and its inverse are equally valid: report max(acc, 1 - acc).
    flipped_acc = 1 - acc
    return acc if acc >= flipped_acc else flipped_acc
import matplotlib.pyplot as plt
# Plotting helper
def plot_scatter(feat, label, savefig=None):
    """Draw a 2-D scatter plot of embedded points coloured by label.

    Args:
        feat: The (x, y) coordinates of the embedded points, shape (N, 2).
        label: Per-point values used for colouring, e.g. ground-truth
            labels (0/1) of shape (N,).
        savefig: Optional path; when given, the figure is written there
            before it is shown.
    Returns:
        None
    """
    X = feat[:, 0]
    Y = feat[:, 1]
    scatter = plt.scatter(X, Y, c=label)
    # Build the legend from the colour-mapped label values. A bare
    # plt.legend() finds no labelled artists here and draws an empty legend.
    handles, names = scatter.legend_elements()
    plt.legend(handles, names, loc='best')
    if savefig is not None:
        # Save before show(): the figure may be cleared once it is shown.
        plt.savefig(savefig)
    plt.show()
    return
import torch
from sklearn.decomposition import KernelPCA
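# Principal Component Analysis (PCA)
# PCA is by far the most popular dimensionality-reduction algorithm. It first finds the hyperplane that lies closest to the data, then projects all of the data onto that hyperplane,
# choosing the hyperplane that preserves the maximum variance.
# Kernel PCA (kPCA) is an unsupervised algorithm, so there is no obvious performance metric for choosing the best kernel and hyperparameter values. Dimensionality reduction is, however, often a preparation step for a supervised task (e.g. classification), whose score can then guide that choice.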
from sklearn.manifold import TSNE
from sklearn.cluster import MiniBatchKMeans
def inference(X, model, batch_size=256):
    """Encode raw images with the model and return their flattened latents.

    Args:
        X: Raw images in the layout expected by ``preprocess``.
        model: Callable returning ``(latent, reconstruction)`` for a batch;
            it must already live on the GPU, since batches are moved there
            with ``.cuda()``.
        batch_size: Number of images encoded per forward pass.
    Returns:
        A 2-D NumPy array with one row per image: the latent of that image
        flattened to a vector.
    """
    X = preprocess(X)
    dataset = Image_Dataset(X)
    # shuffle=False keeps the rows in the same order as X.
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    chunks = []
    # No gradients are needed for feature extraction; disabling autograd
    # avoids building a graph for every batch.
    with torch.no_grad():
        for x in dataloader:
            vec, _ = model(x.float().cuda())
            # Flatten every latent except for the batch axis: (B, ...) -> (B, D).
            chunks.append(vec.reshape(vec.size(0), -1).cpu().numpy())
    # Concatenate once along axis 0 instead of re-copying inside the loop.
    latents = np.concatenate(chunks, axis=0)
    print('Latents Shape:', latents.shape)
    return latents
def predict(latents, random_state=0):
    """Reduce the latents to 2-D and split them into two clusters.

    The pipeline is Kernel PCA (RBF kernel, 200 components) -> t-SNE
    (2 components) -> MiniBatchKMeans (2 clusters).

    Args:
        latents: 2-D array with one latent vector per row.
        random_state: Seed passed to both t-SNE and k-means so that repeated
            calls on the same latents give the same result. The default 0
            is the seed k-means always used.
    Returns:
        A ``(pred, X_embedded)`` tuple: the integer cluster label of every
        row, and the 2-D t-SNE embedding that was clustered.
    """
    # First dimensionality reduction: keep 200 kernel principal components.
    # n_jobs=-1 uses every available CPU.
    transformer = KernelPCA(n_components=200, kernel='rbf', n_jobs=-1)
    # fit_transform fits the projection on `latents` and applies it in one call.
    kpca = transformer.fit_transform(latents)
    print('First Reduction Shape:', kpca.shape)

    # Second dimensionality reduction: embed into 2-D. Seeded, because an
    # unseeded t-SNE makes the downstream accuracy vary from run to run.
    X_embedded = TSNE(n_components=2, random_state=random_state).fit_transform(kpca)
    print('Second Reduction Shape:', X_embedded.shape)

    # Clustering: n_clusters is the number of cluster centres (library
    # default 8; this task needs 2).
    kmeans = MiniBatchKMeans(n_clusters=2, random_state=random_state).fit(X_embedded)
    pred = kmeans.labels_.astype(int)
    return pred, X_embedded
def invert(pred):
    """Flip binary cluster labels (0 <-> 1) by returning ``|1 - pred|`` element-wise."""
    difference = 1 - pred
    return np.absolute(difference)
def save_prediction(pred, out_csv='prediction.csv'):
    """Write predictions to a two-column CSV file and report the path.

    Args:
        pred: Iterable of labels; the row id is the position in the iterable.
        out_csv: Destination path, overwritten if it already exists.
    """
    with open(out_csv, 'w') as f:
        f.write('id,label\n')
        f.writelines(f'{row_id},{label}\n' for row_id, label in enumerate(pred))
    print(f'Save prediction to {out_csv}.')
# Load the trained weights into a fresh model and switch it to evaluation mode.
model = AE().cuda()
model.load_state_dict(torch.load('./checkpoints/last_checkpoint.pth'))
model.eval()
# Prepare the data.
trainX = np.load('trainX.npy')
# Predict the answers: encode the images, then reduce and cluster the latents.
latents = inference(X=trainX, model=model)
pred, X_embedded = predict(latents)
# Save the predictions to a file for upload to Kaggle.
save_prediction(pred, 'prediction.csv')
# This is an unsupervised binary classification problem, so all that matters is whether the images were split into two groups.
# If the file above scores below 0.5 after being uploaded to Kaggle, simply flip the labels.
save_prediction(invert(pred), 'prediction_invert.csv')
# Load the validation images and the labels used as ground truth below.
valX = np.load('valX.npy')
valY = np.load('valY.npy')
# ==============================================
# This demonstrates the plot for the baseline model;
# for the report, students must additionally draw one for the improved model.
# ==============================================
# Reload the final checkpoint and keep the model in evaluation mode.
model.load_state_dict(torch.load('./checkpoints/last_checkpoint.pth'))
model.eval()
# Encode the validation images, then reduce to 2-D and cluster.
latents = inference(valX, model)
pred_from_latent, emb_from_latent = predict(latents)
# Score the clusters against the labels and plot the 2-D embedding coloured by label.
acc_latent = cal_acc(valY, pred_from_latent)
print('The clustering accuracy is:', acc_latent)
print('The clustering result:')
plot_scatter(emb_from_latent, valY, savefig='p1_baseline.png')
import matplotlib.pyplot as plt
import numpy as np
# Draw the original images (top row of a 2 x 6 grid).
plt.figure(figsize=(10,4))
indexes = [1,2,3,6,7,9]
imgs = trainX[indexes,]
for i, img in enumerate(imgs):
    plt.subplot(2, 6, i+1, xticks=[], yticks=[])
    plt.imshow(img)
# Draw the reconstructed images (bottom row).
inp = torch.Tensor(trainX_preprocessed[indexes,]).cuda()
latents, recs = model(inp)
# Map the Tanh output from [-1, 1] back to [0, 1] and move it to NumPy.
recs = ((recs+1)/2 ).cpu().detach().numpy()
# (N, C, H, W) -> (N, H, W, C), the layout imshow expects.
recs = recs.transpose(0, 2, 3, 1)
for i, img in enumerate(recs):
    plt.subplot(2, 6, 6+i+1, xticks=[], yticks=[])
    plt.imshow(img)
plt.tight_layout()
import glob
def _checkpoint_epoch(path):
    """Return the epoch number encoded in a 'checkpoint_<epoch>.pth' path."""
    return int(path.rsplit('_', 1)[1].rsplit('.', 1)[0])


# Sort by the numeric epoch. A plain string sort would give
# checkpoint_10, checkpoint_100, checkpoint_20, ... and put `points`
# out of training order.
checkpoints_list = sorted(glob.glob('checkpoints/checkpoint_*.pth'), key=_checkpoint_epoch)

# Load data
dataset = Image_Dataset(trainX_preprocessed)
dataloader = DataLoader(dataset, batch_size=64, shuffle=False)

# Summed squared error; divided by the element count below to get the
# mean over the whole training set.
sum_squared_error = torch.nn.MSELoss(reduction='sum')

# One (reconstruction error, validation accuracy) pair per checkpoint.
points = []
with torch.no_grad():
    for i, checkpoint in enumerate(checkpoints_list):
        print('[{}/{}] {}'.format(i+1, len(checkpoints_list), checkpoint))
        model.load_state_dict(torch.load(checkpoint))
        model.eval()
        # Reconstruction error over the training images.
        err = 0
        n = 0
        for x in dataloader:
            x = x.cuda()
            _, rec = model(x)
            err += sum_squared_error(x, rec).item()
            n += x.numel()
        print('Reconstruction error (MSE):', err/n)
        # Clustering accuracy on the validation set with this checkpoint.
        latents = inference(X=valX, model=model)
        pred, X_embedded = predict(latents)
        acc = cal_acc(valY, pred)
        print('Accuracy:', acc)
        points.append((err/n, acc))
代码需要多读几遍并亲手运行;只看理论很难真正上手。