t-SNE 是一种对高维数据进行降维的算法,可以把数据降到 2 维后画图显示,用来观察高维数据的分布情况。
使用方法:先把每条数据转成 ndarray 格式,用 add_data 逐条添加,全部添加完后调用 draw 即可画图。
import numpy as np
import matplotlib.pyplot as plt
from sklearn import manifold
class TsnePlotter():
    """Collect labelled samples and plot their 2-D t-SNE embedding.

    Usage: add samples one at a time with ``add_data``, then call ``draw``
    to compute the embedding and show the scatter plot.
    """

    def __init__(self):
        self.class_num = 0  # number of distinct classes seen so far
        self.class_name_list = []  # class names, in order of first appearance
        self.data_list = []  # every sample, flattened to a 1-D ndarray
        self.label_list = []  # per-sample index into class_name_list

    def add_data(self, data, class_name):
        """Add one sample.

        Args:
            data: array-like sample. It is flattened to 1-D; every sample
                added to the same plotter must have the same number of
                elements (checked when ``draw`` is called).
            class_name: label of the class this sample belongs to.
        """
        # np.asarray lets callers pass lists/tuples as well as ndarrays.
        self.data_list.append(np.asarray(data).reshape(-1))
        if class_name not in self.class_name_list:
            self.class_name_list.append(class_name)
            self.class_num += 1
        self.label_list.append(self.class_name_list.index(class_name))

    def _stack_samples(self):
        """Return the samples as one (n_samples, n_features) array.

        Raises:
            ValueError: if no sample was added, or if the samples do not all
                have the same length.
        """
        if not self.data_list:
            raise ValueError('no data to draw, call add_data first')
        feature_num = self.data_list[0].shape[0]
        for index, sample in enumerate(self.data_list):
            if sample.shape[0] != feature_num:
                # Concatenate-then-reshape would silently scramble rows when
                # the total size happens to be divisible by the sample count.
                raise ValueError(
                    'sample {} has {} elements, expected {}'.format(
                        index, sample.shape[0], feature_num))
        return np.stack(self.data_list, axis=0)

    @staticmethod
    def _normalize(embedding):
        """Min-max scale every column of ``embedding`` into [0, 1]."""
        low = embedding.min(0)
        span = embedding.max(0) - low
        # A constant column has zero span; dividing by 1 instead maps it to
        # 0 rather than producing NaN from 0/0.
        span[span == 0] = 1
        return (embedding - low) / span

    def draw(self):
        """Compute the t-SNE embedding of all added samples and plot it.

        Raises:
            ValueError: if no sample was added or sample lengths differ.
        """
        # Validate before the (expensive) fit so bad input fails fast.
        np_data = self._stack_samples()
        tsne = manifold.TSNE(n_components=2, init='pca', random_state=501)
        X_norm = self._normalize(tsne.fit_transform(np_data))
        labels = np.asarray(self.label_list)
        plt.figure(figsize=(8, 8))
        # One scatter call per class instead of one per point: far fewer
        # artists, and each class gets exactly one legend entry.
        # NOTE(review): Set1 is a small qualitative palette; with many
        # classes several of them may end up with the same colour - confirm.
        for class_index, class_name in enumerate(self.class_name_list):
            points = X_norm[labels == class_index]
            plt.scatter(points[:, 0], points[:, 1],
                        color=plt.cm.Set1(class_index), label=class_name)
        plt.xticks([])
        plt.yticks([])
        plt.legend()
        plt.show()

    def show_profile(self):
        """Print the sample count, sample dimension, class count and class names."""
        # Report dimension 0 instead of raising IndexError when empty.
        feature_num = self.data_list[0].shape[0] if self.data_list else 0
        print('data num:{}'.format(len(self.data_list)))
        print('data dimention:{}'.format(feature_num))
        print('label num:{}'.format(self.class_num))
        print('label categories:{}'.format(self.class_name_list))
if __name__ == "__main__":
    # Smoke test on the handwritten-digits dataset.
    from sklearn import datasets

    digits = datasets.load_digits(n_class=6)
    plotter = TsnePlotter()
    # Feed the samples one by one, each paired with its digit label.
    for sample, target in zip(digits.data, digits.target):
        plotter.add_data(sample, target)
    plotter.show_profile()
    plotter.draw()
用手写数字数据集测试的结果
我自己的数据总共有 8k 多张图,都 resize 成了 256x256。
跑自己数据的程序如下:
from t_sne_plotter import TsnePlotter
import os
import tifffile
from tqdm import tqdm
from PIL import Image
import numpy as np
def load_data(data_path,
              dataset_names=('LEVIR_CD', 'SemiCD_Google', 'WHU_DSIFN'),
              sub_dirs=('train', 'post_img')):
    """Collect file paths and dataset labels from several dataset folders.

    For every name in ``dataset_names`` the directory
    ``<data_path>/<name>/<sub_dirs...>`` is listed, and each entry found
    there is returned together with the dataset name as its label.

    Args:
        data_path: root directory that contains one folder per dataset.
        dataset_names: dataset folder names; they double as class labels.
        sub_dirs: path components below each dataset folder that lead to
            the files to collect.

    Returns:
        A ``(data_list, label_list)`` pair of equal-length lists: full file
        paths and the dataset name each path belongs to.

    Raises:
        OSError: if one of the directories cannot be listed.
    """
    data_list, label_list = [], []
    for dataset_name in dataset_names:
        img_dir = os.path.join(data_path, dataset_name, *sub_dirs)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore the results) reproducible across runs.
        for file_name in sorted(os.listdir(img_dir)):
            data_list.append(os.path.join(img_dir, file_name))
            label_list.append(dataset_name)
    return data_list, label_list
if __name__ == "__main__":
    # Plot the t-SNE embedding of the images of the three datasets.
    tsne = TsnePlotter()
    data_path_list, label_list = load_data(r'D:\data\songkq_data')
    # Wrap the list itself rather than enumerate(...): tqdm can then read
    # its length and show a real progress bar with a total and an ETA.
    for path, label in zip(tqdm(data_path_list), label_list):
        data = tifffile.imread(path)
        # Every image must end up with the same size, hence the resize.
        # NOTE(review): the np.uint8 cast assumes 8-bit image data; values
        # outside 0..255 would not survive the cast - confirm.
        resized_image = Image.fromarray(np.uint8(data)).resize((256, 256))
        tsne.add_data(np.asarray(resized_image), label)
    tsne.show_profile()
    tsne.draw()