封装t-sne绘图

介绍

t-sne是一种将高维数据降维的算法,可以降成2维,然后画图显示出来,用来观察高维数据分布。

代码

使用方法:先把每个样本转成ndarray格式,用add_data逐个添加(每个样本展平后的长度必须一致),最后调用draw画图就可以了。

import numpy as np
import matplotlib.pyplot as plt
from sklearn import manifold


class TsnePlotter():
    """Collect labelled samples and plot their 2-D t-SNE embedding.

    Usage: feed samples one at a time through ``add_data``, then call
    ``draw`` to run t-SNE and show the scatter plot.
    """

    def __init__(self):
        self.class_num = 0  # number of distinct class names seen so far
        self.class_name_list = []  # class names, in order of first appearance
        self.data_list = []  # one flattened 1-D ndarray per sample
        self.label_list = []  # per-sample index into class_name_list

    def add_data(self, data, class_name):
        """Append one sample together with its class name.

        Args:
            data: Array-like sample. It is flattened to 1-D before being
                stored; every sample must flatten to the same length.
            class_name: Name of the sample's class (used as legend label).

        Raises:
            ValueError: If the flattened length differs from the samples
                that were added earlier.
        """
        flat = np.asarray(data).reshape(-1)
        # Fail fast: a mismatched sample would otherwise only surface (or be
        # silently misaligned) when the samples are combined in draw().
        if self.data_list and flat.shape[0] != self.data_list[0].shape[0]:
            raise ValueError(
                'data length {} does not match previous samples ({})'.format(
                    flat.shape[0], self.data_list[0].shape[0]))
        if class_name not in self.class_name_list:
            self.class_name_list.append(class_name)
            self.class_num += 1
        self.data_list.append(flat)
        self.label_list.append(self.class_name_list.index(class_name))

    def draw(self):
        """Run t-SNE on the collected samples and show a scatter plot.

        The embedding is min-max normalised per axis and each class is
        drawn in its own colour with one legend entry.

        Raises:
            ValueError: If no data has been added.
        """
        if not self.data_list:
            raise ValueError('no data to draw, call add_data first')

        # Rows are samples; np.stack also rejects samples of unequal length.
        features = np.stack(self.data_list, axis=0)
        tsne = manifold.TSNE(n_components=2, init='pca', random_state=501)
        embedded = tsne.fit_transform(features)

        lower, upper = embedded.min(0), embedded.max(0)
        span = upper - lower
        span[span == 0] = 1  # a constant axis would otherwise divide by zero
        normalized = (embedded - lower) / span

        # Set1 is a small qualitative palette; integer indices past its end
        # all map to the last colour, so fall back to sampling a continuous
        # colormap when there are more classes than Set1 entries.
        if self.class_num <= plt.cm.Set1.N:
            colors = [plt.cm.Set1(i) for i in range(self.class_num)]
        else:
            colors = [plt.cm.gist_rainbow(i / self.class_num)
                      for i in range(self.class_num)]

        labels = np.asarray(self.label_list)
        plt.figure(figsize=(8, 8))
        # One scatter call per class instead of one per point: far fewer
        # artists, and each class gets exactly one legend entry.
        for class_index, class_name in enumerate(self.class_name_list):
            mask = labels == class_index
            plt.scatter(normalized[mask, 0], normalized[mask, 1],
                        color=colors[class_index], label=class_name)
        plt.xticks([])
        plt.yticks([])
        plt.legend()
        plt.show()

    def show_profile(self):
        """Print sample count, sample dimension, class count and class names."""
        # Report dimension 0 for an empty plotter instead of raising IndexError.
        dimension = self.data_list[0].shape[0] if self.data_list else 0
        print('data num:{}'.format(len(self.data_list)))
        print('data dimention:{}'.format(dimension))
        print('label num:{}'.format(self.class_num))
        print('label categories:{}'.format(self.class_name_list))


if __name__ == "__main__":
    # Smoke test on scikit-learn's handwritten digits dataset (6 classes).
    from sklearn import datasets
    digit_set = datasets.load_digits(n_class=6)
    samples, targets = digit_set.data, digit_set.target
    plotter = TsnePlotter()
    # Feed the plotter one (sample, target) pair at a time.
    for sample, target in zip(samples, targets):
        plotter.add_data(sample, target)
    plotter.show_profile()
    plotter.draw()

用手写数字数据集测试的结果

封装t-sne绘图_第1张图片
测试完毕,开始跑自己的图,结果我16g的内存都快被占满了

封装t-sne绘图_第2张图片

总共8k多张图,都resize成了256x256
在这里插入图片描述
跑自己数据的程序:

from t_sne_plotter import TsnePlotter
import os
import tifffile
from tqdm import tqdm
from PIL import Image
import numpy as np


def load_data(data_path):
    """Collect image paths and dataset labels for the three datasets.

    For each of LEVIR_CD, SemiCD_Google and WHU_DSIFN, the files found in
    ``<data_path>/<dataset>/train/post_img`` are listed.

    Args:
        data_path: Root directory that contains the dataset folders,
            e.g. ``D:\\data\\songkq_data``.

    Returns:
        A ``(data_list, label_list)`` tuple: file paths, and for each path
        the name of the dataset it came from (same order, same length).
    """
    dataset_names = ('LEVIR_CD', 'SemiCD_Google', 'WHU_DSIFN')
    data_list, label_list = [], []
    for dataset_name in dataset_names:
        img_dir = os.path.join(data_path, dataset_name, 'train', 'post_img')
        # os.listdir returns entries in arbitrary order; sort them so the
        # sample order (and therefore the plot) is reproducible.
        for file_name in sorted(os.listdir(img_dir)):
            file_path = os.path.join(img_dir, file_name)
            # Skip sub-directories: only regular files can be read as images.
            if not os.path.isfile(file_path):
                continue
            data_list.append(file_path)
            label_list.append(dataset_name)
    return data_list, label_list

if __name__ == "__main__":
    tsne = TsnePlotter()
    data_path_list, label_list = load_data(r'D:\data\songkq_data')
    # Wrap the list itself rather than enumerate(): tqdm can then read its
    # length and show a total, percentage and ETA.
    for path, label in zip(tqdm(data_path_list), label_list):
        # Every sample must have the same size, so resize each image to 256x256.
        data = tifffile.imread(path)
        PILimage = Image.fromarray(np.uint8(data))
        resized_image = PILimage.resize((256, 256))
        tsne.add_data(np.asarray(resized_image), label)
    tsne.show_profile()
    tsne.draw()

不同的遥感数据集:
封装t-sne绘图_第3张图片

你可能感兴趣的:(Python,numpy,可视化,python)