Given a set of 3-dimensional data, project it to two dimensions with principal component analysis (PCA), linear discriminant analysis (LDA), KPCA, Isomap, LLE, and Laplacian Eigenmaps, and plot the result of each method. (Note: the first column of swiss-data.txt is the class label, with 3 classes in total; it is used only by LDA.)
# -*- coding: utf-8 -*-
'''
Given a set of 3-dimensional data, project it to two dimensions with
principal component analysis (PCA),
linear discriminant analysis (LDA),
KPCA,
Isomap,
LLE,
and Laplacian Eigenmaps, and plot the result of each method.
(Note: the first column of swiss-data.txt is the class label, 3 classes in total, used only by LDA.)
'''
import os
import numpy as np
import matplotlib.pyplot as plt
# Note: the first column of swissroll-data.txt is the class label (3 classes), used only by LDA.
# Data preprocessing: read the file and return an array of shape (2000, 4).
def pre_dataset(path):
    data = []
    for line in open(path, "r"):            # read the file line by line
        tokens = line.strip().split()       # split on whitespace, dropping trailing '\r'/'\n'
        if not tokens:
            continue                        # skip empty lines
        data.append([float(x) for x in tokens])
    return np.array(data)
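# Optional helper (a sketch, not part of the original assignment): if swissroll-data.txt
# is not at hand, a comparable input file can be generated with sklearn's make_swiss_roll.
# The function name make_swissroll_file and the way the 3 class labels are produced
# (binning the roll parameter t into three equal-frequency intervals) are assumptions
# made here for illustration, not the original data source.
def make_swissroll_file(path='swissroll-data.txt', n_samples=2000):
    from sklearn.datasets import make_swiss_roll
    X, t = make_swiss_roll(n_samples=n_samples, noise=0.05)
    # labels 1..3 derived from the roll parameter, so that LDA has 3 classes to separate
    labels = np.digitize(t, np.quantile(t, [1 / 3, 2 / 3])) + 1
    with open(path, 'w') as f:
        for label, (x1, x2, x3) in zip(labels, X):
            f.write('%d %.6f %.6f %.6f\n' % (label, x1, x2, x3))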
# Linear discriminant analysis (LDA)
def lda_visualization(x_data, y_data):
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    lda = LinearDiscriminantAnalysis(n_components=2)
    lda.fit(x_data, y_data)
    X_new = lda.transform(x_data)
    title_name = 'lda'
    plt.title(title_name)
    plt.scatter(X_new[:, 0], X_new[:, 1], marker='o', c=y_data)
    plt.savefig('result/' + title_name + '.jpg')
    plt.show()
# Principal component analysis (PCA)
def pca_visualization(data, y):
    from sklearn.decomposition import PCA
    pca = PCA(n_components=2)
    pca.fit(data)
    X_new = pca.transform(data)
    title_name = 'pca'
    plt.title(title_name)
    plt.scatter(X_new[:, 0], X_new[:, 1], marker='o', c=y)
    plt.savefig('result/' + title_name + '.jpg')
    plt.show()
# KPCA; kernel: 'linear' | 'poly' | 'rbf' | 'sigmoid' | 'cosine' | 'precomputed'
def Kpca_visualization(data, y, kernel_):
    from sklearn.decomposition import KernelPCA
    # map an integer code (1-5) to a kernel name
    kernels = {1: 'linear', 2: 'poly', 3: 'rbf', 4: 'sigmoid', 5: 'cosine'}
    kernel_ = kernels[kernel_]
    Kpca = KernelPCA(n_components=2, kernel=kernel_)
    Kpca.fit(data)
    X_back = Kpca.transform(data)
    title_name = 'Kpca-kernel_' + kernel_
    plt.title(title_name)
    plt.scatter(X_back[:, 0], X_back[:, 1], marker='o', c=y)
    plt.savefig('result/' + title_name + '.jpg')
    plt.show()
# Isomap (isometric mapping)
def Isomap_visualization(data, y):
    from sklearn.manifold import Isomap
    embedding = Isomap(n_components=2)
    X_transformed = embedding.fit_transform(data)
    title_name = 'Isomap'
    plt.title(title_name)
    plt.scatter(X_transformed[:, 0], X_transformed[:, 1], marker='o', c=y)
    plt.savefig('result/' + title_name + '.jpg')
    plt.show()
# LLE (locally linear embedding)
def LLE_visualization(data, y):
    from sklearn.manifold import locally_linear_embedding as LLE
    X_transformed, err = LLE(data, n_neighbors=12, n_components=2)
    title_name = 'LLE'
    plt.title(title_name + '\n Reconstruction error: ' + str(err))
    plt.scatter(X_transformed[:, 0], X_transformed[:, 1], marker='o', c=y)
    plt.savefig('result/' + title_name + '.jpg')
    plt.show()
# LE (Laplacian Eigenmaps), available in sklearn as spectral embedding
def LE_visualization(data, y):
    from sklearn.manifold import SpectralEmbedding as SE
    se = SE(n_components=2)
    X_transformed = se.fit_transform(data)
    title_name = 'Laplacian_Eigenmaps'
    plt.title(title_name)
    plt.scatter(X_transformed[:, 0], X_transformed[:, 1], marker='o', c=y)
    plt.savefig('result/' + title_name + '.jpg')
    plt.show()
if __name__ == '__main__':
    os.makedirs('result', exist_ok=True)     # make sure the output directory exists
    data = pre_dataset('swissroll-data.txt')
    y_data = data[:, 0]                      # class labels (first column), used by LDA and for coloring
    x_data = data[:, 1:]                     # the 3-dimensional coordinates
    print(x_data.shape, y_data.shape)
    lda_visualization(x_data, y_data)
    pca_visualization(x_data, y_data)
    Isomap_visualization(x_data, y_data)
    LLE_visualization(x_data, y_data)
    LE_visualization(x_data, y_data)
    for i in range(1, 6):                    # iterate over the five kernels defined above
        Kpca_visualization(x_data, y_data, i)