完整的最简单的谱聚类python代码

http://blog.csdn.net/waleking/article/details/7584084


针对 karate_club 数据集做了谱聚类。由于是 2-way clustering(二分聚类),比较简单:得到图的新的表示空间之后,没有做 k-means,而是仅仅对正规化后的拉普拉斯矩阵第二小特征值所对应的特征向量(Fiedler 向量)的各个分量做了符号判断,这和 Spectral Clustering Tutorial 一文中的描述一致。

引用了 numpy、scipy、matplotlib、networkx 包




#coding=utf-8
#MSC means Multiple Spectral Clustering 
import numpy as np
import scipy as sp
import scipy.linalg as linalg
import networkx as nx
import matplotlib.pyplot as plt

def getNormLaplacian(W):
    """Return the symmetric normalized graph Laplacian of a weight matrix.

    With D = diag(d_1, ..., d_n), where d_i is the sum of row i of W, and
    L = D - W, the result is Lbar = D^(-1/2) L D^(-1/2).

    Args:
        W: square weight (adjacency) matrix; anything np.asarray accepts
            (nested lists, ndarray, np.matrix).

    Returns:
        The n x n matrix Lbar. An ndarray, except that an np.matrix input
        yields an np.matrix result.

    Rows whose degree is not positive (e.g. isolated nodes) get a
    D^(-1/2) entry of 0 instead of triggering a singular-matrix error, so
    the corresponding rows and columns of Lbar are zero.
    """
    A = np.asarray(W, dtype=float)
    d = A.sum(axis=1)
    L = np.diag(d) - A
    # D is diagonal, so D^(-1/2) is just the element-wise inverse square
    # root of the degrees; no dense matrix inversion is needed.
    dn = np.zeros_like(d)
    positive = d > 0
    dn[positive] = 1.0 / np.sqrt(d[positive])
    # Scale row i by dn[i] and column j by dn[j]: same as Dn . L . Dn.
    Lbar = L * dn[:, np.newaxis] * dn[np.newaxis, :]
    if isinstance(W, np.matrix):
        return np.asmatrix(Lbar)
    return Lbar



def getKSmallestEigVec(Lbar,k):
    """Return the k smallest eigenvalues of Lbar and their eigenvectors.

    Args:
        Lbar: square matrix to decompose.
        k: number of eigenpairs to return.

    Returns:
        A tuple (eigval, eigvec): eigval holds the k smallest eigenvalues
        in ascending order (as returned by scipy.linalg.eig, i.e. complex
        dtype, ordered by real part and then imaginary part); column j of
        eigvec is the eigenvector belonging to eigval[j].
    """
    eigval, eigvec = linalg.eig(Lbar)
    # Select positions with argsort instead of a value -> index dict: a
    # dict keyed by eigenvalue collapses repeated eigenvalues onto a single
    # index, which returned the same eigenvector column more than once.
    ix = np.argsort(eigval, kind="stable")[:k]
    return eigval[ix], eigvec[:, ix]



def checkResult(Lbar,eigvec,eigval,k):
    """Print how well the first k eigenpairs satisfy Lbar*v = lambda*v.

    For each i in 0..k-1 the norm of Lbar*eigvec[:,i] - eigval[i]*eigvec[:,i]
    is computed; the norms are printed as multiples of np.spacing(1).

    Args:
        Lbar: the matrix the eigenpairs belong to.
        eigvec: matrix whose columns are the eigenvectors.
        eigval: sequence of the matching eigenvalues.
        k: number of leading eigenpairs to check.
    """
    eps = np.spacing(1)
    residualNorms = []
    for j in range(k):
        v = eigvec[:, j]
        residual = np.dot(Lbar, v) - eigval[j] * v
        residualNorms.append(np.linalg.norm(residual))
    # Express the residual norms in units of eps.
    scaled = residualNorms / eps
    print("Lbar*v-lamda*v are %s*%s" % (scaled, eps))



# Build the karate-club graph and its normalized Laplacian.
g=nx.karate_club_graph()
nodeList=list(g.nodes())
nodeNum=len(nodeList)
# nx.to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array is the
# supported replacement. Passing nodelist pins row i to nodeList[i].
m=nx.to_numpy_array(g,nodelist=nodeList)
Lbar=getNormLaplacian(m)
k=2
kEigVal,kEigVec=getKSmallestEigVec(Lbar,k)
print("k eig val are %s" % kEigVal)
print("k eig vec are %s" % kEigVec)
checkResult(Lbar,kEigVec,kEigVal,k)

# Skip k-means: for a 2-way split, assign each node by the sign of its
# component in the eigenvector of the second-smallest eigenvalue.
# np.real drops a zero imaginary part in case the solver returned a
# complex dtype, which cannot be compared with 0.
fiedler=np.real(np.asarray(kEigVec)[:,1])
clusterA=[nodeList[i] for i in range(nodeNum) if fiedler[i]>0]
# Components that are exactly zero go to cluster B so that every node is
# assigned to a cluster, matching the colouring below.
clusterB=[nodeList[i] for i in range(nodeNum) if fiedler[i]<=0]

# draw graph
membersA=set(clusterA)
colList={node:(0 if node in membersA else 0.6) for node in nodeList}

plt.figure(figsize=(8,8))
pos=nx.spring_layout(g)
nx.draw_networkx_edges(g,pos,alpha=0.4)
# Pass real lists: dict views are not sequences.
nx.draw_networkx_nodes(g,pos,nodelist=list(colList.keys()),
  node_color=list(colList.values()),
  cmap=plt.cm.Reds_r)

nx.draw_networkx_labels(g,pos,font_size=10,font_family='sans-serif')
plt.axis('off')
plt.title("karate_club spectral clustering")
plt.savefig("spectral_clustering_result.png")
plt.show()
完整的最简单的谱聚类python代码_第1张图片

你可能感兴趣的:(机器学习,谱聚类)