python 谱聚类 幂迭代

紧接着上篇博文,实现了幂迭代聚类算法:

# encoding=utf-8
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.preprocessing import normalize
from sklearn.datasets import make_blobs


def similarity(points):
    """
    Build the affinity matrix of a point set.

    The pairwise affinities come from ``rbf_kernel`` called with its default
    parameters; the diagonal is then zeroed so that no point is counted as
    being similar to itself.

    :param points: data points, one row per sample
    :return: square affinity matrix with a zero diagonal
    """
    res = rbf_kernel(points)
    # Clear all self-affinities in one vectorised call instead of an index loop.
    np.fill_diagonal(res, 0)
    return res


def spectral(points, k):
    """
    Spectral clustering on the symmetric normalised graph Laplacian.

    Builds ``L = I - D^{-1/2} W D^{-1/2}`` from the affinity matrix ``W``,
    takes the eigenvectors belonging to the ``k`` smallest eigenvalues,
    normalises each row of that embedding and clusters the rows with k-means.

    :param points: data points, one row per sample
    :param k: number of clusters (also the number of eigenvectors kept)
    :return: cluster label for every point, as produced by ``KMeans.fit_predict``
    """
    W = similarity(points)
    inv_sqrt_degree = np.power(np.sum(W, axis=1), -0.5)
    # D^{-1/2} W D^{-1/2} via broadcasting: same values as multiplying by the
    # diagonal matrices, without two dense O(n^3) matrix products.
    scaled_W = W * inv_sqrt_degree[:, np.newaxis] * inv_sqrt_degree[np.newaxis, :]
    L = np.eye(len(points)) - scaled_W
    # L is symmetric, so use the symmetric solver: eigh always returns real
    # eigenpairs with eigenvalues in ascending order (it reads only the lower
    # triangle by default), whereas the general eig may return complex,
    # unordered results because of rounding asymmetry.
    _, eigvecs = LA.eigh(L)
    # Ascending order means the first k columns belong to the k smallest
    # eigenvalues.
    subvecs = normalize(eigvecs[:, :k])
    return KMeans(n_clusters=k).fit_predict(subvecs)


def normvec(vec):
    """
    Rescale a vector by its 1-norm.

    :param vec: vector to rescale
    :return: ``vec`` divided by ``LA.norm(vec, ord=1)``
    """
    l1_norm = LA.norm(vec, ord=1)
    return vec / l1_norm


def pic(points, k, max_iter=1000):
    """
    Power iteration clustering (PIC).

    Repeatedly multiplies a random start vector by the row-normalised affinity
    matrix, re-normalising to unit 1-norm after every step. Iteration stops
    once the per-step change of the vector has itself stopped changing (the
    infinity norm of the difference between two successive change vectors
    drops below ``1e-5 / n``). The resulting one-dimensional embedding is then
    clustered with k-means.

    :param points: data points, one row per sample
    :param k: number of clusters
    :param max_iter: upper bound on the number of power-iteration steps, so
        the loop always terminates even if the stopping test is never met
    :return: cluster label for every point, as produced by ``KMeans.fit_predict``
    """
    n = len(points)
    # Row-stochastic affinity matrix: every row is scaled to sum to one.
    W = normalize(similarity(points), norm='l1')
    v = normvec(np.random.rand(n))
    # Change vectors of the two most recent steps; seeded with zeros and the
    # start vector so the first stopping test compares the start vector
    # against zero.
    delta_prev = np.zeros(n)
    delta = v
    threshold = 1e-5 / n
    steps = 0
    while steps < max_iter and LA.norm(delta - delta_prev, ord=np.inf) >= threshold:
        v_next = normvec(np.dot(W, v))
        # v_next and the new delta are fresh arrays, so plain rebinding is
        # enough; no defensive copies are needed.
        delta_prev, delta = delta, np.abs(v_next - v)
        v = v_next
        steps += 1
    return KMeans(n_clusters=k).fit_predict(v.reshape(-1, 1))


# Data: generate sample points X with generator labels y (make_blobs called
# with its default arguments), then cluster X into 3 groups with power
# iteration clustering.
X, y = make_blobs()
labels = pic(X, 3)

# Plotting
plt.style.use('ggplot')
# Left panel: the raw data, coloured by the labels returned by make_blobs
fig, (ax0, ax1) = plt.subplots(ncols=2)
ax0.scatter(X[:, 0], X[:, 1], c=y)
ax0.set_title('raw data')
ax0.axis('equal')
# Right panel: the same points, coloured by the clustering result
ax1.scatter(X[:, 0], X[:, 1], c=labels)
ax1.set_title('Power Iteration Clustering')
ax1.axis('equal')
plt.show()


你可能感兴趣的:(python 谱聚类 幂迭代)