谱聚类算法python代码_14 聚类算法 - 代码案例六- 谱聚类(SC)算法案例-阿里云开发者社区...

需求:使用 scikit-learn 的相关 API 创建模拟数据,然后使用谱聚类算法进行数据聚类操作,并比较算法在不同参数情况下的聚类效果。

常规操作:

import numpy as np

import matplotlib as mpl

import matplotlib.pyplot as plt

import sklearn.datasets as ds

import matplotlib.colors

import warnings

from sklearn.cluster import SpectralClustering#引入谱聚类

from sklearn.preprocessing import StandardScaler

from sklearn.metrics import euclidean_distances

# Configure matplotlib so Chinese text is not garbled (SimHei font, and
# 'axes.unicode_minus' switched off), then suppress FutureWarning messages.
mpl.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
warnings.filterwarnings(action='ignore', category=FutureWarning)

1、创建模拟数据

# Dataset 1: N samples drawn from Gaussian blobs around four centres, each
# blob with its own standard deviation; fixed seed for reproducibility.
N = 1000
centers = [[1, 2], [-1, -1], [1, -1], [-1, 1]]
data1, y1 = ds.make_blobs(
    n_samples=N,
    n_features=2,
    centers=centers,
    cluster_std=(0.75, 0.5, 0.3, 0.25),
    random_state=0,
)

# Rescale the features with StandardScaler before any distance is computed.
data1 = StandardScaler().fit(data1).transform(data1)

# Pairwise squared Euclidean distances between the rescaled samples.
dist1 = euclidean_distances(data1, squared=True)

2、数据2 - 圆形数据集

# Dataset 2: three concentric circles of radius 1, 2 and 3, all sampled at
# the same angles (steps of 0.1 rad over [0, 2*pi)).
t = np.arange(0, 2 * np.pi, 0.1)
data2_1, data2_2, data2_3 = (
    radius * np.column_stack((np.cos(t), np.sin(t))) for radius in (1, 2, 3)
)
data2 = np.concatenate((data2_1, data2_2, data2_3), axis=0)

# One row of labels per circle (row r holds the label r for every point of
# circle r); flatten it to get a per-sample label vector aligned with data2.
y2 = np.vstack([
    [label] * len(ring)
    for label, ring in enumerate((data2_1, data2_2, data2_3))
])

# Each entry pairs a sample matrix with its flat (1-D) label vector.
datasets = [
    (data1, y1),
    (data2, np.ravel(y2)),
]

def expandBorder(a, b):
    """Widen the interval [a, b] by 10% of its length on each side.

    Args:
        a: Lower bound of the interval.
        b: Upper bound of the interval.

    Returns:
        A tuple ``(a - d, b + d)`` where ``d = (b - a) * 0.1``.
    """
    d = (b - a) * 0.1
    return a - d, b + d

3、画图

# One colour per predicted cluster (at most four clusters are drawn), and the
# matching colormap for the ground-truth panel.
colors = ['r', 'g', 'b', 'y']
cm = mpl.colors.ListedColormap(colors)

# Kernel coefficients to compare. Loop-invariant, so built once up front.
gamma_list = [0.1, 5, 10]

# One figure per dataset: the top row shows the spectral-clustering result for
# each gamma, the bottom-left panel shows the ground-truth labels.
for i, (X, y) in enumerate(datasets):
    # Axis limits: the data's bounding box, padded via expandBorder.
    x1_min, x2_min = np.min(X, axis=0)
    x1_max, x2_max = np.max(X, axis=0)
    x1_min, x1_max = expandBorder(x1_min, x1_max)
    x2_min, x2_max = expandBorder(x2_min, x2_max)

    # Cluster with the number of distinct ground-truth labels of THIS dataset.
    # Previously every dataset was clustered with both 4 and 3 clusters and
    # both results were painted onto the same axes, while the title reported
    # this value regardless of what had actually been fitted.
    n_clusters = len(np.unique(y))

    plt.figure(figsize=(12, 8), facecolor='w')
    plt.suptitle(u'谱聚类--数据%d' % (i + 1), fontsize=20)
    plt.subplots_adjust(top=0.9, hspace=0.35)

    # Fit and draw one model per gamma in subplots 1..3 (top row).
    for j, gamma_value in enumerate(gamma_list):
        spectral = SpectralClustering(
            n_clusters=n_clusters,
            gamma=gamma_value,
            affinity='laplacian',
            assign_labels='kmeans',
        )
        y_hat = spectral.fit_predict(X)
        unique_y_hat = np.unique(y_hat)

        plt.subplot(2, 3, j + 1)
        # Draw each predicted cluster separately so it gets its own colour.
        for k, col in zip(unique_y_hat, colors):
            cur = (y_hat == k)
            plt.scatter(X[cur, 0], X[cur, 1], s=40, c=col, edgecolors='k')
        plt.xlim((x1_min, x1_max))
        plt.ylim((x2_min, x2_max))
        plt.grid(True)
        # Raw string: '\g' is not a valid escape sequence in a normal literal.
        plt.title(r'$\gamma$ = %.2f ,聚类簇数目:%d' % (gamma_value, n_clusters),
                  fontsize=16)

    # Bottom-left panel: the original data coloured by its true labels.
    plt.subplot(234)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cm, edgecolors='none')
    plt.xlim((x1_min, x1_max))
    plt.ylim((x2_min, x2_max))
    plt.title('原始数据,聚类簇数目:%d' % len(np.unique(y)))
    plt.grid(True)

    plt.show()

你可能感兴趣的:(谱聚类算法python代码)