聚类算法评价指标python实现_python实现四种聚类算法

一、k-means聚类算法

import numpy as np

import matplotlib.pyplot as plt

from sklearn.cluster import KMeans

from sklearn.datasets import make_blobs

def load_points(path="data.txt"):
    """Read sample points from a text file into a float array.

    Every non-blank line is treated as one record: its first and last
    characters are discarded and the remainder is split on commas and
    converted to floats.
    NOTE(review): this implies a format such as ``[1.0,2.0]`` per line; that
    is inferred from the slicing in the original script -- confirm against
    the real ``data.txt``.

    Args:
        path: Location of the data file.

    Returns:
        A ``numpy.ndarray`` with one row per record.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    rows = []
    with open(path, 'r') as f:
        for line in f:
            # Drop the line terminator first so that a final line without a
            # trailing newline does not lose its last data character.
            record = line.rstrip("\r\n")
            if not record.strip():
                continue  # tolerate blank lines instead of failing on float('')
            rows.append([float(field) for field in record[1:-1].strip().split(',')])
    return np.array(rows)


def main():
    """Cluster the points in ``data.txt`` with k-means and show a scatter plot."""
    random_state = 170  # fixed seed passed to KMeans

    plt.figure(figsize=(5, 5))

    # Load the data
    X = load_points("data.txt")
    print("数据读入完成……")

    # Partition the samples into four clusters
    y_pred = KMeans(n_clusters=4, random_state=random_state).fit_predict(X)

    # Colour every sample by its predicted cluster; hide the axis ticks
    plt.subplot(111)
    plt.scatter(X[:, 0], X[:, 1], c=y_pred, alpha=1)
    plt.xticks([])
    plt.yticks([])
    plt.show()


if __name__ == "__main__":
    main()

二、Mean-Shift

import numpy as np

import matplotlib.pyplot as plt

from sklearn.cluster import MeanShift, estimate_bandwidth

from sklearn.datasets import make_blobs

from itertools import cycle

def load_points(path="data.txt"):
    """Read sample points from a text file into a float array.

    Every non-blank line is treated as one record: its first and last
    characters are discarded and the remainder is split on commas and
    converted to floats.
    NOTE(review): this implies a format such as ``[1.0,2.0]`` per line; that
    is inferred from the slicing in the original script -- confirm against
    the real ``data.txt``.

    Args:
        path: Location of the data file.

    Returns:
        A ``numpy.ndarray`` with one row per record.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    rows = []
    with open(path, 'r') as f:
        for line in f:
            # Drop the line terminator first so that a final line without a
            # trailing newline does not lose its last data character.
            record = line.rstrip("\r\n")
            if not record.strip():
                continue  # tolerate blank lines instead of failing on float('')
            rows.append([float(field) for field in record[1:-1].strip().split(',')])
    return np.array(rows)


def main():
    """Cluster the points in ``data.txt`` with mean shift and plot the result."""
    plt.figure(figsize=(5, 5))

    # -------- first data set --------
    # Load the data
    X = load_points("data.txt")
    print("数据读入完成……")

    # Clustering: estimate a bandwidth from the data, then fit mean shift
    bandwidth = estimate_bandwidth(X, quantile=0.15, n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    n_clusters_ = len(np.unique(labels))
    print("number of estimated clusters : %d" % n_clusters_)

    # Plotting: one colour per cluster label, drawn as small dots
    plt.subplot(111)
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        my_members = labels == k  # boolean mask of the samples in cluster k
        plt.plot(X[my_members, 0], X[my_members, 1], col + '.')
    plt.xticks([])
    plt.yticks([])
    plt.show()


if __name__ == "__main__":
    main()

三、DBSCAN

import numpy as np

from sklearn.cluster import DBSCAN

from sklearn import metrics

from sklearn.datasets import make_blobs

from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

def load_points(path="data.txt"):
    """Read sample points from a text file into a float array.

    Every non-blank line is treated as one record: its first and last
    characters are discarded and the remainder is split on commas and
    converted to floats.
    NOTE(review): this implies a format such as ``[1.0,2.0]`` per line; that
    is inferred from the slicing in the original script -- confirm against
    the real ``data.txt``.

    Args:
        path: Location of the data file.

    Returns:
        A ``numpy.ndarray`` with one row per record.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    rows = []
    with open(path, 'r') as f:
        for line in f:
            # Drop the line terminator first so that a final line without a
            # trailing newline does not lose its last data character.
            record = line.rstrip("\r\n")
            if not record.strip():
                continue  # tolerate blank lines instead of failing on float('')
            rows.append([float(field) for field in record[1:-1].strip().split(',')])
    return np.array(rows)


def main():
    """Cluster the points in ``data.txt`` with DBSCAN and plot the result."""
    # Load the data and standardise it before clustering
    X = load_points("data.txt")
    print("数据读入完成……")
    X = StandardScaler().fit_transform(X)

    # Compute DBSCAN
    db = DBSCAN(eps=0.2, min_samples=10).fit(X)
    labels = db.labels_

    # Boolean mask that is True exactly at the core-sample indices
    core_samples_mask = np.zeros_like(labels, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True

    # One Spectral colour per distinct label; black is reserved for noise.
    unique_labels = set(labels)
    colors = [plt.cm.Spectral(each)
              for each in np.linspace(0, 1, len(unique_labels))]
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Label -1 marks noise: draw it in opaque black.
            col = [0, 0, 0, 1]

        class_member_mask = (labels == k)

        # Core samples of this label
        xy = X[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=5)

        # Remaining (non-core) samples of this label
        xy = X[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=5)

    plt.xticks([])
    plt.yticks([])
    plt.show()


if __name__ == "__main__":
    main()

四、聚合聚类算法

from sklearn.datasets.samples_generator import make_blobs

from sklearn.cluster import AgglomerativeClustering

import numpy as np

import matplotlib.pyplot as plt

from itertools import cycle ##python自带的迭代器模块

def load_points(path="data.txt"):
    """Read sample points from a text file into a float array.

    Every non-blank line is treated as one record: its first and last
    characters are discarded and the remainder is split on commas and
    converted to floats.
    NOTE(review): this implies a format such as ``[1.0,2.0]`` per line; that
    is inferred from the slicing in the original script -- confirm against
    the real ``data.txt``.

    Args:
        path: Location of the data file.

    Returns:
        A ``numpy.ndarray`` with one row per record.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    rows = []
    with open(path, 'r') as f:
        for line in f:
            # Drop the line terminator first so that a final line without a
            # trailing newline does not lose its last data character.
            record = line.rstrip("\r\n")
            if not record.strip():
                continue  # tolerate blank lines instead of failing on float('')
            rows.append([float(field) for field in record[1:-1].strip().split(',')])
    return np.array(rows)


def main():
    """Cluster the points in ``data.txt`` agglomeratively and plot the result."""
    # Load the data
    X = load_points("data.txt")
    print("数据读入完成……")

    # Configure the hierarchical clustering; index 2 selects 'complete' linkage
    linkages = ['ward', 'average', 'complete']
    n_clusters_ = 4
    ac = AgglomerativeClustering(linkage=linkages[2], n_clusters=n_clusters_)

    # Fit the model
    ac.fit(X)

    # Cluster label of every sample
    labels = ac.labels_

    # Plotting
    plt.figure(1)
    plt.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        # Boolean mask: True where the sample's label equals k
        my_members = labels == k
        # X[my_members, 0] picks the first coordinate of the samples in cluster k
        plt.plot(X[my_members, 0], X[my_members, 1], col + '.')
    plt.xticks([])
    plt.yticks([])
    plt.show()


if __name__ == "__main__":
    main()

你可能感兴趣的:(聚类算法评价指标python实现_python实现四种聚类算法)