# 1. K-means clustering
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
# Create the figure that the scatter plot further down is drawn into.
plt.figure(figsize=(5, 5))
n_samples = 1500  # NOTE(review): not referenced by the k-means script itself
random_state = 170  # passed to KMeans(random_state=...) further down

# Load the samples from data.txt: one point per line, values separated by
# commas.  The first and last character of every record are discarded, which
# matches records wrapped in one enclosing character on each side
# (presumably "[x,y]" or "(x,y)" -- TODO confirm against the data file).
rows = []
with open("data.txt", 'r') as f:
    for line in f:
        # Strip only the line terminator before slicing.  A fixed slice such
        # as line[1:-2] assumes every line ends in "\n" and silently cuts a
        # digit off the last record when the file has no trailing newline.
        record = line.rstrip('\n')
        if not record.strip():
            continue  # tolerate blank lines instead of failing in float('')
        rows.append([float(value) for value in record[1:-1].strip().split(',')])
print("数据读入完成……")
X = np.array(rows)
# Partition the loaded points into 4 clusters with k-means; the seed defined
# above is forwarded to the estimator.
kmeans = KMeans(n_clusters=4, random_state=random_state)
y_pred = kmeans.fit_predict(X)

# Scatter the first two columns of X, coloured by predicted cluster label,
# on a single set of axes with the tick marks hidden.
plt.subplot(111)
first_column, second_column = X[:, 0], X[:, 1]
plt.scatter(first_column, second_column, c=y_pred, alpha=1)
plt.yticks([])
plt.xticks([])
plt.show()
# 2. Mean-Shift clustering
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs
from itertools import cycle
# Create the figure that the cluster plot further down is drawn into.
plt.figure(figsize=(5, 5))
# Section banner written as a bare string expression (it has no runtime
# effect); the text reads "first data set".
'''--------第一组数据---------'''

# Load the samples from data.txt: one point per line, values separated by
# commas.  The first and last character of every record are discarded, which
# matches records wrapped in one enclosing character on each side
# (presumably "[x,y]" or "(x,y)" -- TODO confirm against the data file).
rows = []
with open("data.txt", 'r') as f:
    for line in f:
        # Strip only the line terminator before slicing.  A fixed slice such
        # as line[1:-2] assumes every line ends in "\n" and silently cuts a
        # digit off the last record when the file has no trailing newline.
        record = line.rstrip('\n')
        if not record.strip():
            continue  # tolerate blank lines instead of failing in float('')
        rows.append([float(value) for value in record[1:-1].strip().split(',')])
print("数据读入完成……")
X = np.array(rows)
# Clustering: estimate a bandwidth from the data, then fit Mean-Shift with it.
bandwidth = estimate_bandwidth(X, quantile=0.15, n_samples=500)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit(X)

cluster_centers = ms.cluster_centers_
labels = ms.labels_
labels_unique = np.unique(labels)
n_clusters_ = labels_unique.size
print("number of estimated clusters : %d" % n_clusters_)

# Plotting: one colour per cluster, points drawn as '.' markers.
plt.subplot(111)
colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for k in range(n_clusters_):
    col = next(colors)
    # The centre is looked up but not drawn (the centre marker is disabled).
    cluster_center = cluster_centers[k]
    my_members = (labels == k)  # boolean mask selecting the points of cluster k
    plt.plot(X[my_members, 0], X[my_members, 1], col + '.')
plt.yticks([])
plt.xticks([])
plt.show()
# 3. DBSCAN
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
# Load the samples from data.txt: one point per line, values separated by
# commas.  The first and last character of every record are discarded, which
# matches records wrapped in one enclosing character on each side
# (presumably "[x,y]" or "(x,y)" -- TODO confirm against the data file).
rows = []
with open("data.txt", 'r') as f:
    for line in f:
        # Strip only the line terminator before slicing.  A fixed slice such
        # as line[1:-2] assumes every line ends in "\n" and silently cuts a
        # digit off the last record when the file has no trailing newline.
        record = line.rstrip('\n')
        if not record.strip():
            continue  # tolerate blank lines instead of failing in float('')
        rows.append([float(value) for value in record[1:-1].strip().split(',')])
print("数据读入完成……")
X = np.array(rows)
# Standardise the features before clustering (eps below applies to the
# scaled data).
X = StandardScaler().fit_transform(X)

# Run DBSCAN and build a boolean mask marking the core samples.
db = DBSCAN(eps=0.2, min_samples=10).fit(X)
labels = db.labels_
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

# Label -1 marks noise: count it separately and exclude it from the number
# of clusters.
n_noise_ = list(labels).count(-1)
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

# One Spectral colour per distinct label; noise is drawn in black instead.
unique_labels = set(labels)
colors = [plt.cm.Spectral(each)
          for each in np.linspace(0, 1, len(unique_labels))]
for k, col in zip(unique_labels, colors):
    face_color = (0, 0, 0, 1) if k == -1 else tuple(col)
    class_member_mask = (labels == k)
    # Core samples are drawn first, then the remaining members of the label,
    # both with the same marker style.
    for selector in (core_samples_mask, ~core_samples_mask):
        xy = X[class_member_mask & selector]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=face_color,
                 markeredgecolor='k', markersize=5)
plt.yticks([])
plt.xticks([])
plt.show()
# 4. Agglomerative (hierarchical) clustering
# `make_blobs` is imported from `sklearn.datasets` directly: the
# `sklearn.datasets.samples_generator` submodule was removed in
# scikit-learn 0.24, so importing through it fails on current releases.
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle  # iterator helpers from the Python standard library
# Load the samples from data.txt: one point per line, values separated by
# commas.  The first and last character of every record are discarded, which
# matches records wrapped in one enclosing character on each side
# (presumably "[x,y]" or "(x,y)" -- TODO confirm against the data file).
rows = []
with open("data.txt", 'r') as f:
    for line in f:
        # Strip only the line terminator before slicing.  A fixed slice such
        # as line[1:-2] assumes every line ends in "\n" and silently cuts a
        # digit off the last record when the file has no trailing newline.
        record = line.rstrip('\n')
        if not record.strip():
            continue  # tolerate blank lines instead of failing in float('')
        rows.append([float(value) for value in record[1:-1].strip().split(',')])
print("数据读入完成……")
X = np.array(rows)
# Configure the hierarchical clustering estimator: 4 clusters, using the
# third entry of the linkage list ('complete').
n_clusters_ = 4
linkages = ['ward', 'average', 'complete']
ac = AgglomerativeClustering(n_clusters=n_clusters_, linkage=linkages[2])

# Fit on the loaded points and keep the cluster label of every sample.
ac.fit(X)
lables = ac.labels_

# Plot each cluster in its own colour on a cleared figure 1.
plt.figure(1)
plt.clf()
colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for k in range(n_clusters_):
    col = next(colors)
    # Boolean mask: True where the sample's label equals k.
    my_members = (lables == k)
    # Columns 0 and 1 of the masked rows are the x and y coordinates.
    plt.plot(X[my_members, 0], X[my_members, 1], col + '.')
plt.yticks([])
plt.xticks([])
plt.show()