DBSCAN的基础知识点可以参考如下的链接
4-DBSCAN聚类算法_哔哩哔哩_bilibili
现在我们来实现一下:
# 引入数据集
from sklearn.datasets import make_blobs
# 引入画图的库
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
# Centres of the three synthetic blobs, as (x, y) pairs.
centers = [(0, 4), (5, 5), (8, 2)]
# Spread (standard deviation) of each blob, listed in the same order as `centers`.
cluster_std = [1.2, 1, 1.1]
# Draw 200 two-dimensional samples; X holds the coordinates, y the generating
# blob label. random_state is fixed so the data set is reproducible.
X, y = make_blobs(
    n_samples=200,
    n_features=2,
    centers=centers,
    cluster_std=cluster_std,
    random_state=1,
)
判断给定点是核心点、边界点还是噪声点（统计其 eps 邻域内其他点的数量）：
def check_core_point(eps, minPts, df, index):
    """Classify one point as core, border or noise from its eps-neighbourhood.

    The neighbourhood is the set of *other* rows of ``df`` whose Euclidean
    distance to the query point is at most ``eps``.

    Args:
        eps: Neighbourhood radius.
        minPts: Minimum number of neighbours for the point to be a core point.
        df: DataFrame with numeric columns ``'X'`` and ``'Y'``.
        index: Row of the query point. It is used positionally to read the
            coordinates and compared against ``df.index`` to exclude the
            point itself, so the two only agree when ``df`` has the default
            0..n-1 index (assumed here; confirm against the caller).

    Returns:
        A tuple ``(neighbour_index, is_core, is_border, is_noise)`` where
        ``neighbour_index`` is the pandas index of the neighbouring rows and
        exactly one of the three flags is True:
        core when there are at least ``minPts`` neighbours, border when there
        are some but fewer than ``minPts``, noise when there are none.
    """
    # Coordinates of the query point.
    point = df.iloc[index]
    x, y = point['X'], point['Y']

    # Euclidean distance from the query point to every row. A per-axis
    # |dx| <= eps and |dy| <= eps test would select a square and wrongly
    # accept points up to eps * sqrt(2) away along the diagonal.
    distance = np.hypot(df['X'] - x, df['Y'] - y)

    # Neighbours within the radius, excluding the query point itself.
    neighbours = df[(distance <= eps) & (df.index != index)]
    count = len(neighbours)

    # return format (neighbour index, is_core, is_border, is_noise)
    if count >= minPts:
        return (neighbours.index, True, False, False)
    if count > 0:
        return (neighbours.index, False, True, False)
    return (neighbours.index, False, False, True)
# Run the clustering and plot the result.
# NOTE(review): cluster_with_stack is not defined in the visible part of this
# file; from its use below it presumably returns a sequence of
# (idx, cluster) pairs - confirm against its definition.

# Neighbourhood radius passed to the clustering (0.5).
eps = 0.5
# Minimum number of neighbouring points, set to 3.
minPts = 3

data = pd.DataFrame(X, columns=["X", "Y"])
clustered = cluster_with_stack(eps, minPts, data)

# Split the pairs into parallel sequences and also keep them as a table.
idx, cluster = zip(*clustered)
cluster_df = pd.DataFrame(clustered, columns=["idx", "cluster"])

plt.figure(figsize=(10, 7))
cluster_ids = np.unique(cluster)
for clust in cluster_ids:
    # Rows of X that were assigned to this cluster.
    rows = cluster_df.loc[cluster_df["cluster"] == clust, "idx"].values
    plt.scatter(X[rows, 0], X[rows, 1], s=10, label=f"Cluster{clust}")

# One legend entry per cluster, in the same order the scatters were drawn.
plt.legend([f"Cluster {clust}" for clust in cluster_ids], loc="lower right")
plt.title('Clustered Data')
plt.xlabel('X')
plt.ylabel('Y')