DBSCAN 聚类及其实现

DBSCAN的基础知识点可以参考如下的链接

4-DBSCAN聚类算法_哔哩哔哩_bilibili

现在我们来实现一下:

# Synthetic dataset generator
from sklearn.datasets import make_blobs
# Plotting library
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
# Centre coordinates passed to make_blobs (three 2-D points)
centers = [(0, 4), (5, 5) , (8,2)]
# Standard deviations passed to make_blobs, one value per entry in `centers`
cluster_std = [1.2, 1, 1.1]
# Draw 200 two-feature samples with a fixed random_state; X receives the
# samples and y the second value returned by make_blobs
X, y= make_blobs(n_samples=200, cluster_std=cluster_std, centers=centers, n_features=2, random_state=1)

判断指定点是核心点、边界点还是噪声点（依据其 eps 邻域内其他点的个数）

def check_core_point(eps, minPts, df, index):
    """Classify one point as core, border or noise from its eps-neighbourhood.

    Args:
        eps: Radius of the neighbourhood, measured as Euclidean distance.
        minPts: Minimum number of *other* points inside the radius for the
            point to count as a core point.
        df: DataFrame with numeric columns 'X' and 'Y', one row per point.
        index: Index label (as found in ``df.index``) of the point to test.

    Returns:
        A tuple ``(neighbours, is_core, is_border, is_noise)`` where
        ``neighbours`` is a pandas Index holding the labels of every other
        point within ``eps`` and exactly one of the three flags is True.

    Raises:
        KeyError: If ``index`` is not a label of ``df``.
    """
    # Look the point up by label: the self-exclusion below and the returned
    # neighbours are label based, so a positional lookup would disagree with
    # them on any DataFrame that does not use the default 0..n-1 index.
    x, y = df.loc[index, 'X'], df.loc[index, 'Y']

    # Euclidean distance, so the neighbourhood is a circle of radius eps
    # rather than a square of side 2 * eps.
    within_radius = (np.hypot(df['X'] - x, df['Y'] - y) <= eps).to_numpy()

    # The point itself is never counted as its own neighbour.
    neighbours = df.index[within_radius & (df.index != index)]
    n_neighbours = len(neighbours)

    # return format (neighbour labels, is_core, is_border, is_noise)
    if n_neighbours >= minPts:
        return (neighbours, True, False, False)
    if n_neighbours > 0:
        return (neighbours, False, True, False)
    return (neighbours, False, False, True)

# Cluster the sampled points and draw one scatter series per cluster.
# NOTE(review): cluster_with_stack is not defined in the visible part of this
# file; it is assumed to yield (row index, cluster id) pairs -- confirm.

# neighbourhood size handed to the clustering routine
eps = 0.5
# neighbour-count threshold handed to the clustering routine
minPts = 3

data = pd.DataFrame(data=X, columns=["X", "Y"])
clustered = cluster_with_stack(eps, minPts, data)

# split the pairs into parallel tuples and keep a tabular copy for filtering
idx, cluster = zip(*clustered)
cluster_df = pd.DataFrame(clustered, columns=["idx", "cluster"])

plt.figure(figsize=(10, 7))
cluster_ids = np.unique(cluster)
for clust in cluster_ids:
    # rows of X that were assigned to this cluster
    member_rows = cluster_df.loc[cluster_df["cluster"] == clust, "idx"].values
    plt.scatter(X[member_rows, 0], X[member_rows, 1], s=10, label=f"Cluster{clust}")

# legend entries are listed in the same order the series were drawn
plt.legend([f"Cluster {clust}" for clust in cluster_ids], loc="lower right")
plt.title('Clustered Data')
plt.xlabel('X')
plt.ylabel('Y')

DBSCAN 聚类及其实现_第1张图片

 

你可能感兴趣的:(data,mining,聚类,算法)