DBSCAN算法python实现

DBSCAN原理是基于距离定义密度,使用时用户需要自定义距离范围eps和最小点数minpt。DBSCAN的算法流程如下:

  1. 初始化核心点集,输入距离范围和最小核心点数

  2. 遍历数据点,在给定距离和最小核心点数的情况下,得到核心点集合并存储下每个点的邻接点

  3. 从核心点集中随机选出一个核心点,开始分簇。将其邻域点加入至簇成员集合中,邻域中的核心点存储下来再进行循环,直至簇核心点集为空,则该簇划分完毕;若核心点集中仍有剩余核心点,则重复第3步,直至核心点集为空。

详细的算法流程可查看刘建平Pinard博客

Python代码如下:

# using utf-8

import numpy as np


def dbscan(x, minpt, esp, norm=1, verbose=True):
    """Cluster the rows of ``x`` with the DBSCAN algorithm.

    A point is a *core point* when at least ``minpt`` other points lie within
    distance ``esp`` of it (the point itself is never counted, for either
    norm). Each cluster is grown from an arbitrary remaining core point by
    repeatedly absorbing the neighbours of every core point reached.

    Args:
        x: Array-like convertible to a 2-D array; each row is one point.
        minpt: Minimum number of neighbours (excluding the point itself)
            required for a point to be a core point.
        esp: Neighbourhood radius; the comparison is inclusive
            (``distance <= esp``).
        norm: Distance metric: ``1`` for Manhattan (L1) distance, ``2`` for
            Euclidean (L2) distance.
        verbose: When true (the default), print the neighbourhood table and
            per-cluster progress information to stdout.

    Returns:
        A list with one set of row indices per cluster. Points that are
        neither core points nor within ``esp`` of a core point (noise) do
        not appear in any set. Returns ``None`` (after printing a message)
        when ``norm`` is not 1 or 2.
    """
    x = np.array(x)
    n_points, _ = x.shape

    if norm not in (1, 2):
        print("请输入支持的范数形式:1或2")
        return None

    core_points = set()  # indices of core points not yet assigned to a cluster
    neighborhood = {}  # point index -> list of neighbour indices (self excluded)
    for i in range(n_points):
        diff = x - x[i, :]
        if norm == 1:
            distance = np.abs(diff).sum(axis=1)
        else:
            # Take the square root so that ``esp`` is a true Euclidean radius
            # rather than being compared against the squared distance.
            distance = np.sqrt((diff ** 2).sum(axis=1))
        neighbors = [j for j in np.where(distance <= esp)[0].tolist() if j != i]
        neighborhood[i] = neighbors
        # The core test uses the neighbour count *without* the point itself,
        # identically for both norms.
        if len(neighbors) >= minpt:
            core_points.add(i)

    if verbose:
        print(neighborhood)

    unexplored = set(range(n_points))  # points not yet assigned to any cluster
    cluster_result = []
    while core_points:
        if verbose:
            print("当前核心数量为", len(core_points))
        seed = core_points.pop()
        if verbose:
            print({seed})
        frontier = {seed}  # core points of this cluster still to be expanded
        members = {seed}
        while frontier:
            current = frontier.pop()
            nearby = set(neighborhood[current])
            # Unassigned core neighbours join the frontier and are claimed
            # immediately so they cannot seed a separate cluster later.
            new_cores = core_points & nearby
            frontier |= new_cores
            core_points -= new_cores
            # Only still-unassigned points may join, so a border point stays
            # with the first cluster that reaches it.
            members |= unexplored & nearby
            unexplored -= members
        cluster_result.append(members)

    return cluster_result

if __name__ == "__main__":
    # Demo: thirteen 2-D sample points clustered with the Manhattan norm,
    # a radius of 3 and a minimum of 3 neighbours per core point.
    sample_points = [
        (1, 2), (2, 1), (2, 4), (4, 3),
        (5, 8), (6, 7), (6, 9), (7, 9),
        (9, 5),
        (1, 12), (3, 12), (5, 12),
        (3, 3),
    ]
    x = np.array(sample_points)
    cluster_result = dbscan(x, minpt=3, esp=3, norm=1)
    print(cluster_result)

 

你可能感兴趣的:(DBSCAN算法python实现)