DBSCAN 基于密度的聚类算法

import numpy as np
import pickle

# Load the pickled clusters object from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# clusters.pkl must come from a trusted source.
with open('clusters.pkl', 'rb') as f:
    clusters = pickle.load(f)

# For every entry of `clusters`, concatenate the `.data` items of all of its
# cluster objects into one flat list; `data_list` keeps one such list per entry.
data_list = [
    [d for data_cluster in datas for d in data_cluster.data]
    for datas in clusters
]


def DBSCAN(points, eps, min_points):
    """
    Cluster points with the density-based DBSCAN algorithm.

    :param points: array-like of data points, one point per row
    :param eps: neighbourhood radius (Euclidean distance, boundary included)
    :param min_points: minimum number of points within ``eps`` of a point
        (the point itself is counted) for it to be a core point
    :return: list of cluster labels, one per point; clusters are numbered
        from 0 and points that no cluster reaches keep the label -1
    """
    # Accept any array-like (e.g. a list of rows), not only ndarrays.
    points = np.asarray(points)
    # Promote bool/integer data to float: unsigned integers would otherwise
    # wrap around in the subtraction below and NumPy rejects boolean
    # subtraction. Floating inputs keep their own precision.
    if points.dtype.kind in "biu":
        points = points.astype(float)
    n_points = len(points)

    # -1 means "not assigned to any cluster (yet)".
    labels = [-1] * n_points
    # core_points[i] is True when point i has at least min_points neighbours.
    core_points = np.zeros(n_points, dtype=bool)

    # Decide for every point whether it is a core point.
    for i in range(n_points):
        # Number of points within eps of point i; distance 0 to itself means
        # the point is always included in its own count.
        count = np.sum(np.linalg.norm(points - points[i], axis=1) <= eps)
        core_points[i] = count >= min_points

    # Id handed to the next cluster that gets started.
    cluster_id = 0

    for i in range(n_points):
        # Only a still-unlabelled core point can start a new cluster.
        if labels[i] != -1 or not core_points[i]:
            continue

        labels[i] = cluster_id
        # Label everything reachable from this core point with cluster_id.
        expand_cluster(points, labels, core_points, i, cluster_id, eps, min_points)
        cluster_id += 1

    return labels


def expand_cluster(points, labels, core_points, point_id, cluster_id, eps, min_points):
    """
    Grow one cluster outwards from a starting point, updating ``labels`` in place.

    Starting at ``point_id``, every still-unlabelled point (label -1) within
    ``eps`` of a visited core point receives ``cluster_id``; newly labelled
    core points are visited in turn. The traversal uses an explicit worklist
    instead of recursion, so large clusters cannot exceed Python's recursion
    limit.

    :param points: array-like of data points, one point per row
    :param labels: mutable sequence of cluster labels, modified in place;
        -1 marks an unassigned point, so ``cluster_id`` must differ from -1
    :param core_points: boolean sequence, True where a point is a core point
    :param point_id: index of the point to expand from
    :param cluster_id: label of the cluster being grown
    :param eps: neighbourhood radius (Euclidean distance, boundary included)
    :param min_points: minimum number of points; not used by this function,
        kept so the signature stays unchanged
    :return: None
    """
    # Accept any array-like (e.g. a list of rows), not only ndarrays.
    points = np.asarray(points)

    # A non-core point joins the cluster but never spreads it further.
    if not core_points[point_id]:
        labels[point_id] = cluster_id
        return

    # Core points whose neighbourhoods still have to be scanned.
    pending = [point_id]
    while pending:
        current = pending.pop()
        distances = np.linalg.norm(points - points[current], axis=1)
        for i in np.flatnonzero(distances <= eps).tolist():
            # Points that already carry a label are left untouched.
            if labels[i] != -1:
                continue
            labels[i] = cluster_id
            # Only core points keep propagating the cluster.
            if core_points[i]:
                pending.append(i)


# Cluster the first flattened data set with eps=0.5 and min_points=5.
data_value = np.array(data_list[0])
l = DBSCAN(data_value, 0.5, 5)

import matplotlib.pyplot as plt

# One colour per cluster id; ids beyond the palette reuse colours cyclically.
colors = ['red', 'blue', 'green', 'orange', 'purple', 'black']
for i, label in enumerate(l):
    # Points still labelled -1 belong to no cluster and are left off the plot.
    if label != -1:
        # The modulo keeps the lookup in range when there are more clusters
        # than colours (the plain index raised IndexError in that case).
        plt.scatter(data_value[i][0], data_value[i][1], c=colors[label % len(colors)])
plt.show()



结果图

DBSCAN 基于密度的聚类算法_第1张图片

你可能感兴趣的:(聚类,算法,机器学习)