密度聚类算法OPTICS实现

基于密度的聚类算法OPTICS最关键的一步是得到样本点的聚类顺序图(可达距离图)。相比于另一个密度聚类算法DBSCAN,OPTICS对输入参数eps和MinPts不敏感。参数eps和MinPts分别代表邻域半径和最小样本数:若某样本点在半径eps范围内的邻居点数量大于等于MinPts,则称该样本点为核心点。本文默认eps为正无穷,且MinPts不超过总样本数,因此任何一个样本点都可充当核心点。下面基于此情形,采用Python实现OPTICS并绘制密度聚类顺序图。

# 导入所需包
import numpy as np
from math import sqrt
import matplotlib.pyplot as plt
class Object:
    """A 2-D sample point plus the bookkeeping state used by the ordering."""

    def __init__(self, x, y):
        # Coordinates of the point.
        self.x = x
        self.y = y
        # Flag that callers flip to True once the point has been handled.
        self.Processed = False
        # Both distances hold the string 'UNDEFINED' until a value is assigned.
        self.reachability_distance = 'UNDEFINED'
        self.core_distance = 'UNDEFINED'

    def dist(self, obj):
        """Return the Euclidean distance between this point and ``obj``."""
        return sqrt((self.x - obj.x) ** 2 + (self.y - obj.y) ** 2)

    def setCoreDistance(self, MinPts, SetOfObjects):
        """Return the ``MinPts``-th smallest distance to ``SetOfObjects``.

        Despite its name, this method does not store anything on the
        instance; the caller is expected to assign the returned value.
        If this point is itself contained in ``SetOfObjects`` its zero
        self-distance takes part in the ranking.

        Raises:
            IndexError: if ``MinPts`` is larger than ``len(SetOfObjects)``.
        """
        # Sort once after all distances are known; the previous version
        # re-sorted the growing list on every iteration of the loop.
        distances = sorted(self.dist(other) for other in SetOfObjects)
        return distances[MinPts - 1]
    
class OPTICS:
    """Compute an ordering of the rows of ``X`` and their reachability values.

    The whole computation runs inside ``__init__``; afterwards the result is
    available in ``OrderedFile``. Every unprocessed object is treated as a
    neighbour of the current centre (no radius cut-off is applied).
    """

    def __init__(self, X, MinPts):
        """Wrap each row of ``X`` in an ``Object`` and build the ordering.

        ``X`` must expose ``shape[0]`` and two-index access ``X[i, 0]`` /
        ``X[i, 1]`` (for example a 2-D NumPy array); ``MinPts`` is passed on
        to ``setCoreDistance`` of every object.
        """
        self.X = X
        self.MinPts = MinPts
        # One Object per input row, in input order.
        self.SetOfObjects = [
            Object(self.X[row, 0], self.X[row, 1])
            for row in range(self.X.shape[0])
        ]
        self.OrderedFile = []  # objects in the order they were processed
        self.OrderSeeds = []   # pending objects, sorted by reachability

        for start in self.SetOfObjects:
            if start.Processed:
                continue
            self.ExpandClusterOrder(start, self.MinPts)

    def OrderSeedsUpdate(self, CenterObject):
        """Refresh reachability of all unprocessed objects w.r.t. the centre.

        Objects seen for the first time are added to ``OrderSeeds``; objects
        already queued only have their reachability lowered when the new
        value is smaller. The queue is re-sorted ascending afterwards.
        """
        centre_core = CenterObject.core_distance
        for candidate in self.SetOfObjects:
            if candidate.Processed:
                continue
            candidate_reach = max(centre_core, CenterObject.dist(candidate))
            previous = candidate.reachability_distance
            if previous == 'UNDEFINED':
                candidate.reachability_distance = candidate_reach
                self.OrderSeeds.append(candidate)
            elif candidate_reach < previous:
                candidate.reachability_distance = candidate_reach

        def by_reachability(item):
            return item.reachability_distance

        self.OrderSeeds.sort(key=by_reachability)

    def ExpandClusterOrder(self, obj, MinPts):
        """Process ``obj`` and then every queued seed until the queue is empty.

        Each processed object is marked, given its core distance, appended to
        ``OrderedFile`` and used as the centre for ``OrderSeedsUpdate``.
        """
        current = obj
        while True:
            current.Processed = True
            current.core_distance = current.setCoreDistance(
                MinPts, self.SetOfObjects
            )
            self.OrderedFile.append(current)
            self.OrderSeedsUpdate(current)
            if len(self.OrderSeeds) == 0:
                break
            # Next centre is the queued object with the smallest reachability.
            current = self.OrderSeeds.pop(0)
# Test samples: six Gaussian blobs, each given as (centre, spread).
np.random.seed(0)
n_points_per_cluster = 250

cluster_specs = [
    ([-5, -2], .8),
    ([4, -1], .1),
    ([1, -2], .2),
    ([-2, 3], .3),
    ([3, -2], 1.6),
    ([5, 6], 2),
]
# The generator is consumed in list order, so the random draws happen in the
# same sequence as six separate statements would produce.
C1, C2, C3, C4, C5, C6 = (
    centre + spread * np.random.randn(n_points_per_cluster, 2)
    for centre, spread in cluster_specs
)
X = np.vstack((C1, C2, C3, C4, C5, C6))

# Run the OPTICS ordering with MinPts = 20.
OrderedFile = OPTICS(X, 20).OrderedFile

# Reachability plot: one value per object, in processing order.
r_dis_list = [entry.reachability_distance for entry in OrderedFile]

# The first object never receives a numeric reachability, so it is skipped.
plotted_values = r_dis_list[1:]
plt.figure(figsize=(15, 5))
plt.scatter(list(range(len(plotted_values))), plotted_values)

密度聚类算法OPTICS实现_第1张图片

你可能感兴趣的:(机器学习)