机器学习--DBSCAN(密度聚类算法)

算法描述为西瓜书上所述:
机器学习--DBSCAN(密度聚类算法)_第1张图片

通俗来讲:
1. 遍历每个点,然后计算剩余的点到该点的距离,如果小与sigma的点的个数大于MinPts, 则加到对象集omega中
2.循环体中,随机选取一个点,然后生成聚类簇C_k

import numpy as np
from sklearn import preprocessing
import random
import matplotlib.pyplot as plt
import math

def distance(x,y):

    return math.sqrt(math.pow(x[ 0 ] - y[ 0 ], 2) + math.pow(x[ 1 ] - y[ 1 ], 2))


def dbscan(data, sigma, MinPts):

    omega = set()
    k = 0
    T = set(data)
    Q = []
    output = []
    for d in data:
        if len([i for i in data if distance(i, d) < sigma]) >= MinPts:
            omega.add(d)

    while len(omega):
        T_old = T
        o = random.choice(list(omega))
        Q.append(o)
        T = T - set(o)
        while len(Q):
            q = Q.pop(0)
            N_sigma = [i for i in data if distance(i, q) < sigma]
            if len(N_sigma)  >= MinPts:
                delta = T & set(N_sigma)
                Q += list(delta)
                T -= delta
        k += 1
        C_k = list(T_old - T)
        omega -= set(C_k)
        output.append(C_k)
    return output
if __name__ == '__main__':
    data = [[0.697, 0.460], [0.774, 0.376], [0.634, 0.264], [0.608, 0.318],
            [0.556,0.215],[0.403,0.237],[0.481,0.149],[0.437,0.211],[0.666,0.091],
            [0.243,0.276],[0.245,0.057],[0.343,0.099],[0.639,0.161],[0.657,0.198],[0.360,0.370],
            [0.593,0.042],[0.719,0.103],[0.359,0.188],[0.339,0.241],[0.282,0.257],
            [0.748,0.232],[0.714,0.346],[0.483,0.312],[0.478,0.437],[0.525,0.369],[0.751,0.489],
            [0.532,0.472],[0.473,0.376],[0.725,0.445],[0.446,0.459]]
    data = [(i,j) for i,j in data]

    res = dbscan(data, 0.11, 5)
    draw(res)

机器学习--DBSCAN(密度聚类算法)_第2张图片

你可能感兴趣的:(机器学习)