通俗来讲:
1. 遍历每个点,然后计算剩余的点到该点的距离,如果小与sigma的点的个数大于MinPts, 则加到对象集omega中
2.循环体中,随机选取一个点,然后生成聚类簇C_k
import numpy as np
from sklearn import preprocessing
import random
import matplotlib.pyplot as plt
import math
def distance(x,y):
return math.sqrt(math.pow(x[ 0 ] - y[ 0 ], 2) + math.pow(x[ 1 ] - y[ 1 ], 2))
def dbscan(data, sigma, MinPts):
omega = set()
k = 0
T = set(data)
Q = []
output = []
for d in data:
if len([i for i in data if distance(i, d) < sigma]) >= MinPts:
omega.add(d)
while len(omega):
T_old = T
o = random.choice(list(omega))
Q.append(o)
T = T - set(o)
while len(Q):
q = Q.pop(0)
N_sigma = [i for i in data if distance(i, q) < sigma]
if len(N_sigma) >= MinPts:
delta = T & set(N_sigma)
Q += list(delta)
T -= delta
k += 1
C_k = list(T_old - T)
omega -= set(C_k)
output.append(C_k)
return output
if __name__ == '__main__':
data = [[0.697, 0.460], [0.774, 0.376], [0.634, 0.264], [0.608, 0.318],
[0.556,0.215],[0.403,0.237],[0.481,0.149],[0.437,0.211],[0.666,0.091],
[0.243,0.276],[0.245,0.057],[0.343,0.099],[0.639,0.161],[0.657,0.198],[0.360,0.370],
[0.593,0.042],[0.719,0.103],[0.359,0.188],[0.339,0.241],[0.282,0.257],
[0.748,0.232],[0.714,0.346],[0.483,0.312],[0.478,0.437],[0.525,0.369],[0.751,0.489],
[0.532,0.472],[0.473,0.376],[0.725,0.445],[0.446,0.459]]
data = [(i,j) for i,j in data]
res = dbscan(data, 0.11, 5)
draw(res)