初始化数据:
def compare(a):
    """Sort key: return the leading element of the indexable ``a``."""
    leading = a[0]
    return leading
def distance(d, p):
    """Return the Euclidean distance between the points ``d`` and ``p``.

    Only the first ``len(d)`` coordinates of ``p`` are used, so ``p`` may carry
    extra trailing coordinates; they are ignored.

    :param d: first point, a sized sequence of numbers
    :param p: second point, a sized sequence with at least ``len(d)`` numbers
    :return: the square root of the summed squared coordinate differences
    :raises IndexError: if ``p`` has fewer coordinates than ``d``
    """
    # zip() would silently truncate to the shorter point; keep the failure the
    # index-based loop used to produce, but with a message that says why.
    if len(p) < len(d):
        raise IndexError("p has fewer coordinates than d")
    squared = sum(((a - b) ** 2 for a, b in zip(d, p)), 0.0)
    return squared ** 0.5
def initData_normal(num, u, x):
    """Draw ``num`` samples from a multivariate normal distribution and store them.

    The whole sample array is appended as one group to the module-level
    ``data_label`` list, and its individual points are added to the
    module-level ``data`` list.

    :param num: number of samples to draw
    :param u: mean vector of the distribution (passed as the first argument of
        ``np.random.multivariate_normal``)
    :param x: covariance matrix of the distribution (passed as the second
        argument of ``np.random.multivariate_normal``)
    :return: None
    """
    samples = np.random.multivariate_normal(u, x, num)
    data_label.append(samples)
    data.extend(samples)
def initData_circular(num, f, noise=0.1):
    """Generate a data set with ``make_circles`` and store it.

    The samples are split by the label ``make_circles`` returns for them: the
    samples labelled 0 form one group and all remaining samples form a second
    group. Both groups are appended (in that order) to the module-level
    ``data_label`` list, and every sample is added to the module-level ``data``
    list.

    :param num: number of samples, forwarded as ``n_samples``
    :param f: forwarded as ``factor``
    :param noise: forwarded as ``noise``; defaults to the previously
        hard-coded value 0.1
    :return: None
    """
    X, Y = make_circles(n_samples=num, noise=noise, factor=f)
    zero_group = [point for point, label in zip(X, Y) if label == 0]
    other_group = [point for point, label in zip(X, Y) if label != 0]
    data_label.append(zero_group)
    data_label.append(other_group)
    data.extend(X)
KMM (K-means) / K-medoids:
class KMM:
    """K-means / K-medoids clustering over a sequence of points.

    ``p`` holds the current centers, ``d`` the input points and ``dividual``
    the current partition: ``dividual[i]`` lists the indices (into ``d``) of
    the points assigned to center ``i``.
    """

    def __init__(self, k, d):
        """
        :param k: number of centers
        :param d: input data, a sequence of points
        """
        self.p = [[] for _ in range(k)]
        self.d = d
        self.dividual = [[] for _ in range(k)]

    def div(self):
        """Assign every point to its nearest center.

        Rebuilds ``self.dividual`` from scratch. When several centers are
        equally close, the one with the lowest index wins.
        """
        self.dividual = [[] for _ in self.dividual]
        for i, point in enumerate(self.d):
            nearest = -1
            min_dis = float('inf')
            for j, center in enumerate(self.p):
                dis = distance(point, center)
                if dis < min_dis:
                    min_dis = dis
                    nearest = j
            # nearest stays -1 only when no center produced a finite,
            # comparable distance; such a point is left unassigned.
            if nearest != -1:
                self.dividual[nearest].append(i)

    @staticmethod
    def _centers_changed(old, new):
        """Return True when any center in ``new`` differs from ``old``."""
        for before, after in zip(old, new):
            if len(before) != len(after):
                return True
            if any(a != b for a, b in zip(before, after)):
                return True
        return False

    def _medoid(self, members):
        """Return the index in ``members`` with the smallest total distance to the others."""
        return min(
            members,
            key=lambda j: sum(distance(self.d[j], self.d[o]) for o in members),
        )

    def _init_centers(self):
        """Pick the initial centers at random from ``self.d``."""
        n = len(self.d)
        k = len(self.p)
        if n == 0:
            raise ValueError("cannot choose initial centers from empty data")
        if k <= n:
            # Distinct indices, so two centers never start on the same sample.
            picks = random.sample(range(n), k)
        else:
            # More centers than points: repeats are unavoidable.
            picks = [random.randrange(n) for _ in range(k)]
        self.p = [list(self.d[i]) for i in picks]

    def move(self):
        """K-means update: move every center to the mean of its cluster.

        Works for points of any dimension. A center whose cluster is empty
        keeps its previous position instead of being sent to the origin.

        :return: True if any center changed compared with the previous
            iteration, False otherwise
        """
        previous = self.p
        updated = []
        for i, center in enumerate(previous):
            members = self.dividual[i]
            if not members:
                updated.append(list(center))
                continue
            count = len(members)
            dims = len(self.d[members[0]])
            updated.append(
                [sum(self.d[j][axis] for j in members) / count
                 for axis in range(dims)]
            )
        self.p = updated
        return self._centers_changed(previous, updated)

    def move_d(self):
        """K-medoids update: move every center onto the medoid of its cluster.

        The medoid is the cluster member whose summed distance to all members
        of the same cluster is smallest (first such member on ties). This
        costs a number of distance evaluations quadratic in the cluster size.
        A center whose cluster is empty keeps its previous position.

        :return: True if any center changed compared with the previous
            iteration, False otherwise
        """
        previous = self.p
        updated = []
        for i, center in enumerate(previous):
            members = self.dividual[i]
            if not members:
                updated.append(list(center))
                continue
            updated.append(list(self.d[self._medoid(members)]))
        self.p = updated
        return self._centers_changed(previous, updated)

    def show(self):
        """Plot every cluster and the centers, and print the centers."""
        for i, members in enumerate(self.dividual):
            # An empty cluster has nothing to draw, and an empty array would
            # unpack to no coordinates at all in the plot call.
            if not members:
                continue
            points = np.array([self.d[j] for j in members])
            plt.plot(*points.T, '.', label='s' + str(i))
        plt.plot(*np.array(self.p).T, '*', label='p')
        print(self.p)
        plt.axis('scaled')
        plt.legend()
        plt.show()

    def fit(self, m_d, max_t=20):
        """Cluster ``self.d`` without plotting.

        Starts from randomly chosen samples and alternates assignment and
        center update until no center changes or ``max_t`` updates were made.

        :param m_d: 'd' selects K-medoids; any other value selects K-means
        :param max_t: maximum number of iterations
        :raises ValueError: if ``self.d`` is empty
        """
        self._init_centers()
        step = self.move_d if m_d == 'd' else self.move
        for _ in range(max_t):
            self.div()
            if not step():
                break

    def run(self, m_d, max_t=20):
        """Cluster ``self.d`` and display the result.

        :param m_d: choose K-means ('m') or K-medoids ('d')
        :param max_t: maximum number of iterations
        """
        self.fit(m_d, max_t)
        self.show()
main:
if __name__ == '__main__':
    # Four 50-point groups, one per quadrant, followed by two 5-point groups
    # placed far away from them; every group uses the identity covariance.
    layout = [
        (50, [5, 5]),
        (50, [5, -5]),
        (50, [-5, 5]),
        (50, [-5, -5]),
        (5, [-50, -50]),
        (5, [-30, -30]),
    ]
    for count, mean in layout:
        initData_normal(count, mean, [[1, 0], [0, 1]])

    # Cluster the same data twice: first with K-means, then with K-medoids.
    model = KMM(4, data)
    for mode in ('m', 'd'):
        model.run(mode)
结果:
KMM(K-means):
受离群点影响较大。
K-medoids:
受离群点影响较小。
DBScan:
class DBScan:
    """Density-based clustering (DBSCAN) over a sequence of points.

    A point is a core point when at least ``core_num`` points (itself
    included) lie within distance ``th`` of it. Clusters grow outwards from
    core points. A point reached from a core point that is not core itself
    joins the cluster without extending it. A point reached by no core point
    is noise and keeps class -1.
    """

    def __init__(self, th, core_num, d):
        """
        :param th: radius of the neighbourhood around a point
        :param core_num: minimum number of points inside that radius for a
            point to count as a core point
        :param d: data, a sequence of points
        """
        self.th = th
        self.core_num = core_num
        self.d = d
        # classes[i] is the cluster number of point i, or -1 when unassigned.
        self.classes = [-1] * len(d)
        # clusters[c] lists the indices of the points in cluster c.
        self.clusters = []
        self.sum = len(d)

    def _neighbours(self, i):
        """Return the indices of all points within ``th`` of point ``i``."""
        origin = self.d[i]
        return [j for j, other in enumerate(self.d)
                if distance(origin, other) <= self.th]

    def spread(self):
        """Compute the clustering, filling ``self.classes`` and ``self.clusters``.

        Any previous result is discarded first, so the method can be called
        repeatedly. Points are visited in index order, which makes the
        cluster numbering reproducible.
        """
        total = len(self.d)
        self.classes = [-1] * total
        self.clusters = []
        for seed in range(total):
            if self.classes[seed] != -1:
                continue
            # Only a core point may start a cluster; a non-core point stays
            # unassigned unless a later cluster claims it as a border point.
            if len(self._neighbours(seed)) < self.core_num:
                continue
            label = len(self.clusters)
            members = [seed]
            self.classes[seed] = label
            self.clusters.append(members)
            # members grows while it is scanned; cursor marks the next member
            # whose neighbourhood still has to be examined.
            cursor = 0
            while cursor < len(members):
                near = self._neighbours(members[cursor])
                cursor += 1
                if len(near) < self.core_num:
                    continue  # border point: belongs here but does not expand
                for j in near:
                    if self.classes[j] == -1:
                        self.classes[j] = label
                        members.append(j)

    def show(self):
        """Hand the clustered points to the module-level ``show`` helper.

        One coordinate array is built per cluster. Points with class -1 belong
        to no cluster and are therefore not passed on.
        NOTE(review): the ``show`` helper is not defined in this part of the
        file; confirm it accepts an object array of per-cluster arrays.
        """
        groups = [np.array([self.d[i] for i in members])
                  for members in self.clusters]
        if len({len(group) for group in groups}) > 1:
            # Clusters of different sizes cannot form a regular array, so
            # keep them as separate arrays inside an object array.
            packed = np.empty(len(groups), dtype=object)
            for slot, group in enumerate(groups):
                packed[slot] = group
        else:
            packed = np.array(groups)
        show(packed)
main:
if __name__ == '__main__':
    # Generate 1000 make_circles samples with factor 0.1, cluster them with
    # radius 0.2 and a core threshold of 10, then display the clusters.
    initData_circular(1000, 0.1)
    clusterer = DBScan(0.2, 10, data)
    clusterer.spread()
    clusterer.show()
结果:
