# 周志华《机器学习》中给出的层次聚类算法比较简单;本实现在其基础上额外考虑了多个子类同时聚成同一个父类的情况。
# 但由于时间和精力有限,目前还没有实现完美的树状输出,我会在 Bonus 中改进。
# -*- coding: utf-8 -*-
__author__ = 'Zhao'
import math
import numpy as np
def judge_element_append(list_input, target=None):
    """Flatten one cluster entry into an accumulator list.

    A cluster entry is either a single value or a list of values (a cluster
    produced by an earlier merge). Lists are unpacked one level so the
    accumulator always stays flat.

    Args:
        list_input: A single value, or a list of values to unpack.
        target: List that receives the value(s). When omitted, the
            module-level ``temp_class`` list is used, which is what the
            clustering script in this file relies on.

    Raises:
        NameError: If ``target`` is omitted and no module-level
            ``temp_class`` has been defined yet.
    """
    if target is None:
        # Backward-compatible default: the script's global accumulator.
        target = temp_class
    if isinstance(list_input, list):
        target.extend(list_input)
    else:
        target.append(list_input)
def judge_element_delete(list_input, aim_list, match_num):
    """Delete from ``aim_list`` the slot of the cluster that holds ``match_num``.

    ``list_input`` is scanned for the first entry that either equals
    ``match_num`` or is a list containing it; the element at that same
    position is then removed from ``aim_list``. ``aim_list`` may be
    ``list_input`` itself or a list kept parallel to it.

    At most one element is removed per call, and the scan stops right after
    the deletion. This keeps the function safe when ``aim_list`` is the list
    being scanned, and when ``match_num`` occurs more than once.

    Args:
        list_input: List of cluster entries (single values or lists of values).
        aim_list: List to delete from, indexed in parallel with ``list_input``.
        match_num: Value identifying the cluster to remove.

    Raises:
        IndexError: If ``aim_list`` has no element at the matching position.
    """
    for position, entry in enumerate(list_input):
        members = entry if isinstance(entry, list) else [entry]
        if match_num in members:
            del aim_list[position]
            # Stop immediately: the lists may alias each other, and a second
            # deletion would remove an unrelated cluster.
            return
# Interactive agglomerative (hierarchical) clustering of 1-D numbers.
#
# `group` and `centroid` are parallel lists: group[k] is either one raw input
# token or a flat list of the tokens already merged into that cluster, and
# centroid[k] is that cluster's centre (the raw token at first, the mean of
# the members after a merge).
#
# split() without an argument tolerates repeated, leading and trailing
# blanks, which split(" ") would turn into empty, unparsable tokens.
group = input("Please input some numbers spit as blank:\n").split()
centroid = list(group)
print("centroid is ", centroid, "\n")
times = 0  # number of completed merge rounds

# Re-check the live length every round so that a round which merges
# everything at once cannot trigger another pass over a single cluster.
while len(group) > 1:
    group_num = len(group)
    print("the numbers of groups now is ", group_num, "\n")

    # Pairwise absolute distance between cluster centres. float() keeps the
    # fractional part of merged centroids (int() would truncate 1.5 to 1).
    centres = [float(value) for value in centroid]
    matrix = [[abs(a - b) for b in centres] for a in centres]
    print("distance matrix :")
    for row in matrix:
        print(row)
    print("------------")

    # Convert distances to similarities: 1 - distance / largest distance,
    # rounded to 3 decimals. Only the diagonal is forced to 0, so a cluster
    # never pairs with itself while identical or very close centres still
    # get the highest similarity. When every centre coincides the largest
    # distance is 0; all entries are then set to 0 to avoid dividing by
    # zero, which routes the round into the final-merge branch below.
    max_distance = max(max(row) for row in matrix)
    for i in range(group_num):
        for j in range(group_num):
            if i == j or max_distance == 0:
                matrix[i][j] = 0
            else:
                matrix[i][j] = round(1 - matrix[i][j] / max_distance, 3)
    print("standard matrix :")
    for row in matrix:
        print(row)
    print("------------")

    max_in_matrix = max(max(row) for row in matrix)
    print("max similarity in the matrix: ", max_in_matrix, "\n")

    if max_in_matrix == 0:
        # No pair is more similar than any other (for example only two
        # clusters are left, or all centres coincide): fold everything that
        # remains into one final cluster and stop.
        temp_class = []
        for members in group:
            judge_element_append(members)
        del group[:]
        del centroid[:]
        group.append(temp_class)
        print("[CONCLUSION]: ", group)
        break

    # The first pair that reaches the maximum similarity seeds the new
    # cluster. max_in_matrix is non-zero here and the diagonal is zero, so
    # such a pair exists and its two indices differ.
    index1, index2 = next(
        (i, j)
        for i in range(group_num)
        for j in range(group_num)
        if matrix[i][j] == max_in_matrix
    )
    # Every other cluster tied at the maximum with either seed joins the same
    # parent. Tracking indices guarantees each cluster is taken only once.
    merged = [index1, index2]
    for seed in (index1, index2):
        for k in range(group_num):
            if matrix[seed][k] == max_in_matrix and k not in merged:
                merged.append(k)

    # judge_element_append flattens each cluster into the global temp_class.
    temp_class = []
    for k in merged:
        judge_element_append(group[k])

    times += 1
    print("after %dth clustering: " % times)
    # Remove the merged clusters by position, highest index first so earlier
    # deletions do not shift later ones. Positional removal keeps `group`
    # and `centroid` aligned even when the same value was entered twice.
    for k in sorted(merged, reverse=True):
        del group[k]
        del centroid[k]
    group.append(temp_class)
    print("the new group is ", group)

    # The new centre is the mean of all members of the merged cluster.
    total = 0
    for member in temp_class:
        total += float(member)
    centroid.append(total / len(temp_class))
    print("the new centroid is ", centroid, "\n")
    print("==========EHD OF ONE CLUSTERING==========\n")

    if len(group) == 1:
        # A tie merged everything in this round; report the final result
        # here because the loop is about to end without the branch above.
        print("[CONCLUSION]: ", group)