import random
import numpy as np
import matplotlib.pyplot as plt
# K-means clustering script. It runs at import time: it reads the samples from
# data.txt, clusters them into k groups and shows a scatter plot of the result.
k = 4               # number of clusters
iteration = 0       # rounds completed so far (named so the built-in round() stays usable)
limit = 10          # stop once the round counter exceeds this value
threshold = 1e-10   # stop once the summed movement of all centers drops below this
data = []
clusters = []

# Every non-blank line of data.txt is one sample: whitespace-separated floats.
with open('data.txt', 'r') as f:
    for line in f:
        fields = line.split()
        if fields:
            data.append(np.array([float(v) for v in fields], dtype=np.float64))

# The initial centers are k distinct samples picked at random.
mean_vectors = random.sample(data, k)
print(mean_vectors)

while True:
    iteration += 1
    change_flag = 0
    clusters = [[] for _ in range(k)]

    # Assignment step: each sample joins the cluster with the nearest center.
    for watermelon in data:
        c = np.argmin(
            [np.linalg.norm(watermelon - vec, ord=2) for vec in mean_vectors]
        )
        clusters[c].append(watermelon)

    # Update step: move each center to the mean of its members and
    # accumulate how far the centers moved in total.
    for i in range(k):
        if not clusters[i]:
            # An empty cluster has no mean; keep its previous center instead
            # of dividing by zero and poisoning the center with NaN.
            continue
        new_vector = sum(clusters[i]) / len(clusters[i])
        change_flag += np.linalg.norm(mean_vectors[i] - new_vector, ord=2)
        mean_vectors[i] = new_vector

    if iteration > limit or change_flag < threshold:
        break

print('迭代了', iteration, '轮')

# One scatter call per cluster; only the first two coordinates are drawn.
colors = ['green', 'red', 'blue', 'purple']
for cluster, col in zip(clusters, colors):
    if not cluster:
        continue
    xs = [watermelon[0] for watermelon in cluster]
    ys = [watermelon[1] for watermelon in cluster]
    plt.scatter(xs, ys, color=col)
plt.show()
import copy
import math
import random
import numpy as np
# Inclusive upper bound of the random integers that init() draws before
# normalising each row of the starting membership matrix.
limit = 10 ** 4
# Tolerance used by end_conditon(): iteration stops once no membership value
# differs from the previous round by more than this amount.
epsl = 1e-7
def init(data, class_num):
    """Build a random starting membership matrix and save it to disk.

    For every sample in ``data`` this draws ``class_num`` random integers in
    ``[1, limit]`` (``limit`` is the module-level constant) and divides them
    by their sum, so each row is strictly positive and sums to 1.

    The matrix is also written to ``fuzzy_start.txt`` in the current working
    directory, one ``str(row)`` per line, so the initial values can be copied
    easily.

    Args:
        data: Sequence of samples; only its length is used.
        class_num: Number of clusters, i.e. the number of columns per row.

    Returns:
        A list of ``len(data)`` rows, each a list of ``class_num`` floats.
    """
    ans = []
    for _ in range(len(data)):
        weights = [random.randint(1, limit) for _ in range(class_num)]
        total = float(sum(weights))
        ans.append([w / total for w in weights])

    # Write the initial values to a txt file so they can be copied easily.
    # The context manager guarantees the file is flushed and closed.
    with open('fuzzy_start.txt', 'w') as f:
        for row in ans:
            f.write(str(row) + '\n')
    return ans
def distance(watermelon, center):
    """Return the Euclidean distance between a sample and a cluster center.

    Args:
        watermelon: Sequence of coordinates of the sample.
        center: Sequence of coordinates of the center.

    Returns:
        The distance as a float, or the sentinel ``-1`` when the two
        sequences do not have the same length.
    """
    if len(watermelon) == len(center):
        squared = 0.0
        for coord, ref in zip(watermelon, center):
            squared += abs(coord - ref) ** 2
        return math.sqrt(squared)
    # Length mismatch: report it with the sentinel value.
    return -1
def end_conditon(ans, old_ans):
    """Tell whether two membership matrices agree within ``epsl``.

    Args:
        ans: Current membership matrix (list of rows).
        old_ans: Membership matrix from the previous round, same layout.

    Returns:
        ``False`` as soon as one entry differs from its previous value by
        more than the module-level tolerance ``epsl``; ``True`` otherwise.
    """
    row_count = len(ans)
    # The column count is taken from the first row; an empty matrix has no
    # entries to compare.
    col_count = len(ans[0]) if row_count else 0
    moved = any(
        abs(ans[r][c] - old_ans[r][c]) > epsl
        for r in range(row_count)
        for c in range(col_count)
    )
    return not moved
def fuzzy(data, class_num, m):
    """Cluster ``data`` with fuzzy c-means and return the membership matrix.

    Starting from the random memberships produced by ``init`` (which also
    writes them to ``fuzzy_start.txt``), each round recomputes the cluster
    centers as membership-weighted means of the samples and then recomputes
    the memberships from the sample-to-center distances. The loop ends when
    ``end_conditon`` reports that no membership changed by more than the
    tolerance.

    Args:
        data: Sequence of samples; each sample is an indexable sequence of
            numbers with the same length as ``data[0]``.
        class_num: Number of clusters.
        m: Fuzzifier exponent. It must differ from 1 because the membership
            update raises distance ratios to the power ``2 / (m - 1)``.

    Returns:
        A list with one row per sample; ``row[j]`` is the membership of that
        sample in cluster ``j``. Empty ``data`` yields an empty list.

    Raises:
        ZeroDivisionError: If ``m == 1`` and ``data`` is not empty.
    """
    # Initialization
    ans = init(data, class_num)
    if len(data) == 0:
        # Nothing to cluster; there is no data[0] to read a dimension from.
        return ans

    dim = len(data[0])
    exponent = 2 / (m - 1)
    center = None

    # Iterative update
    while True:
        old_ans = copy.deepcopy(ans)
        previous_center = center

        # Step 1: cluster centers as membership-weighted means.
        center = []
        for j in range(class_num):
            # The weights depend only on the cluster, so compute them once
            # instead of once per feature dimension.
            weights = [row[j] ** m for row in ans]
            # Denominator
            weight_sum = 0.0
            for w in weights:
                weight_sum += w
            if weight_sum == 0 and previous_center is not None:
                # Every sample has zero membership in this cluster (possible
                # once samples sit exactly on other centers); keep the
                # previous center rather than dividing by zero.
                center.append(previous_center[j])
                continue
            curr_cluster_center = []
            for i in range(dim):
                # Numerator
                weighted = 0.0
                for w, sample in zip(weights, data):
                    weighted += w * sample[i]
                curr_cluster_center.append(weighted / weight_sum)
            center.append(curr_cluster_center)

        # Step 2: distance from every sample to every center.
        distance_matrix = [
            [distance(sample, center[j]) for j in range(class_num)]
            for sample in data
        ]

        # Step 3: membership update.
        for i, dists in enumerate(distance_matrix):
            on_center = [j for j, d in enumerate(dists) if d == 0]
            if on_center:
                # The sample coincides with at least one center, so the
                # ratio formula would divide by zero. Give those clusters
                # the whole membership, split evenly, and the others none.
                share = 1.0 / len(on_center)
                for j in range(class_num):
                    ans[i][j] = share if j in on_center else 0.0
                continue
            for j in range(class_num):
                a = 0.0
                for k in range(class_num):
                    a += (dists[j] / dists[k]) ** exponent
                ans[i][j] = 1 / a

        if end_conditon(ans, old_ans):
            break
    return ans
if __name__ == '__main__':
    # Load the samples: every non-blank line of data.txt is one sample given
    # as whitespace-separated floats.
    data = []
    with open('data.txt', 'r') as f:
        for line in f:
            fields = line.split()
            if fields:
                data.append(np.array([float(v) for v in fields], dtype=np.float64))

    # Fuzzy c-means with 4 clusters and fuzzifier m = 2.
    para_matrix = fuzzy(data, 4, 2)
    print(para_matrix)

    # Save the final membership matrix, one row per line.
    with open('fuzzy_result.txt', 'w') as f:
        for line in para_matrix:
            f.write(str(line) + '\n')
(2)我们将(1)中工作重复三次,得到的结果分别如下:
[array([0.243, 0.267]), array([0.483, 0.312]), array([0.473, 0.376]), array([0.639, 0.161])] 3轮迭代
误差平方和=0.05109
[array([0.556, 0.215]), array([0.634, 0.264]), array([0.36, 0.37]), array([0.243, 0.267])] 5轮迭代
误差平方和=0.03659
[array([0.593, 0.042]), array([0.481, 0.149]), array([0.714, 0.346]), array([0.774, 0.376])] 5轮迭代
误差平方和=0.04183