hello,all
上节采用python实现了决策树,本节使用python实现k-means算法,后一节将会采用map-reduce实现k-means算法
算法代码如下:
# coding=utf-8
import pprint
import uniout
import math
from collections import Counter
import copy as cp
import random as rd
import matplotlib
import matplotlib.pyplot as plt
'''
@author :chenyuqing
@mail :[email protected]
'''
from numpy import *
def load_data(path):
    '''
    Load a tab-separated numeric data file into a NumPy array.

    Every non-blank line is stripped, split on tab characters, and each
    field is converted with float(). Blank lines are skipped.

    :param path: path of the text file to read
    :return: array built from the parsed rows, one row per non-blank line
    :raises ValueError: if a field cannot be converted to float
    '''
    data_set = []
    # The context manager closes the file even if parsing a field fails.
    with open(path) as file_object:
        # Iterate the file lazily instead of loading every line up front.
        for line in file_object:
            line = line.strip()
            if not line:
                # A blank line (e.g. an empty last line) holds no sample;
                # passing it on would make float('') raise ValueError.
                continue
            # Convert the string fields into numbers.
            data_set.append([float(x) for x in line.split('\t')])
    return array(data_set)
def my_kmeans(k ,data_set):
'''
:param k:
:param data_set:
:return:
'''
sample_data_index=rd.sample(list(range(0,len(data_set))),k)
start_list=[] #定义起始的结果向量
end_list=[[0,0] for n in range(k)]#定义结束的向量
end_result=[[] for n in range(k)]# 分类完毕后的结果
for temp in sample_data_index:
start_list.append(data_set[temp].tolist())
iter_n=10
while(start_list<>end_list): #
for i in range(0,len(data_set)):
temp_distance=float("inf")
temp_result=0
for j in range(0,len(start_list)):
distance= math.sqrt(math.pow(data_set[i][0]-start_list[j][0],2)+math.pow(data_set[i][1]-start_list[j][1],2))
if distance
结果如下:
[[0.697, 0.46], [0.744, 0.376], [0.634, 0.264], [0.608, 0.318], [0.639, 0.161], [0.657, 0.198], [0.719, 0.103], [0.748, 0.232], [0.714, 0.346], [0.751, 0.489], [0.725, 0.445]]
[[0.403, 0.237], [0.243, 0.267], [0.36, 0.37], [0.339, 0.241], [0.282, 0.257], [0.483, 0.312], [0.478, 0.437], [0.525, 0.369], [0.532, 0.472], [0.473, 0.376], [0.446, 0.459]]
[[0.556, 0.215], [0.481, 0.149], [0.437, 0.211], [0.666, 0.091], [0.245, 0.057], [0.343, 0.099], [0.593, 0.042], [0.359, 0.188]]
11
结果展示
0.697 0.46
0.744 0.376
0.634 0.264
0.608 0.318
0.556 0.215
0.403 0.237
0.481 0.149
0.437 0.211
0.666 0.091
0.243 0.267
0.245 0.057
0.343 0.099
0.639 0.161
0.657 0.198
0.36 0.37
0.593 0.042
0.719 0.103
0.359 0.188
0.339 0.241
0.282 0.257
0.748 0.232
0.714 0.346
0.483 0.312
0.478 0.437
0.525 0.369
0.751 0.489
0.532 0.472
0.473 0.376
0.725 0.445
0.446 0.459
后续将采用MapReduce(MR)程序重新实现该算法,敬请关注。
thanks