手写 k-means 算法实现聚类

# -*- coding: utf-8 -*-
"""
Created on Mon Feb  5 11:58:12 2018

@author: Administrator
"""
# -*- coding: utf-8 -*-
"""
Created on Sat Feb  3 13:51:03 2018

@author: Administrator
"""
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
import random
import numpy as np

# Squared Euclidean distance between two coordinate sequences.
def get_distance(x,y):
    """Return the sum of squared element-wise differences of x and y.

    No square root is taken, so the value is the squared Euclidean
    distance. Elements are paired by index over len(x), so y must be
    indexable at every position that x has.
    """
    total=0
    for idx in range(len(x)):
        diff=x[idx]-y[idx]
        total=total+diff*diff
    return total

# X holds the input samples, k is the number of clusters, and center holds the
# current center of each cluster (a center's index is its cluster label).
def get_center(X,k,center):
    """Perform one k-means iteration and return the updated centers.

    Every sample in X is first labelled with the index of the center that
    has the smallest get_distance value (the first one wins on ties). Each
    of the first k centers is then replaced by the coordinate-wise mean of
    the samples labelled with it.

    Args:
        X: indexable collection of samples; each sample is an indexable
            sequence of numeric coordinates.
        k: number of clusters to update, i.e. indices 0..k-1 of center.
        center: mutable sequence of the current centers. It is updated in
            place and also returned.

    Returns:
        The same center object with the new centers written into it.

    A cluster that received no samples keeps its previous center instead
    of raising ZeroDivisionError, and an empty X leaves center untouched.
    """
    # Assignment step: label each sample with the index of its nearest center.
    y_pred=[]
    for sample in X:
        distance=[get_distance(sample,c) for c in center]
        y_pred.append(distance.index(min(distance)))
    # Update step: move each center to the mean of the samples labelled with it.
    for i in range(k):
        members=[sample for sample,label in zip(X,y_pred) if label==i]
        if not members:
            # Nothing was assigned to this cluster; keep its old center
            # rather than dividing by zero.
            continue
        size=len(members)
        center[i]=[sum(m[j] for m in members)/size for j in range(len(X[0]))]
    return center
n_samples=1500  # number of sample points
random_state = 170
X,y = make_blobs(n_samples=n_samples, random_state=random_state)
k=4
# Pick k distinct samples of X as the initial centers; a center's row index
# in `center` is its cluster label, so there are k labels in total.
# random.sample draws indices from 0..n_samples-1 without replacement, which
# avoids both an out-of-range index (random.randint includes its upper bound)
# and duplicated starting centers.
temp_center=[X[idx] for idx in random.sample(range(n_samples),k)]
# np.array copies the selected rows, so in-place center updates never touch X.
center=np.array(temp_center)
# Run a fixed number of update iterations (there is no convergence test).
n_iterations=9
for _ in range(n_iterations):
    center=get_center(X,k,center)
# Final assignment: label every sample with the index of its nearest center.
last_y=[]
for sample in X:
    last_distance=[get_distance(sample,c) for c in center]
    last_y.append(last_distance.index(min(last_distance)))
last_y=np.array(last_y)
# Incorrect number of clusters
# scikit-learn counterpart, kept for reference only:
#   KMeans(n_clusters=k, random_state=random_state).fit_predict(X)
plt.figure(figsize=(12,12))
plt.scatter(X[:,0],X[:,1],c=last_y)
plt.show()

 

你可能感兴趣的:(人工智能)