k-means实现对鸢尾花数据集聚类

import pandas as pd
import numpy as np
import random
from matplotlib import pyplot as plt

'''
1、算法描述
输入:iris数据集,提取1,3维作为聚类数据;初始化k=3
输出:聚类结果
1. 随机生成k个初始点作为初始聚类中心,由中心代表各聚类;

2. 计算所有点到这k个中心点的距离,并将点归到离其最近的聚类;

3. 将各个簇中的数据求平均值,作为新的类中心,重复上一步,直到所有的簇不再改变;

'''


# 1. Randomly pick k initial points to act as the starting cluster centres.
def creat_init(k, data):
    """Return ``k`` distinct random row indices into ``data``.

    The indices are drawn without replacement from ``range(len(data))``, so
    every index is a valid position in ``data`` and no index is repeated
    (a repeated index would produce two identical starting centres).

    Args:
        k: Number of indices (initial centres) to draw.
        data: Any sized container; only ``len(data)`` is used.

    Returns:
        A list of ``k`` distinct integers in ``[0, len(data))``.

    Raises:
        ValueError: If ``k`` is negative or larger than ``len(data)``
            (raised by ``random.sample``).
    """
    # random.randint(0, n) is inclusive on both ends and could yield n itself,
    # which is out of range as an index; sampling from range(n) cannot.
    return random.sample(range(len(data)), k)


# 2. Distance between a point and a centre, used to assign each point to
#    its nearest cluster.
def dis__kme(A, B):
    """Return the Euclidean distance between ``A`` and ``B``.

    ``A`` and ``B`` must support element-wise subtraction (e.g. NumPy
    arrays of the same length). The squared differences are added up with
    the built-in ``sum`` and the square root of that total is returned.
    """
    difference = A - B
    squared = np.power(difference, 2)
    return np.sqrt(sum(squared))


# Decide which centre a point is closest to.
def lable__min(x, i, SB=()):
    """Store the index of the smallest distance in ``SB`` as row ``i``'s label.

    The label is written to ``x[i][2]``. Any number of distances is
    supported, so the function works for any cluster count.

    Args:
        x: Indexable collection of rows; ``x[i][2]`` must be assignable.
        i: Index of the row to label.
        SB: Sequence of distances from row ``i`` to each centre, in centre
            order.

    Returns:
        ``x`` itself (modified in place).

    Raises:
        ValueError: If ``SB`` is empty.
    """
    if len(SB) == 0:
        raise ValueError("lable__min() needs at least one distance in SB")

    smallest = min(SB)
    # Scan from the end so that, when several centres are equally close, the
    # highest index wins -- this matches the original chain of independent
    # ``if`` statements, where a later match overwrote an earlier one.
    for idx in range(len(SB) - 1, -1, -1):
        if SB[idx] == smallest:
            x[i][2] = idx
            break
    return x


# Assignment step: label every point with its nearest centre.
def dis__04(D, h):
    """Label each row of ``D`` with the index of its nearest centre in ``h``.

    For every row, the distance from its first two columns to the first two
    columns of each centre is computed with ``dis__kme``; ``lable__min`` then
    writes the index of the closest centre into the row's third column.

    Args:
        D: 2-D array of points, indexed as ``D[i, 0:2]``; column 2 receives
            the cluster label. Modified in place.
        h: 2-D array of centres, indexed as ``h[j, 0:2]``.

    Returns:
        ``D`` itself, with the label column updated. An empty ``D`` is
        returned unchanged.
    """
    for i in range(len(D)):
        point = D[i, 0:2]
        # Distances from this point to each of the chosen centres, in order.
        dis = [dis__kme(point, h[j, 0:2]) for j in range(len(h))]
        # Write into the array that was passed in, not a module-level global.
        lable__min(D, i, dis)
    return D


# Attach labels.
def lab__1(A):
    """Set the third element of every row of ``A`` to that row's position.

    Row 0 gets label 0, row 1 gets label 1, and so on. ``A`` is modified in
    place and also returned.
    """
    position = 0
    total = len(A)
    while position < total:
        A[position][2] = position
        position += 1
    return A


# Visualisation.
def draw(s):
    """Scatter-plot the labelled points in ``s`` and show the figure.

    Column 0 is used as the x coordinate, column 1 as the y coordinate and
    column 2 as the cluster label. Label 0 is drawn in red, 1 in green and
    2 in blue; rows with any other label are not drawn.

    Args:
        s: Iterable of rows, each indexable as ``row[0]``, ``row[1]``,
            ``row[2]``. Any number of rows is accepted.
    """
    colours = {0: 'r', 1: 'g', 2: 'b'}
    # Collect coordinates per cluster so each cluster needs one scatter call
    # instead of one call per point.
    grouped = {label: ([], []) for label in colours}
    for row in s:
        bucket = grouped.get(row[2])
        if bucket is None:
            continue
        bucket[0].append(row[0])
        bucket[1].append(row[1])

    for label, (xs, ys) in grouped.items():
        if xs:
            plt.scatter(xs, ys, c=colours[label])
    plt.show()


# Update step: mean of every cluster.
def av(x, k=3):
    """Return the mean (x, y) position of each of the ``k`` clusters.

    Rows of ``x`` are read as ``(x_coord, y_coord, label)``. Rows whose label
    equals one of ``0 .. k-1`` contribute to that cluster's mean; rows with
    any other label are ignored.

    If a cluster has no members its centre is set to the mean of *all* rows
    of ``x`` rather than dividing by zero, so the caller's iteration can
    continue.

    Args:
        x: Non-empty sequence of rows indexable as ``row[0]``, ``row[1]``,
            ``row[2]``.
        k: Number of clusters (defaults to 3).

    Returns:
        A float array of shape ``(k, 3)``. Columns 0 and 1 hold the centre
        coordinates; column 2 holds the placeholder label ``k`` (3 by
        default), which callers are expected to overwrite.

    Raises:
        ZeroDivisionError: If ``x`` contains no rows at all.
    """
    n = len(x)
    if n == 0:
        # Same exception type the un-guarded division used to produce.
        raise ZeroDivisionError("av() needs at least one point to average")

    labels = range(k)
    counts = [0] * k
    sum_x = [0] * k
    sum_y = [0] * k
    for row in x:
        label = row[2]
        # Membership test uses ==, so float labels such as 1.0 match too.
        if label in labels:
            c = int(label)
            counts[c] += 1
            sum_x[c] = sum_x[c] + row[0]
            sum_y[c] = sum_y[c] + row[1]

    fallback = None  # overall mean, computed only if some cluster is empty
    centres = []
    for c in labels:
        if counts[c]:
            centres.append([sum_x[c] / counts[c], sum_y[c] / counts[c], k])
            continue
        if fallback is None:
            fallback = (
                sum(row[0] for row in x) / n,
                sum(row[1] for row in x) / n,
            )
        centres.append([fallback[0], fallback[1], k])

    return np.array(centres, dtype=float).reshape((k, 3))


# Load the dataset; the file is read without treating any row as a header.
data = pd.read_csv("iris.csv", header=None)
data__value = data.values
A = data__value[:, 0]
B = data__value[:, 2]
C = data__value[:, 4]  # NOTE(review): presumably the class column; it is overwritten with cluster labels below

# Build a new matrix with one row per sample: (column 0, column 2, column 4).
X = np.vstack((A, B, C)).T

# Draw the starting indices and collect the matching rows as initial centres.
random4 = creat_init(3, X)
center = [X[index] for index in random4]
h = np.array(center)

# Overwrite the third column of the centres with their own positions 0..k-1.
lab__1(h)

cn = 0
flag = 1
# Repeat assignment + update until the centres stop changing.
while flag:
    x1 = dis__04(X, h)  # points labelled with their nearest centre
    hh = av(x1)  # new centres: per-cluster means
    lab__1(hh)
    print("中心", h)
    if not (h == hh).all():
        # Centres moved: adopt the new ones and run another round.
        h = hh
        cn += 1
    else:
        flag = 0

print("执行了", cn)
draw(x1)

k-means实现对鸢尾花数据集聚类_第1张图片

你可能感兴趣的:(python,python)