k-means聚类算法——c语言

程序代码:

#include"stdio.h"
#include"stdlib.h"
#include <math.h>
using namespace std;

// N: number of sample points stored in point[] (and labels stored in center[]).
#define N 11
// k: number of clusters, i.e. the number of centroids stored in mean[].
#define k 3

// A 2-D point with single-precision coordinates; used both for the samples
// and for the cluster centroids.
typedef struct{
    float x;
    float y;
}Point;

// The fixed data set to be clustered: N (x, y) samples.
// main() seeds the initial centroids from entries 0, 3 and 6 of this table.
Point point[N] = {
        { 2.0, 10.0 }, { 2.0, 5.0 }, { 8.0, 4.0 }, { 5.0, 8.0 }, {7.0,5.0},
        { 6.0, 4.0 }, { 1.0, 2.0 }, { 4.0, 9.0 }, { 7.0, 3.0 }, { 1.0, 3.0 }, {3.0,9.0}
};

// center[i] is the index of the cluster that point[i] is currently assigned
// to; it is written by cluster() and read by gete() and getmean().
int center[N];
// mean[j] is the current centroid of cluster j; it is seeded in main() and
// updated by getmean().
Point mean[k];

// Euclidean distance between two points.
float getdistance(Point point1, Point point2);
// Assigns every point to its nearest centroid (fills center[]) and prints the result.
void cluster();
// Sums, over all points, the distance from each point to its assigned centroid.
float gete();
// Recomputes each centroid in mean[] from the given assignment and prints the new centroids.
void getmean(int center[N]);

int main()
{
    // Seed the k centroids from fixed samples (entries 0, 3 and 6) instead of
    // random picks; choosing them by hand presumes some prior knowledge of
    // the data. Without it, the initial centroids could be drawn at random.
    mean[0] = point[0];
    mean[1] = point[3];
    mean[2] = point[6];

    // Number of clustering passes performed so far.
    int rounds = 0;

    // Pass 1: assign the points, then measure the resulting total error.
    cluster();
    ++rounds;
    float prev_error = gete();
    printf("the error1 is:%f\n", prev_error);

    // Pass 2: move the centroids to match pass 1, reassign, measure again.
    getmean(center);
    cluster();
    ++rounds;
    float curr_error = gete();
    printf("the error2 is:%f\n", curr_error);

    // Keep iterating until two consecutive passes differ in error by no more
    // than the 0.5 threshold.
    for (;;)
    {
        if (!(fabs(prev_error - curr_error) > 0.5))
        {
            break;
        }
        prev_error = curr_error;
        getmean(center);
        cluster();
        curr_error = gete();
        ++rounds;
        printf("the error%d is:%f\n", rounds, curr_error);
    }
    printf("the total number of cluster is:%d\n", rounds);

    // Hands the "pause" command to the system shell before exiting.
    system("pause");
    return 0;

}

// Distance function: returns the Euclidean distance between point1 and point2.
float getdistance(Point point1, Point point2)
{
    const float dx = point1.x - point2.x;
    const float dy = point1.y - point2.y;
    return sqrt(dx * dx + dy * dy);
}
// Clustering (assignment) step: for every sample in point[], finds the
// centroid in mean[] with the smallest Euclidean distance, stores that
// centroid's index in center[i], and prints the 1-based cluster number.
//
// The nearest centroid is tracked without a magic "large" starting distance,
// so a point is still assigned correctly when all of its distances are very
// large. Ties keep the lowest-indexed centroid. A NaN distance is never
// chosen; if every distance is NaN, center[i] is left unchanged.
void cluster()
{
    for (int i = 0; i < N; i++)
    {
        int nearest = -1;           // index of the closest centroid seen so far
        float nearest_dist = 0.0f;  // only meaningful once nearest >= 0
        for (int j = 0; j < k; j++)
        {
            const float d = getdistance(point[i], mean[j]);
            if (d != d)
            {
                // NaN compares unequal to itself: skip an undefined distance.
                continue;
            }
            if (nearest < 0 || d < nearest_dist)
            {
                nearest = j;
                nearest_dist = d;
            }
        }
        if (nearest >= 0)
        {
            center[i] = nearest;
        }
        printf("(%.0f,%.0f)\t in cluster-%d\n", point[i].x, point[i].y, center[i] + 1);
    }
}
// Post-clustering error: returns the sum, over all samples, of the Euclidean
// distance between point[i] and the centroid of its assigned cluster
// (mean[center[i]]). Note that plain distances are summed, not squared ones.
float gete()
{
    float total = 0;
    float last = 0;  // most recently computed distance
    for (int i = 0; i < N; i++)
    {
        const int label = center[i];
        // Only a label naming one of the k clusters yields a new distance;
        // otherwise the previously computed value is added again.
        if (label >= 0 && label < k)
        {
            last = getdistance(point[i], mean[label]);
        }
        total += last;
    }
    return total;
}

// Update step: recomputes every cluster centroid in mean[] as the average of
// the samples in point[] whose label in `center` equals that cluster's index,
// then prints all k centroids.
//
// center: per-sample cluster labels; center[j] is the cluster of point[j].
//
// A cluster with no assigned samples keeps its previous centroid, because
// averaging zero samples would divide by zero and store NaN coordinates.
void getmean(int center[N])
{
    for (int i = 0; i < k; i++)
    {
        float sum_x = 0.0f;
        float sum_y = 0.0f;
        int count = 0;
        for (int j = 0; j < N; j++)
        {
            if (center[j] == i)
            {
                sum_x += point[j].x;
                sum_y += point[j].y;
                count++;
            }
        }
        if (count > 0)
        {
            mean[i].x = sum_x / count;
            mean[i].y = sum_y / count;
        }
    }
    for (int i = 0; i < k; i++)
    {
        printf("the new center point of %d is:\t(%f,%f)\n", i + 1, mean[i].x, mean[i].y);
    }
}

程序结果:
(图1:k-means聚类算法程序的运行结果截图)

参考:http://blog.csdn.net/triumph92/article/details/41128049

你可能感兴趣的:(机器学习算法)