【C++】将坐标点进行聚类的Kmeans实现

Kmeans算法的实现步骤:
1、从D中随机取k个元素,作为k个簇的各自的中心。
2、分别计算剩下的元素到k个簇中心的相异度(元素到簇中心的欧氏距离),将这些元素分别划归到相异度最低的簇。
3、根据聚类结果,重新计算k个簇各自的中心,计算方法是取簇中所有元素各自维度的算术平均数。
4、将D中全部元素按照新的中心重新聚类。
5、重复第3、4步,直到聚类结果不再变化。
6、将结果输出。
#include "stdafx.h"
#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <sstream>
#include <cmath>

// Number of clusters. Used as the size of the means/cluster arrays and as the
// loop bound wherever the clusters are iterated.
#define k 3

using namespace std;

// Input data set: 11 points, one per row, stored as {x, y}.
const int data[11][2]={{2,10},{2,5},{8,4},{5,8},{7,5},{6,4},{1,2},{4,9},{7,3},{1,3},{3,9}};

// A 2-D point with integer coordinates. Used both for input points and for
// cluster centers.
struct Sample
{
	int x; // first coordinate
	int y; // second coordinate
};

// Returns the squared Euclidean distance between pt1 and pt2.
// No square root is taken, so the result is an exact integer; this is enough
// for comparing which of two points is closer.
int getDistance(Sample pt1, Sample pt2)
{
	const int dx = pt1.x - pt2.x;
	const int dy = pt1.y - pt2.y;
	return dx*dx + dy*dy;
}

// Returns the index (0 .. k-1) of the entry in means[] that is closest to
// point, as measured by getDistance().
//
// means : array of k cluster centers.
// point : the point to classify.
//
// Ties go to the lowest index, because a later center only replaces the
// current best when it is strictly closer.
int getCluster(Sample means[], Sample point)
{
	int label = 0;
	int best = getDistance(means[0], point);

	for (int i = 1; i < k; i++)
	{
		const int candidate = getDistance(means[i], point);
		if (candidate < best)
		{
			best = candidate;
			label = i;
		}
	}
	return label;
}

// Returns the center of a cluster: the arithmetic mean of the x and of the y
// coordinates of every point in it.
//
// cluster : all points currently assigned to the cluster.
//
// Sample stores integer coordinates, so each mean is truncated toward zero
// when it is stored. An empty cluster yields (0, 0) instead of dividing by
// zero; callers that want to keep a previous center should check for an
// empty cluster themselves.
Sample getMeans(const vector<Sample>& cluster)
{
	Sample newMean;
	newMean.x = 0;
	newMean.y = 0;

	const vector<Sample>::size_type num = cluster.size();
	if (num == 0)
	{
		// 0/0 would produce NaN, and converting NaN to int is undefined.
		return newMean;
	}

	// double represents every int exactly, so the sums do not lose precision.
	double sumX = 0;
	double sumY = 0;
	for (vector<Sample>::size_type i = 0; i < num; i++)
	{
		sumX += cluster[i].x;
		sumY += cluster[i].y;
	}

	newMean.x = static_cast<int>(sumX / num);
	newMean.y = static_cast<int>(sumY / num);

	return newMean;
}

// Returns the total squared distance between every point and the center of
// the cluster it is stored in, summed over all k clusters.
//
// clusters : array of k point lists, one per cluster.
// means    : array of k cluster centers; means[i] belongs to clusters[i].
//
// Despite the name, the sum is not divided by the number of points.
float getVar(vector<Sample> clusters[],Sample means[])
{
	float var = 0;
	for (int i = 0; i < k; i++)
	{
		// Bind by reference: the points are only read, so no copy is needed.
		const vector<Sample>& members = clusters[i];
		for (vector<Sample>::size_type j = 0; j < members.size(); j++)
		{
			var += getDistance(members[j], means[i]);
		}
	}
	return var;
}

void Kmeans(const int data[11][2])
{
	Sample means[k]; 
	vector<Sample> cluster[k]; 

	for(int i=0; i<k; i++) 
	{
		means[i].x=data[i][0];
		means[i].y=data[i][1];
	}

	Sample pt;
	int label=0;
	for(int i=0; i<11; i++)
	{
		pt.x=data[i][0];
		pt.y=data[i][1];

		label=getCluster(means, pt); 
		cluster[label].push_back(pt); 
	}

	float oldVar=-1;
	float newVar=getVar(cluster,means);
	
	while(newVar!=oldVar) 
	{
		for(int i=0; i<k; i++)
		{
			means[i]=getMeans(cluster[i]); 
		}

		oldVar = newVar;
		newVar = getVar(cluster,means); 
		for(int i=0; i<k; i++)
		{
			cluster[i].clear(); 
		}

		for(int i=0; i<11; i++)
		{
			Sample pt;
			pt.x=data[i][0];
			pt.y=data[i][1];

			label=getCluster(means, pt); 
			cluster[label].push_back(pt); 
		}
		
		for(label=0; label<3; label++)
		{
			cout<<"第"<<label+1<<"个簇中的元素是:"<<endl;
			vector<Sample> pt;
			pt=cluster[label];

			for(int i=0; i<pt.size(); i++)
			{
				cout<<"("<<pt[i].x<<","<<pt[i].y<<")"<<"    "<<endl;
			}
		}
	}
}

int main()
{
	Kmeans(data);
}

你可能感兴趣的:(【C++】将坐标点进行聚类的Kmeans实现)