<p style="margin: 15px auto 10px; padding-top: 0px; padding-bottom: 0px; font-family: Arial; line-height: 22px; text-align: justify;"><span style="margin: 0px; padding: 0px;"><span style="font-family: 'Courier New';"><span style="font-size:12px;">Kmeans算法的实现步骤:</span></span></span></p><p style="margin: 15px auto 10px; padding-top: 0px; padding-bottom: 0px; font-family: Arial; line-height: 22px; text-align: justify;"><span style="margin: 0px; padding: 0px;"><span style="font-family: 'Courier New';"><span style="font-size:12px;">1、从D中随机取k个元素,作为k个簇的各自的中心。</span></span></span></p><p style="margin: 15px auto 10px; padding-top: 0px; padding-bottom: 0px; font-family: Arial; line-height: 22px; text-align: justify;"><span style="margin: 0px; padding: 0px;"><span style="font-family: 'Courier New';"><span style="font-size:12px;">2、分别计算剩下的元素到k个簇中心的相异度(元素到簇中心的欧氏距离),将这些元素分别划归到相异度最低的簇。</span></span></span></p><p style="margin: 15px auto 10px; padding-top: 0px; padding-bottom: 0px; font-family: Arial; line-height: 22px; text-align: justify;"><span style="margin: 0px; padding: 0px;"><span style="font-family: 'Courier New';"><span style="font-size:12px;">3、根据聚类结果,重新计算k个簇各自的中心,计算方法是取簇中所有元素各自维度的算术平均数。</span></span></span></p><p style="margin: 15px auto 10px; padding-top: 0px; padding-bottom: 0px; font-family: Arial; line-height: 22px; text-align: justify;"><span style="margin: 0px; padding: 0px;"><span style="font-family: 'Courier New';"><span style="font-size:12px;">4、将D中全部元素按照新的中心重新聚类。</span></span></span></p><p style="margin: 15px auto 10px; padding-top: 0px; padding-bottom: 0px; font-family: Arial; line-height: 22px; text-align: justify;"><span style="margin: 0px; padding: 0px;"><span style="font-family: 'Courier New';"><span style="font-size:12px;">5、重复第3、4步,直到聚类结果不再变化。</span></span></span></p><p style="margin: 15px auto 10px; padding-top: 0px; padding-bottom: 0px; font-family: Arial; line-height: 22px; text-align: justify;"><span style="margin: 0px; padding: 0px;"><span 
style="font-family: 'Courier New';"><span style="font-size:12px;">6、将结果输出。</span></span></span></p>
#include "stdafx.h"

#include <cmath>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Number of clusters. A typed constant replaces the former `#define k 3`
// macro so the name obeys normal scoping rules.
const int k = 3;

// Number of sample points in the built-in data set.
const int kPointCount = 11;

// Safety bound on refinement rounds: centres are truncated to integers, so
// the usual K-means convergence argument does not strictly apply.
const int kMaxIterations = 100;

// Retained so that any other code in this file relying on it keeps compiling.
using namespace std;

// Built-in sample points, one {x, y} pair per row.
const int data[kPointCount][2] = {
    {2, 10}, {2, 5}, {8, 4}, {5, 8}, {7, 5}, {6, 4},
    {1, 2},  {4, 9}, {7, 3}, {1, 3}, {3, 9}
};

// Coordinate point structure.
struct Sample
{
    int x;
    int y;
};

// Returns the squared Euclidean distance between pt1 and pt2.
// No square root is taken; the value is only used for comparisons and sums.
int getDistance(Sample pt1, Sample pt2)
{
    const int dx = pt1.x - pt2.x;
    const int dy = pt1.y - pt2.y;
    return dx * dx + dy * dy;
}

// Returns the index (0 .. k-1) of the centre in `means` closest to `point`.
// `means` must hold k elements. Ties go to the lowest index because only a
// strictly smaller distance replaces the current best.
int getCluster(Sample means[], Sample point)
{
    int label = 0;
    int best = getDistance(means[0], point);
    for (int i = 1; i < k; i++) {
        const int candidate = getDistance(means[i], point);
        if (candidate < best) {
            best = candidate;
            label = i;
        }
    }
    return label;
}

// Returns the per-coordinate arithmetic mean of all points in `cluster`.
// Sample stores integers, so each mean is truncated toward zero.
// An empty cluster yields (0, 0) instead of dividing by zero.
Sample getMeans(const vector<Sample>& cluster)
{
    Sample newMean;
    newMean.x = 0;
    newMean.y = 0;
    if (cluster.empty()) {
        return newMean;
    }

    double sumX = 0;
    double sumY = 0;
    for (size_t i = 0; i < cluster.size(); ++i) {
        sumX += cluster[i].x;
        sumY += cluster[i].y;
    }
    const double count = static_cast<double>(cluster.size());
    newMean.x = static_cast<int>(sumX / count);
    newMean.y = static_cast<int>(sumY / count);
    return newMean;
}

// Returns the sum, over all k clusters, of the squared distance from every
// member to its cluster's centre. Both arrays must hold k elements.
float getVar(vector<Sample> clusters[], Sample means[])
{
    float var = 0;
    for (int i = 0; i < k; i++) {
        const vector<Sample>& members = clusters[i];
        for (size_t j = 0; j < members.size(); ++j) {
            var += getDistance(members[j], means[i]);
        }
    }
    return var;
}

// Empties all k clusters, then places every point into the cluster of its
// nearest centre.
static void assignPoints(const int points[kPointCount][2], Sample means[],
                         vector<Sample> clusters[])
{
    for (int i = 0; i < k; i++) {
        clusters[i].clear();
    }
    for (int i = 0; i < kPointCount; i++) {
        Sample pt;
        pt.x = points[i][0];
        pt.y = points[i][1];
        clusters[getCluster(means, pt)].push_back(pt);
    }
}

// Writes the members of each of the k clusters to standard output.
static void printClusters(const vector<Sample> clusters[])
{
    for (int label = 0; label < k; label++) {
        cout << "第" << label + 1 << "个簇中的元素是:" << '\n';
        const vector<Sample>& members = clusters[label];
        for (size_t i = 0; i < members.size(); ++i) {
            cout << "(" << members[i].x << "," << members[i].y << ")" << " " << '\n';
        }
    }
    cout.flush();
}

// Runs K-means with k clusters on the kPointCount points in `data`.
// The first k points seed the centres. Each round recomputes the centres,
// re-assigns every point and prints the clusters; rounds stop once the total
// squared distance no longer changes, or after kMaxIterations rounds.
void Kmeans(const int data[kPointCount][2])
{
    Sample means[k];
    vector<Sample> cluster[k];

    for (int i = 0; i < k; i++) {
        means[i].x = data[i][0];
        means[i].y = data[i][1];
    }
    assignPoints(data, means, cluster);

    float oldVar = -1;
    float newVar = getVar(cluster, means);
    // The sums are built from integer distances, so comparing the floats for
    // exact equality is the intended "nothing changed" test.
    for (int iteration = 0; newVar != oldVar && iteration < kMaxIterations; iteration++) {
        for (int i = 0; i < k; i++) {
            // An empty cluster has no mean; keep its previous centre.
            if (!cluster[i].empty()) {
                means[i] = getMeans(cluster[i]);
            }
        }
        oldVar = newVar;
        newVar = getVar(cluster, means);

        assignPoints(data, means, cluster);
        printClusters(cluster);
    }
}

int main()
{
    // Qualified: with `using namespace std;` an unqualified `data` is
    // ambiguous with std::data when compiled as C++17 or later.
    Kmeans(::data);
    return 0;
}