// k-means.cpp : 定义控制台应用程序的入口点。 // #include "stdafx.h" #include <fstream> #include <math.h> #include <vector> #include <iostream> #define k 3 using namespace std; //存放元组的属性信息 struct Tuple { float attr1; float attr2; }; //计算两个元组的欧几里得距离 float getDistXY(Tuple t1, Tuple t2) { return sqrt((t1.attr1 - t2.attr1)*(t1.attr1 - t2.attr1) + (t1.attr2 - t2.attr2)*(t1.attr2 - t2.attr2)); } //根据质心,决定当前元组属于哪个簇 int clusterOfTuple(Tuple means[], Tuple tuple) { float dist = getDistXY(means[0], tuple); float tmp; int label = 0;//标记属于哪一个簇 for (int i = 1; i < k; i++) { tmp = getDistXY(means[i], tuple); if (tmp < dist) { dist = tmp; label = i; } } return label; } //获得给定簇集的平方误差 float getVar( vector<Tuple> clusters[], Tuple means[] ) { float var = 0; for (int i = 0; i < k; i++) { vector<Tuple> t = clusters[i]; for (int j = 0; j < t.size(); j++) { var += getDistXY(t[j], means[i]); } } return var; } //获得当前簇的均值(质心) Tuple getMeans(vector<Tuple> cluster) { int num = cluster.size(); double meansX = 0; double meansY = 0; Tuple t; for (int i = 0; i < num; i++) { meansX += cluster[i].attr1; meansY += cluster[i].attr2; } t.attr1 = meansX / num; t.attr2 = meansY / num; return t; } void KMeans(vector<Tuple> tuples) { vector<Tuple> clusters[k]; //设置有几簇 Tuple means[k]; //设置每簇的质心 int i = 0; //默认一开始将前k个元组的值作为k个簇的质心 for (; i < k; i++) { means[i].attr1 = tuples[i].attr1; means[i].attr2 = tuples[i].attr2; } //看当前元组属于哪个簇,并将其导入 int label = 0; for (i = 0; i != tuples.size(); i++) { label = clusterOfTuple(means, tuples[i]); clusters[label].push_back(tuples[i]); } //输出刚开始的簇 for (label = 0; label < k; label++) { cout << "第" <<label + 1 << "个簇:"<<endl; vector<Tuple> t = clusters[label]; for ( i = 0; i < t.size(); i++) { cout <<"(" << t[i].attr1 <<", "<< t[i].attr2<<")"<<"\t"; } cout<<endl; } float oldVar = -1; float newVar = getVar(clusters, means); while (abs(newVar - oldVar) >= 1) //当新旧函数值即平方差不到1即则准函数值不发生明显变化时,算法终止 { for (i = 0; i < k; i++) //更新每个簇的中心点 { means[i] = getMeans(clusters[i]); } oldVar = newVar; newVar 
= getVar(clusters, means); //清空每个簇 for (i = 0; i < k; i++) { clusters[i].clear(); } //根据新的质心获得新的簇 for (i = 0; i != tuples.size(); i++) { label = clusterOfTuple(means, tuples[i]); clusters[label].push_back(tuples[i]); } //输出当前簇 for (label = 0; label < k; label++) { cout << "第" <<label + 1 << "个簇:"<<endl; vector<Tuple> t = clusters[label]; for ( i = 0; i < t.size(); i++) { cout <<"(" << t[i].attr1 <<", "<< t[i].attr2<<")"<<"\t"; } cout<<endl; } } } int _tmain(int argc, _TCHAR* argv[]) { const char* fileName = "C:\\Users\\sony\\Desktop\\data.txt"; ifstream infile(fileName); if (!infile.is_open()) { cout<<"不能打开输入文件"<<fileName<<endl; } int count = 0; vector<Tuple> tuples; Tuple tuple; //从文件流中读入数据 while (!infile.eof()) { count++; if (count%2 == 1) { infile>>tuple.attr1; }else { infile>>tuple.attr2; tuples.push_back(tuple); } } //输出文件中的元组信息 for (auto it = tuples.begin(); it != tuples.end(); it++) { cout << "(" << (*it).attr1 <<", "<<(*it).attr2<<")"<<"\t"; } cout << endl; KMeans(tuples); system("pause"); return 0; }
原博文链接:点击打开链接
以上为二维的情况,一维的情况见下面的代码。这里要注意:用 ifstream 读取数据时,如果以 infile.eof() 来判断是否到达结尾,最后一个数据可能会被重复加入一次。原因是:流中的 eofbit 标记只有在尝试读取到文件结尾之后才会被设置。读取完最后一个数据后,如果其后还有换行等空白字符,ifstream 仍处于正常状态,所以下一次 while 判断不会跳出循环;再次执行 infile>>x 时,ifstream 发现已无数据可读,此时才会设置 eofbit 和 failbit。但由于已经进入循环体,虽然这次没有读到数据,x 仍保留上一次的值,于是又执行了一次 push_back。因此应当直接以 while (infile>>x) 作为循环条件。
代码如下:
// text.cpp : 定义控制台应用程序的入口点。 #include "stdafx.h" #include <fstream> #include <math.h> #include <vector> #include <iostream> #define k 3 using namespace std; //存放元组的属性信息 struct Tuple { float attr1; /*float attr2;*/ }; //计算两个元组的欧几里得距离 float getDistXY(Tuple t1, Tuple t2) { return sqrt((t1.attr1 - t2.attr1)*(t1.attr1 - t2.attr1) ); } //根据质心,决定当前元组属于哪个簇 int clusterOfTuple(Tuple means[], Tuple tuple) { float dist = getDistXY(means[0], tuple); float tmp; int label = 0;//标记属于哪一个簇 for (int i = 1; i < k; i++) { tmp = getDistXY(means[i], tuple); if (tmp < dist) { dist = tmp; label = i; } } return label; } //获得给定簇集的平方误差 float getVar( vector<Tuple> clusters[], Tuple means[] ) { float var = 0; for (int i = 0; i < k; i++) { vector<Tuple> t = clusters[i]; for (int j = 0; j < t.size(); j++) { var += getDistXY(t[j], means[i]); } } return var; } //获得当前簇的均值(质心) Tuple getMeans(vector<Tuple> cluster) { int num = cluster.size(); double meansX = 0; Tuple t; for (int i = 0; i < num; i++) { meansX += cluster[i].attr1; } t.attr1 = meansX / num; return t; } void KMeans(vector<Tuple> tuples) { vector<Tuple> clusters[k]; //设置有几簇 Tuple means[k]; //设置每簇的质心 int i = 0; //默认一开始将前k个元组的值作为k个簇的质心 for (i = 0; i < k; i++) { means[i].attr1 = tuples[i].attr1; } //看当前元组属于哪个簇,并将其导入 int label = 0; for (i = 0; i != tuples.size(); i++) { label = clusterOfTuple(means, tuples[i]); clusters[label].push_back(tuples[i]); } ////输出刚开始的簇 //for (label = 0; label < k; label++) //{ // cout << "第" <<label + 1 << "个簇:"<<endl; // vector<Tuple> t = clusters[label]; // for ( i = 0; i < t.size(); i++) // { // cout <<"(" << t[i].attr1 <<", "<< t[i].attr2<<")"<<"\t"; // } // cout<<endl; //} float oldVar = -1; float newVar = getVar(clusters, means); while (abs(newVar - oldVar) >= 1) //当新旧函数值即平方差不到1即则准函数值不发生明显变化时,算法终止 { for (i = 0; i < k; i++) //更新每个簇的中心点 { means[i] = getMeans(clusters[i]); } oldVar = newVar; newVar = getVar(clusters, means); //清空每个簇 for (i = 0; i < k; i++) { clusters[i].clear(); } //根据新的质心获得新的簇 for (i = 0; i != 
tuples.size(); i++) { label = clusterOfTuple(means, tuples[i]); clusters[label].push_back(tuples[i]); } ////输出当前簇 //for (label = 0; label < k; label++) //{ // /*cout << "第" <<label + 1 << "个簇:"<<endl; // vector<Tuple> t = clusters[label]; // for ( i = 0; i < t.size(); i++) // { // cout <<"(" << t[i].attr1 <<", "<< t[i].attr2<<")"<<"\t"; // } // cout<<endl;*/ // cout << "第" <<label + 1 << "个簇:"<<endl; // vector<Tuple> t = clusters[label]; // for ( i = 0; i < t.size(); i++) // { // cout << t[i].attr1 <<"\t"; // } // cout<<endl; } //输出当前簇 for (label = 0; label < k; label++) { /*cout << "第" <<label + 1 << "个簇:"<<endl; vector<Tuple> t = clusters[label]; for ( i = 0; i < t.size(); i++) { cout <<"(" << t[i].attr1 <<", "<< t[i].attr2<<")"<<"\t"; } cout<<endl;*/ cout << "第" <<label + 1 << "个簇:"<<endl; vector<Tuple> t = clusters[label]; for ( i = 0; i < t.size(); i++) { cout << t[i].attr1 <<"\t"; } cout<<endl; } } int _tmain(int argc, _TCHAR* argv[]) { const char* fileName = "C:\\Users\\sony\\Desktop\\data.txt"; ifstream infile(fileName, ios::in); if (!infile.is_open()) { cout<<"不能打开输入文件"<<fileName<<endl; } int count = 0; vector<Tuple> tuples; Tuple tuple; //从文件流中读入数据 while (infile>>tuple.attr1) { /*count++; if (count%2 == 1) { infile>>tuple.attr1; }else { infile>>tuple.attr1; tuples.push_back(tuple); }*/ tuples.push_back(tuple); } ////输出文件中的元组信息 //for (auto it = tuples.begin(); it != tuples.end(); it++) //{ // cout << "(" << (*it).attr1 <<", "<<(*it).attr2<<")"<<"\t"; //} cout << endl; KMeans(tuples); system("pause"); return 0; }