IRIS数据采用Kmeans方法的C++实现

IRIS数据采用Kmeans方法的C++实现

参考:http://blog.csdn.net/cai0538/article/details/7061922

#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// Number of clusters (and centroids) used by the K-means routines in this file.
// Declared as a typed constant rather than a one-letter macro: a macro named
// `k` would be substituted into every later identifier spelled `k`, whatever
// its scope. It remains usable as an array bound and loop limit.
constexpr int k = 3;

// Kept: code in this file refers to string, vector, cout, ... unqualified.
using namespace std;

// One record exactly as read from the text file: every field is still the
// unparsed string token.
struct Tuple {
	std::string attr1, attr2, attr3, attr4;  // first four fields of the record
	std::string attr5;                       // last field of the record
};

// One parsed sample: the first four fields converted to float, the fifth
// field kept as text.
// The numeric members default to zero so that a default-constructed Tuple1
// never carries indeterminate values.
struct Tuple1 {
	float attr1 = 0.0f;
	float attr2 = 0.0f;
	float attr3 = 0.0f;
	float attr4 = 0.0f;
	std::string attr5;
};

//计算两个样本之间的欧式距离
float getDisXY(Tuple1 t1, Tuple1 t2) {
	float dis = 0;
	dis = sqrt((t1.attr1 - t2.attr1)*(t1.attr1 - t2.attr1) + (t1.attr2 - t2.attr2)*(t1.attr2 - t2.attr2)
		+ (t1.attr3 - t2.attr3)*(t1.attr3 - t2.attr3) + (t1.attr4 - t2.attr4)*(t1.attr4 - t2.attr4));
	return dis;
}

//根据质心判断属于那一类
int clusterOfTuple1(Tuple1 means[], Tuple1 tuple1) {
	float dis = getDisXY(means[0], tuple1);
	float temp = dis;
	int label = 0;
	for (int i = 1; i < k; i++) {
		dis = getDisXY(means[i], tuple1);
		if (dis < temp) {
			label = i;
			temp = dis;
		};
	}
	return label;
}

//计算每类质心的平方误差
float getVar(Tuple1 means[], vector cluster[]) {
	float var = 0;
	for (int i = 0; i < k; i++) {
		vector t = cluster[i];
		for (vector::size_type j = 0; j < t.size(); j++) {
			var = var + getDisXY(means[i], t[j]);
		}
	}

	return var;
}

// Centroid of one cluster: the arithmetic mean of attr1..attr4 over all
// members. attr5 of the returned tuple is left empty.
//
// An empty cluster yields an all-zero centroid. Dividing by a zero count
// would produce NaN attributes, and comparisons against NaN distances are
// always false, which breaks the nearest-centroid search.
Tuple1 getMeans(const std::vector<Tuple1>& cluster) {
	Tuple1 t;
	t.attr1 = 0.0f;
	t.attr2 = 0.0f;
	t.attr3 = 0.0f;
	t.attr4 = 0.0f;
	if (cluster.empty()) {
		return t;
	}

	float sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0;
	for (const Tuple1& sample : cluster) {
		sum1 += sample.attr1;
		sum2 += sample.attr2;
		sum3 += sample.attr3;
		sum4 += sample.attr4;
	}

	const int num = static_cast<int>(cluster.size());
	t.attr1 = sum1 / num;
	t.attr2 = sum2 / num;
	t.attr3 = sum3 / num;
	t.attr4 = sum4 / num;
	return t;
}

//Kmeans算法
void Kmeans(vector tuples1) {
	vector cluster[k];
	Tuple1 means[k];
	
	//初始化三个随机数
	int t_num = tuples1.size();
	int rand_num[3];
	srand((unsigned)time(NULL));
	for (int i = 0; i < k; i++) {
		rand_num[i] = rand() % t_num;
		means[i] = tuples1[rand_num[i]];
		cout << rand_num[i] << '\n' << endl;
	}
	
	//根据默认的质心给簇赋值
	int label = 0;
	for (int i = 0; i < t_num; i++) {
		label = clusterOfTuple1(means, tuples1[i]);
		cluster[label].push_back(tuples1[i]);
		}

	//输出刚开始的簇分类
	for (int i = 0; i < k; i++) {
		cout << "第" << i << "簇" << endl;
		vector t = cluster[i];
		for (int j = 0; j < t.size(); j++) {
			cout << t[j].attr1 << " " << t[j].attr2 << " " << t[j].attr3 << " " << t[j].attr4 << " " << endl;
		}
		cout << t.size() << endl;
	}

	//当两次误差小于1时结束学习
	float oldVar = -1;
	float newVar = getVar(means, cluster);
	int i_num = 0;
	cout << "输出初始误差" << newVar << endl;
	while (abs(oldVar - newVar) >= 0.0001) {
		//更新每个簇的均值
		for (int i = 0; i < k; i++) {
			means[i] = getMeans(cluster[i]);
		}
		oldVar = newVar;
		newVar = getVar(means, cluster);
		//清空每个簇
		for (int i = 0; i < k; i++) {
			cluster[i].clear();
		}
		//更新簇
		for (int i = 0; i< t_num; i++) {
			label = clusterOfTuple1(means, tuples1[i]);
			cluster[label].push_back(tuples1[i]);
		}
		i_num++;
	}
	cout << "i_num:" << i_num << endl;
	//输出学习后的分类
	cout << "学习后的分类" << endl;
	for (int i = 0; i < k; i++) {
		cout << "第" << i << "簇" << endl;
		vector t = cluster[i];
		for (int j = 0; j < t.size(); j++) {
			cout << t[j].attr1 << " " << t[j].attr2 << " " << t[j].attr3 << " " << t[j].attr4 << " " << endl;
		}
		cout << t.size() << endl;
	}
	cout << "输出误差:" << newVar << endl;
}

int main() {
	char fname[255] = "iris.txt";

/*	cout << "请输入文件路径" << endl;
	cin >> fname;
	cout << endl;
	*/
	ifstream infile;
	infile.open(fname, ios::in);
	if (!infile) {
		cout << "文件打开错误" < tuples;
	vector tuples1;
	Tuple tuple;
	Tuple1 tuple1;
	int remainder = 0;
	//从文件中读取数据
	while (!infile.eof()) {
		
		count++;
		remainder = count % 5;

		switch(remainder){
		case 0:
			getline(infile,tuple.attr5,'\n');
			tuples.push_back(tuple);
			break;
		case 1:
			getline(infile, tuple.attr1, ',');
			break;
		case 2:
			getline(infile, tuple.attr2, ',');
			break;
		case 3:
			getline(infile, tuple.attr3, ',');
			break;
		case 4:
			getline(infile, tuple.attr4, ',');
			break;
		}
	}

	for (vector::size_type i = 0; i != tuples.size(); i++) {
		
		tuple1.attr1 = stof(tuples[i].attr1);
		tuple1.attr2 = stof(tuples[i].attr2);
		tuple1.attr3 = stof(tuples[i].attr3);
		tuple1.attr4 = stof(tuples[i].attr4);
		tuple1.attr5 = tuples[i].attr5;
		tuples1.push_back(tuple1);
//		cout << tuples[i].attr1 <<"," <::size_type i = 0; i != tuples1.size(); ++i) {
		cout << tuples1[i].attr1 <<"," <


你可能感兴趣的:(IRIS数据采用Kmeans方法的C++实现)