【KNN】处理鸢尾花数据集大作业

  • 机器学习KNN算法(极其简单的入门算法)
  • 思路:读入训练集数据 → 读入测试集数据 → 计算每个测试样本到所有训练样本的距离 → 取距离最近的 k 个样本,用 Map 统计各类别出现的次数 → 输出出现次数最多的类别
  • 一点点的修改:在距离计算这一步,我只累加了各维度对应数据之差的平方(即平方和),没有再开平方;开平方是单调变换,不会改变距离的大小顺序,因此不影响最近邻的选取
  • 代码:
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

//necessary data structure
// One record as read from a data file: its numeric values followed by its label.
struct raw
{
	std::vector<double> data;	// numeric values of the record, in the order they were read
	std::string name;		// text that follows the numeric values on the same line
};
// One distance computation: how far a stored record was from the query, and that record's label.
struct result
{
	double distance;	// squared distance between the stored record and the query
	std::string name;	// label copied from the stored record
};

/*===============================================================
  name:get_trainingset  parameter:string address, int n,vectorres
  function:get the dataset from the file
  ===============================================================*/
void get_trainingset(ifstream &trainingfile, int n,vector<raw>vec) {
	while (!trainingfile.eof()) {
		string tempname;
		vector<double> tempdata;
		//get a set of data and push them in vectordata
		for (int i = 0; i < n; i++) {
			string tempstr;
			getline(trainingfile, tempstr, ',');
			istringstream ss(tempstr);
			double tempnum;
			ss >> tempnum;
			tempdata.push_back(tempnum);
		}
		getline(trainingfile, tempname);
		struct raw tempraw;
		tempraw.data = tempdata;
		tempraw.name = tempname;
		cout<<"tempname"<<tempname<<endl;
		vec.push_back(tempraw);
	}
	trainingfile.close();
	cout<<"end"<<endl;
}
/*===============================================================
  name:get_textset  parameter:string address, int n,vectorres
  function:get the dataset from the file
  ===============================================================*/
void get_textset(ifstream& textfile, int n, vector<raw>vec) {
	while (!textfile.eof()) {
		string tempname;
		vector<double> tempdata;
		//get a set of data and push them in vectordata
		for (int i = 0; i < n; i++) {
			string tempstr;
			getline(textfile, tempstr, ',');
			istringstream ss(tempstr);
			double tempnum;
			ss >> tempnum;
			tempdata.push_back(tempnum);
		}
		getline(textfile, tempname);
		struct raw tempraw;
		tempraw.data = tempdata;
		tempraw.name = tempname;
		cout<<"tempname"<<tempname<<endl;
		vec.push_back(tempraw);
	}
	textfile.close();
	cout<<"end"<<endl;
}
/*======================================================================================
  name:square_distance  parameter:vector vec, raw text, int n,vectorresult
  function:calculate the square of the distance
  ======================================================================================*/
void square_distance(vector<raw>vec, raw text, int n, vector<result>& iris_res) {
	for (vector<raw>::iterator it = vec.begin(); it != vec.end(); it++) {
		double dis = 0;
		for (int i = 0; i < n; i++) {
			dis += pow(((*it).data[i] - text.data[i]), 2);		
			cout<<dis;
		}
		struct result res;
		cout<<"distance"<<dis<<endl;
		res.distance = dis;
		res.name = (*it).name;
		iris_res.push_back(res);
	}
}

// Sort predicate for results: true only when i has a strictly smaller distance than j.
bool cmp_vec(struct result i, struct result j) {
	return i.distance < j.distance;
}
// Sort predicate for (label, count) pairs: true only when i has a strictly smaller count than j.
bool cmp_map(const pair<string, int> i, const pair<string, int> j){
	return i.second < j.second;
}
/*===============================================================================
  name:make_map  parameter:vectorres,mapans,int q
  function:count the most common possibilities
  ===============================================================================*/
void make_map(vector<result>res,map<string, int>ans,int q) {
	sort(res.begin(), res.end(), cmp_vec);
	for (vector<result>::size_type i = 0; i < q; i++) {
		ans[res[i].name]++;
		cout << ans[res[i].name] << endl;
	}
	vector< pair<string, int> >ans_vec(ans.begin(), ans.end());
	sort(ans_vec.begin(), ans_vec.end(), cmp_map);
	cout << ans_vec[0].first << endl;
}


// Entry point: loads the training and test files, asks for the number of values
// per record and the number of neighbours, then prints a predicted label for
// every test record. Returns 0 on success and 1 when a file cannot be opened or
// the typed input is not a positive integer.
int main() {
	vector<raw>iris_vec;
	vector<raw>iris_text;
	vector<result>iris_res;
	map<string, int>iris_ans;
	ifstream trainingfile("d:\\工作\\大一下作业\\c++\\knn鸢尾花\\iris_trainingset.txt");
	ifstream textfile("d:\\工作\\大一下作业\\c++\\knn鸢尾花\\iris_textset.txt");
	if (!trainingfile || !textfile) {
		cout << "Error!";
		return 1;
	}
	//get the dimension a set of data and the number of the comparion objects
	int dimension = 0, q = 0;
	cout << "please input the dimension of a set of data:";
	cin >> dimension;
	cout << "please input the number of the comparison objects:";
	cin >> q;
	// reject unreadable or non-positive input instead of running with garbage values
	if (!cin || dimension <= 0 || q <= 0) {
		cout << "Error!";
		return 1;
	}
	
	//process the data 
	get_trainingset(trainingfile,dimension, iris_vec);
	get_textset(textfile, dimension, iris_text);
	
	for (vector<raw>::iterator it = iris_text.begin(); it != iris_text.end(); ++it) {
		// square_distance appends, so drop the previous sample's distances first;
		// otherwise they would take part in this sample's vote
		iris_res.clear();
		square_distance(iris_vec,*it,dimension,iris_res);
		// iris_ans is passed by value and never modified here, so each vote starts empty
		make_map(iris_res, iris_ans, q);
	}
	
	return 0;
}

【20200340】

你可能感兴趣的:(机器学习)