kmeans聚类算法C++实现

先上作业题,大一的童鞋写这个,确实有一丁丁难。

kmeans聚类算法C++实现_第1张图片

题目中出现了“这些点不重合”、“挑选K个不同点”的字眼,对于前者,使用c++的set可以直接去重,对于后者,可以采用“不放回抽样”。

第一步,搭好程序框架,设计好数据结构,不涉及具体算法。看起来有些多,其实有些代码可以不要,比如用彩色输出内容。里面有一些c++的语法,可以用c替换,比如:

容器 vector<Point> vec_all_point; 可以替换为结构体数组 Point all_point[];

函数 void InitPoint(vector<Point>& vec_all_point, vector<Point>& vec_central_point);

可以替换为

函数void InitPoint(Point all_point[], Point central_point[]);

#include <iostream>
#include <vector>
#include <set>
using namespace std;

// A sample point on the integer grid together with the id of the group it
// currently belongs to.
struct Point
{
	int x;
	int y;
	int group_id;

	// Stores the three values as given.
	Point(int _x, int _y, int _group_id)
		: x(_x), y(_y), group_id(_group_id)
	{
	}

	// Orders points by x first and by y when x is equal. group_id takes no
	// part in the comparison, so two points with the same coordinates are
	// equivalent for ordered containers.
	bool operator<(const Point& p) const
	{
		return (x != p.x) ? (x < p.x) : (y < p.y);
	}
};

// Colour codes 30..37, spelled out explicitly rather than relying on
// implicit enumerator increments.
enum COLOR
{
	BLACK     = 30,
	RED       = 31,
	GREEN     = 32,
	YELLOW    = 33,
	BLUE      = 34,
	PURPLE    = 35,
	DARKGREEN = 36,
	WHITE     = 37
};

// Bounds for the number of generated points.
const int min_point_num = 15;
const int max_point_num = 65;

// Bounds of the coordinate grid.
const int min_x = 0;
const int min_y = 0;
const int max_x = 80;
const int max_y = 40;

// Upper limit on the number of update passes performed by main().
const int max_iteration_times = 1000;

// Movement threshold for the stopping test.
const int threshold = 2;

// Number of groups (the "k" in k-means).
const int k = 7;

void InitPoint(vector& vec_all_point, vector& vec_central_point);
void UpdatePoint(vector& vec_all_point, vector& vec_central_point);
void PrintPoint(const vector& vec_all_point);
void Sampling(vector data, int size, vector& sample, int n);
void GotoXY(int row, int col);
void SetColor(int color);

int main()
{
	vector vec_all_point;
	vector vec_central_point;

	InitPoint(vec_all_point, vec_central_point);

	for (int i = 0; i < max_iteration_times; i++)
	{
		UpdatePoint(vec_all_point, vec_central_point);
		if(//……)
		{
			break;
		}
	}

	PrintPoint(vec_all_point);

	return 0;
}

void InitPoint(vector& vec_all_point, vector& vec_central_point)
{
	//随机产生若干个点
	
	//随机产生k个中心点
}

void UpdatePoint(vector& vec_all_point, vector& vec_central_point)
{
	
}

void PrintPoint(const vector& vec_all_point)
{

}

// Skeleton stub: will draw n entries from the first `size` entries of `data`
// into `sample`. For now it leaves `sample` untouched.
void Sampling(std::vector<int> data, int size, std::vector<int>& sample, int n)
{
	// Intentionally empty in the skeleton.
}

// Skeleton stub taking a row and a column; it has no effect yet.
void GotoXY(int row, int col)
{
	// Intentionally empty in the skeleton.
}

// Skeleton stub taking a colour code; it has no effect yet.
void SetColor(int color)
{
	// Intentionally empty in the skeleton.
}

下面的任务就是实现六个函数。

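#include <algorithm>
#include <climits>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <iterator>
#include <set>
#include <vector>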
using namespace std;

// A sample point on the integer grid plus the id of the group it belongs to.
struct Point
{
	int x;
	int y;
	int group_id;

	// Stores the three values as given.
	Point(int _x, int _y, int _group_id)
		: x(_x), y(_y), group_id(_group_id)
	{
	}

	// Orders by x, then by y. group_id is ignored, so points that share
	// coordinates are equivalent for ordered containers.
	bool operator<(const Point& p) const
	{
		if (x == p.x)
		{
			return y < p.y;
		}
		return x < p.x;
	}
};

// Colour codes passed to SetColor(); PrintPoint() selects one as
// BLACK + group_id. Values are written out explicitly.
enum COLOR
{
	BLACK     = 30,
	RED       = 31,
	GREEN     = 32,
	YELLOW    = 33,
	BLUE      = 34,
	PURPLE    = 35,
	DARKGREEN = 36,
	WHITE     = 37
};

// The number of generated points is drawn from [min_point_num, max_point_num].
const int min_point_num = 15;
const int max_point_num = 65;

// Inclusive coordinate range of the generated points.
const int min_x = 0;
const int min_y = 0;
const int max_x = 80;
const int max_y = 40;

// Upper limit on the number of update passes.
const int max_iteration_times = 1000;

// Iteration stops once the summed center movement on each axis is below this.
const int threshold = 2;

// Number of groups (the "k" in k-means).
const int k = 7;

void InitPoint(vector& vec_all_point, vector& vec_central_point);
void UpdatePoint(vector& vec_all_point, vector& vec_central_point);
void PrintPoint(const vector& vec_all_point);
void Sampling(vector data, int size, vector& sample, int n);
void GotoXY(int row, int col);
void SetColor(int color);

int main()
{
	vector vec_all_point;
	vector vec_central_point;

	InitPoint(vec_all_point, vec_central_point);

	for (int i = 0; i < max_iteration_times; i++)
	{
		vector vec_pre_central_point(vec_central_point.begin(), vec_central_point.end());
		UpdatePoint(vec_all_point, vec_central_point);
		int delta_x_sum = 0;
		int delta_y_sum = 0;
		for (int j = 0; j < k; j++)
		{
			delta_x_sum += fabs(vec_central_point[j].x - vec_pre_central_point[j].x);
			delta_y_sum += fabs(vec_central_point[j].y - vec_pre_central_point[j].y);
		}
		if (delta_x_sum < threshold && delta_y_sum < threshold)
		{
			GotoXY(max_y + 5, 0);
			cout << "更新" << i + 1 << "次,中心点不再变化" << endl;
			break;
		}
	}

	PrintPoint(vec_all_point);

	GotoXY(max_y + 10, 0);

	return 0;
}

void InitPoint(vector& vec_all_point, vector& vec_central_point)
{
	//随机产生若干个点
	srand(unsigned int(time(NULL)));
	int point_num = min_point_num + rand() % (max_point_num - min_point_num + 1);

	set set_all_point;
	while (true)
	{
		int x = min_x + rand() % (max_x - min_x + 1);
		int y = min_y + rand() % (max_y - min_y + 1);
		set_all_point.insert(Point(x, y, 0));
		if (set_all_point.size() >= point_num)
		{
			break;
		}
	}

	copy(set_all_point.begin(), set_all_point.end(), back_inserter(vec_all_point));

	//随机产生k个中心点
	vector data;
	for (int i = 0; i < point_num; i++)
	{
		data.push_back(i);
	}

	vector sample;
	Sampling(data, point_num, sample, k);

	for (int i = 0; i < k; i++)
	{
		int central_point_index = sample[i];
		vec_all_point[central_point_index].group_id = i + 1;
		vec_central_point.push_back(vec_all_point[central_point_index]);
	}
}

void UpdatePoint(vector& vec_all_point, vector& vec_central_point)
{
	for (int i = 0; i < vec_all_point.size(); i++)
	{
		int min_distance = INT_MAX;
		int new_group_id = INT_MAX;
		for (int j = 0; j < vec_central_point.size(); j++)
		{
			int distance = (vec_all_point[i].x - vec_central_point[j].x) * (vec_all_point[i].x - vec_central_point[j].x) + (vec_all_point[i].y - vec_central_point[j].y) * (vec_all_point[i].y - vec_central_point[j].y);
			if (distance < min_distance)
			{
				min_distance = distance;
				new_group_id = vec_central_point[j].group_id;
			}
		}
		vec_all_point[i].group_id = new_group_id;
	}

	int sum_x[k] = { 0 };
	int sum_y[k] = { 0 };
	int avg_x[k] = { 0 };
	int avg_y[k] = { 0 };
	int count[k] = { 0 };

	for (int i = 0; i < vec_all_point.size(); i++)
	{
		sum_x[vec_all_point[i].group_id - 1] += vec_all_point[i].x;
		sum_y[vec_all_point[i].group_id - 1] += vec_all_point[i].y;
		count[vec_all_point[i].group_id - 1]++;
	}

	vec_central_point.clear();

	for (int i = 0; i < k; i++)
	{
		avg_x[i] = sum_x[i] / count[i];
		avg_y[i] = sum_y[i] / count[i];
		vec_central_point.push_back(Point(avg_x[i], avg_y[i], i + 1));
	}
}

void PrintPoint(const vector& vec_all_point)
{
	for (int i = 0; i < vec_all_point.size(); i++)
	{
		int row = max_y - vec_all_point[i].y;
		int col = vec_all_point[i].x;
		GotoXY(row, col);
		SetColor(BLACK + vec_all_point[i].group_id);
		cout << vec_all_point[i].group_id;
	}
}

// Draws n entries without replacement from the first `size` entries of
// `data` and appends them to `sample`.
//
// `data` is taken by value, so the caller's vector is not reordered.
// `size` is clamped to [0, data.size()] and `n` to at most `size`, so an
// oversized request returns every available entry instead of computing
// rand() % 0 or indexing out of range. A non-positive n appends nothing.
void Sampling(std::vector<int> data, int size, std::vector<int>& sample, int n)
{
	const int available = static_cast<int>(data.size());
	if (size > available)
	{
		size = available;
	}
	if (size < 0)
	{
		size = 0;
	}
	if (n > size)
	{
		n = size;
	}

	// Partial shuffle from the back: each round moves one randomly chosen
	// entry of the not-yet-picked prefix into the tail.
	for (int i = 0; i < n; i++)
	{
		const int last = size - i - 1;
		const int pos = std::rand() % (size - i);
		const int t = data[pos];
		data[pos] = data[last];
		data[last] = t;
	}

	// The last n entries of the range now hold the sample.
	for (int i = size - n; i < size; i++)
	{
		sample.push_back(data[i]);
	}
}

// Moves the terminal cursor to the given zero-based row and column by
// emitting the ESC [ row ; col H sequence.
void GotoXY(int row, int col)
{
	// The sequence counts rows and columns from 1, and terminals treat 0 as
	// 1. Shifting by one keeps row/column 0 from landing on the same cell
	// as row/column 1.
	std::printf("\033[%d;%dH", row + 1, col + 1);
}

// Writes the escape sequence ESC [ <color> m to standard output.
void SetColor(int color)
{
	std::fprintf(stdout, "\033[%dm", color);
}

运行结果如下:

kmeans聚类算法C++实现_第2张图片

下面把数据量加大10倍,并且动态显示聚类的过程。

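#include <algorithm>
#include <climits>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <iterator>
#include <set>
#include <vector>
#include <windows.h>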
using namespace std;

// A sample point on the integer grid plus the id of the group it belongs to
// (0 is drawn as an unassigned point by PrintPoint).
struct Point
{
	int x;
	int y;
	int group_id;

	// Stores the three values as given.
	Point(int _x, int _y, int _group_id)
		: x(_x), y(_y), group_id(_group_id)
	{
	}

	// Orders by x, then by y; group_id is not compared.
	bool operator<(const Point& p) const
	{
		const bool same_column = (x == p.x);
		return same_column ? (y < p.y) : (x < p.x);
	}
};

// Colour codes passed to SetColor(); PrintPoint() uses BLACK + group_id for
// grouped points and WHITE for unassigned ones.
enum COLOR
{
	BLACK     = 30,
	RED       = 31,
	GREEN     = 32,
	YELLOW    = 33,
	BLUE      = 34,
	PURPLE    = 35,
	DARKGREEN = 36,
	WHITE     = 37
};

// The number of generated points is drawn from [min_point_num, max_point_num].
const int min_point_num = 150;
const int max_point_num = 650;

// Inclusive coordinate range of the generated points (GotoXY divides by 10
// when mapping to the screen).
const int min_x = 0;
const int min_y = 0;
const int max_x = 800;
const int max_y = 400;

// Upper limit on the number of update passes.
const int max_iteration_times = 1000;

// Iteration stops once the summed center movement on each axis is below this.
const int threshold = 5;

// Number of groups (the "k" in k-means).
const int k = 7;

void InitPoint(vector& vec_all_point, vector& vec_central_point);
void UpdatePoint(vector& vec_all_point, vector& vec_central_point);
void PrintPoint(const vector& vec_all_point);
void Sampling(vector data, int size, vector& sample, int n);
void GotoXY(int row, int col);
void SetColor(int color);

int main()
{
	vector vec_all_point;
	vector vec_central_point;

	InitPoint(vec_all_point, vec_central_point);
	PrintPoint(vec_all_point);
	Sleep(3000);

	for (int i = 0; i < max_iteration_times; i++)
	{
		vector vec_pre_central_point(vec_central_point.begin(), vec_central_point.end());
		UpdatePoint(vec_all_point, vec_central_point);
		int delta_x_sum = 0;
		int delta_y_sum = 0;
		for (int j = 0; j < k; j++)
		{
			delta_x_sum += fabs(vec_central_point[j].x - vec_pre_central_point[j].x);
			delta_y_sum += fabs(vec_central_point[j].y - vec_pre_central_point[j].y);
		}
		if (delta_x_sum < threshold && delta_y_sum < threshold)
		{
			GotoXY(max_y + 50, 0);
			cout << "更新" << i + 1 << "次,中心点不再变化,聚类完成" << endl;
			break;
		}
		PrintPoint(vec_all_point);
		Sleep(1000);
	}

	GotoXY(max_y + 100, 0);

	return 0;
}

void InitPoint(vector& vec_all_point, vector& vec_central_point)
{
	//随机产生若干个点
	srand(unsigned int(time(NULL)));
	int point_num = min_point_num + rand() % (max_point_num - min_point_num + 1);

	set set_all_point;
	while (true)
	{
		int x = min_x + rand() % (max_x - min_x + 1);
		int y = min_y + rand() % (max_y - min_y + 1);
		set_all_point.insert(Point(x, y, 0));
		if (set_all_point.size() >= point_num)
		{
			break;
		}
	}

	copy(set_all_point.begin(), set_all_point.end(), back_inserter(vec_all_point));

	//随机产生k个中心点
	vector data;
	for (int i = 0; i < point_num; i++)
	{
		data.push_back(i);
	}

	vector sample;
	Sampling(data, point_num, sample, k);

	for (int i = 0; i < k; i++)
	{
		int central_point_index = sample[i];
		vec_all_point[central_point_index].group_id = i + 1;
		vec_central_point.push_back(vec_all_point[central_point_index]);
	}
}

void UpdatePoint(vector& vec_all_point, vector& vec_central_point)
{
	for (int i = 0; i < vec_all_point.size(); i++)
	{
		int min_distance = INT_MAX;
		int new_group_id = INT_MAX;
		for (int j = 0; j < vec_central_point.size(); j++)
		{
			int distance = (vec_all_point[i].x - vec_central_point[j].x) * (vec_all_point[i].x - vec_central_point[j].x) + (vec_all_point[i].y - vec_central_point[j].y) * (vec_all_point[i].y - vec_central_point[j].y);
			if (distance < min_distance)
			{
				min_distance = distance;
				new_group_id = vec_central_point[j].group_id;
			}
		}
		vec_all_point[i].group_id = new_group_id;
	}

	int sum_x[k] = { 0 };
	int sum_y[k] = { 0 };
	int avg_x[k] = { 0 };
	int avg_y[k] = { 0 };
	int count[k] = { 0 };

	for (int i = 0; i < vec_all_point.size(); i++)
	{
		sum_x[vec_all_point[i].group_id - 1] += vec_all_point[i].x;
		sum_y[vec_all_point[i].group_id - 1] += vec_all_point[i].y;
		count[vec_all_point[i].group_id - 1]++;
	}

	vec_central_point.clear();

	for (int i = 0; i < k; i++)
	{
		avg_x[i] = sum_x[i] / count[i];
		avg_y[i] = sum_y[i] / count[i];
		vec_central_point.push_back(Point(avg_x[i], avg_y[i], i + 1));
	}
}

void PrintPoint(const vector& vec_all_point)
{
	//system("cls");
	for (int i = 0; i < vec_all_point.size(); i++)
	{
		int row = max_y - vec_all_point[i].y;
		int col = vec_all_point[i].x;
		GotoXY(row, col);

		if (vec_all_point[i].group_id != 0)
		{
			SetColor(BLACK + vec_all_point[i].group_id);
			cout << vec_all_point[i].group_id;
		}
		else
		{
			SetColor(WHITE);
			cout << "*";
		}
	}
}

// Draws n entries without replacement from the first `size` entries of
// `data` and appends them to `sample`.
//
// `data` is taken by value, so the caller's vector is not reordered.
// `size` is clamped to [0, data.size()] and `n` to at most `size`, so an
// oversized request returns every available entry instead of computing
// rand() % 0 or indexing out of range. A non-positive n appends nothing.
void Sampling(std::vector<int> data, int size, std::vector<int>& sample, int n)
{
	const int available = static_cast<int>(data.size());
	if (size > available)
	{
		size = available;
	}
	if (size < 0)
	{
		size = 0;
	}
	if (n > size)
	{
		n = size;
	}

	// Partial shuffle from the back: each round moves one randomly chosen
	// entry of the not-yet-picked prefix into the tail.
	for (int i = 0; i < n; i++)
	{
		const int last = size - i - 1;
		const int pos = std::rand() % (size - i);
		const int t = data[pos];
		data[pos] = data[last];
		data[last] = t;
	}

	// The last n entries of the range now hold the sample.
	for (int i = size - n; i < size; i++)
	{
		sample.push_back(data[i]);
	}
}

// Moves the terminal cursor to the screen cell for the given row and column.
// Both are divided by 10 to scale the coordinate range down to the screen.
void GotoXY(int row, int col)
{
	// The ESC [ row ; col H sequence counts from 1, and terminals treat 0
	// as 1. Adding one after scaling keeps cell 0 from landing on cell 1.
	std::printf("\033[%d;%dH", row / 10 + 1, col / 10 + 1);
}

// Writes the escape sequence ESC [ <color> m to standard output.
void SetColor(int color)
{
	std::fprintf(stdout, "\033[%dm", color);
}

程序初始效果(随机选择k个中心点,其它样本点用*表示):

kmeans聚类算法C++实现_第3张图片

最后聚类效果:

kmeans聚类算法C++实现_第4张图片

聚类动态过程演示:

kmeans

你可能感兴趣的:(算法,kmeans,聚类,c++)