[C++] Kmeans算法实现

kmeans原理

  • 1.初始化k个起始中心点;
  • 2.计算所有样本点到这些中心点的距离,对于单个样本点,把它归类成和距离最近的中心点一类;
  • 3.聚类好所有样本点后,对聚到同一类的点,计算坐标均值,更新中心点;
  • 4.循环2、3两步,直到达到指定的循环次数或者满足退出循环条件时(如每次循环中心点移动距离小于某个值),退出循环.

讲一下题目

输入序列坐标和聚类数k:
$[(x_1,y_1),(x_2,y_2),\ldots],\ k$
使用前 k 个坐标作为初始中心点

迭代100次

输出分类结果label:
$[1,1,1,0,1,2,\ldots]$

代码部分

定义全局变量

// Number of sample points in the test set.
int POINTNUM{1000};
// Number of clusters.
int k{3};
// Number of k-means iterations to run.
int ITER{100};

随机生成二维测试点集

	//随机生成二维测试点集
	vector<Point2f> points;
	for (int i = 0; i < POINTNUM; i++)
	{
		Point2f point;
		point.x = rand() % 500;
		point.y = rand() % 500;
		points.push_back(point);
	}

计算两点间距离的函数

// Returns the Euclidean distance between `point` and `center`.
// The coordinate differences are taken in float, squared and summed in double,
// and the square root is narrowed back to float on return.
float getDistance(Point2f point, Point2f center)
{
	const double dx = point.x - center.x;
	const double dy = point.y - center.y;
	return sqrt(dx * dx + dy * dy);
}

Kmeans部分

	// Seed the k cluster centers with the first k sample points
	// (indexes points[0..k-1], so points must hold at least k entries).
	vector<Point2f> center;
	for (int i = 0; i < k; ++i)
	{
		center.push_back(points[i]);
	}

	// label[i] is the index of the cluster that point i belongs to; -1 means "not assigned yet".
	vector<int> label(POINTNUM, -1);

	// Run a fixed number of assignment/update rounds.
	for (int iter = 0; iter < ITER; ++iter)
	{
		// Assignment step: give every point the label of its nearest center.
		for (int pointNum = 0; pointNum < POINTNUM; ++pointNum)
		{
			float distance = FLT_MAX;

			for (int cluster = 0; cluster < k; ++cluster)
			{
				float temp_distance = getDistance(points[pointNum], center[cluster]);
				// Strict '<' means ties go to the lowest-indexed cluster.
				if (temp_distance < distance)
				{
					distance = temp_distance;
					label[pointNum] = cluster;
				}
			}
		}

		// Update step: move each center to the mean of the points assigned to it.
		for (int cluster = 0; cluster < k; ++cluster)
		{
			int count = 0;
			// Accumulate in floating point: integer sums would make sum / count an
			// integer division and truncate the centroid coordinates.
			double sum_x = 0.0;
			double sum_y = 0.0;
			for (int pointNum = 0; pointNum < POINTNUM; ++pointNum)
			{
				if (label[pointNum] == cluster)
				{
					++count;
					sum_x += points[pointNum].x;
					sum_y += points[pointNum].y;
				}
			}

			// A cluster with no members has no mean; keep its previous center
			// instead of dividing by zero.
			if (count > 0)
			{
				center[cluster].x = static_cast<float>(sum_x / count);
				center[cluster].y = static_cast<float>(sum_y / count);
			}
		}
	}

将结果画出来的部分代码(仅k=3)

主要是利用opencv的两个函数:
imshow(用图片的方式显示矩阵Mat)
circle(在矩阵Mat上画圆)

Mat img = Mat::zeros(500, 500, CV_8UC3);

for (int i = 0; i < POINTNUM; i++)
{
	if (label[i] == 0)
	{
		circle(img, points[i], 2, Scalar(255, 0, 0), FILLED, LINE_AA);
	}
	if (label[i] == 1)
	{
		circle(img, points[i], 2, Scalar(0,255,0), FILLED, LINE_AA);
	}
	if (label[i] == 2)
	{
		circle(img, points[i], 2, Scalar(0, 0, 255), FILLED, LINE_AA);
	}
}

namedWindow("img", WINDOW_AUTOSIZE);
imshow("img", img);
waitKey(0);

结果

初始随机点集:

[C++] Kmeans算法实现_第1张图片

kmeans,k=3聚类后结果

[C++] Kmeans算法实现_第2张图片

完整代码

#include <cfloat>
#include <cmath>
#include <cstdlib>
#include <vector>

#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;

// Number of sample points in the test set.
int POINTNUM{1000};
// Number of clusters.
int k{3};
// Number of k-means iterations to run.
int ITER{100};

// Returns the Euclidean distance between `point` and `center`.
// The coordinate differences are taken in float, squared and summed in double,
// and the square root is narrowed back to float on return.
float getDistance(Point2f point, Point2f center)
{
	const double dx = point.x - center.x;
	const double dy = point.y - center.y;
	return sqrt(dx * dx + dy * dy);
}

void main()
{
	Mat img = Mat::zeros(500, 500, CV_8UC3);

	//随机生成二维测试点集
	vector<Point2f> points;
	for (int i = 0; i < POINTNUM; i++)
	{
		Point2f point;
		point.x = rand() % 500;
		point.y = rand() % 500;
		points.push_back(point);
	}

	//kmeans
	
	//使用前几个坐标初始化中心点
	vector<Point2f> center;
	for (int i = 0; i < k; ++i)
	{
		center.push_back(points[i]);
	}
	
	//初始化label
	vector<int> label(POINTNUM,-1);

	//迭代
	for (int iter = 0; iter < ITER; ++iter)
	{
		//根据与中心点的距离,对每一个样本点进行聚类
		for (int pointNum = 0; pointNum < POINTNUM; ++pointNum)
		{
			float distance = FLT_MAX;

			for (int cluster = 0; cluster < k; ++cluster)
			{
				float temp_distance = getDistance(points[pointNum], center[cluster]);
				if (temp_distance < distance)
				{
					distance = temp_distance;

					label[pointNum] = cluster;
				}
			}
		}

		//根据聚类结果,计算坐标均值,更新中心点坐标
		for (int cluster = 0; cluster < k; cluster++)
		{
			int count = 0;
			int sum_x = 0;
			int sum_y = 0;
			for (int pointNum = 0; pointNum < POINTNUM; pointNum++)
			{
				if (label[pointNum] == cluster)
				{
					count++;
					sum_x += points[pointNum].x;
					sum_y += points[pointNum].y;
				}
			}
			center[cluster].x = sum_x / count;
			center[cluster].y = sum_y / count;
		}
	}


	for (int i = 0; i < POINTNUM; i++)
	{
		if (label[i] == 0)
		{
			circle(img, points[i], 2, Scalar(255, 0, 0), FILLED, LINE_AA);
		}
		if (label[i] == 1)
		{
			circle(img, points[i], 2, Scalar(0,255,0), FILLED, LINE_AA);
		}
		if (label[i] == 2)
		{
			circle(img, points[i], 2, Scalar(0, 0, 255), FILLED, LINE_AA);
		}
	}

	namedWindow("img", WINDOW_AUTOSIZE);
	imshow("img", img);
	waitKey(0);

	return;
}


参考:
1.C++中rand()函数的用法
https://blog.csdn.net/Kallou/article/details/123554991
2.c++实现kmeans
https://blog.csdn.net/SongJ12345666/article/details/103347903
3.c++的float类型包含的最值问题…
https://blog.csdn.net/gao1440156051/article/details/50112897
4.opencv c++ circle()函数 、putText()函数小结
https://blog.csdn.net/weixin_45842951/article/details/122201959

你可能感兴趣的:(C++,机器学习,算法,c++,kmeans)