先上作业题,大一的童鞋写这个,确实有一丁丁难。
题目中出现了“这些点不重合”、“挑选K个不同点”的字眼,对于前者,使用c++的set可以直接去重,对于后者,可以采用“不放回抽样”。
第一步,搭好程序框架,设计好数据结构,不涉及具体算法。看起来有些多,其实有些代码可以不要,比如用彩色输出内容。里面有一些c++的语法,可以用c替换,比如:
容器vector<Point>
函数void InitPoint(vector<Point>& vec_all_point, vector<Point>& vec_central_point);
可以替换为
函数void InitPoint(Point all_point[], Point central_point[]);
// Headers needed by the skeleton. The original header names were lost when the
// page was extracted, so these are reconstructed from the names the code uses.
#include <iostream>
#include <set>
#include <vector>
using namespace std;
// A 2-D sample point together with the id of the cluster it is assigned to.
struct Point
{
    int x;         // horizontal coordinate
    int y;         // vertical coordinate
    int group_id;  // cluster label

    Point(int _x, int _y, int _group_id)
        : x(_x), y(_y), group_id(_group_id)
    {
    }

    // Orders by x first, then by y. group_id does not take part in the
    // comparison, so two points at the same coordinates compare equivalent.
    bool operator<(const Point& p) const
    {
        return (x == p.x) ? (y < p.y) : (x < p.x);
    }
};
// Foreground colour codes in the range 30-37. The values are consecutive, so
// callers can compute a colour as BLACK + offset.
enum COLOR
{
    BLACK     = 30,
    RED       = 31,
    GREEN     = 32,
    YELLOW    = 33,
    BLUE      = 34,
    PURPLE    = 35,
    DARKGREEN = 36,
    WHITE     = 37
};
// Tunable parameters of the demo.
const int min_point_num = 15;          // lower bound for the number of sample points
const int max_point_num = 65;          // upper bound for the number of sample points
const int min_x = 0;                   // coordinate range of the sample points
const int min_y = 0;
const int max_x = 80;
const int max_y = 40;
const int max_iteration_times = 1000;  // upper bound on update rounds in main()
const int threshold = 2;               // movement threshold for the convergence test
const int k = 7;                       // number of clusters
void InitPoint(vector& vec_all_point, vector& vec_central_point);
void UpdatePoint(vector& vec_all_point, vector& vec_central_point);
void PrintPoint(const vector& vec_all_point);
void Sampling(vector data, int size, vector& sample, int n);
void GotoXY(int row, int col);
void SetColor(int color);
int main()
{
vector vec_all_point;
vector vec_central_point;
InitPoint(vec_all_point, vec_central_point);
for (int i = 0; i < max_iteration_times; i++)
{
UpdatePoint(vec_all_point, vec_central_point);
if(//……)
{
break;
}
}
PrintPoint(vec_all_point);
return 0;
}
void InitPoint(vector& vec_all_point, vector& vec_central_point)
{
//随机产生若干个点
//随机产生k个中心点
}
void UpdatePoint(vector& vec_all_point, vector& vec_central_point)
{
}
void PrintPoint(const vector& vec_all_point)
{
}
// Skeleton stub: will draw n items from the first `size` entries of data
// without replacement and append them to sample. Intentionally empty for now.
void Sampling(std::vector<int> data, int size, std::vector<int>& sample, int n)
{
}
// Skeleton stub for the cursor-positioning helper; does nothing yet.
void GotoXY(int row, int col)
{
    (void)row;  // unused until the function is implemented
    (void)col;
}
// Skeleton stub for the colour-selection helper; does nothing yet.
void SetColor(int color)
{
    (void)color;  // unused until the function is implemented
}
下面的任务就是实现六个函数。
// Headers for the names this program uses. The original header names were lost
// when the page was extracted, so the list is reconstructed from the code.
#include <algorithm>  // copy
#include <climits>    // INT_MAX
#include <cmath>      // fabs
#include <cstdio>     // printf
#include <cstdlib>    // rand, srand, abs
#include <ctime>      // time
#include <iostream>   // cout
#include <iterator>   // back_inserter
#include <set>
#include <vector>
using namespace std;
// A 2-D sample point together with the id of the cluster it is assigned to.
struct Point
{
    int x;         // horizontal coordinate
    int y;         // vertical coordinate
    int group_id;  // cluster label

    Point(int _x, int _y, int _group_id)
        : x(_x), y(_y), group_id(_group_id)
    {
    }

    // Orders by x first, then by y. group_id does not take part in the
    // comparison, so two points at the same coordinates compare equivalent.
    bool operator<(const Point& p) const
    {
        return (x == p.x) ? (y < p.y) : (x < p.x);
    }
};
// Colour codes in the range 30-37, passed to SetColor(). The values are
// consecutive, so a colour can be computed as BLACK + group id.
enum COLOR
{
    BLACK     = 30,
    RED       = 31,
    GREEN     = 32,
    YELLOW    = 33,
    BLUE      = 34,
    PURPLE    = 35,
    DARKGREEN = 36,
    WHITE     = 37
};
// Tunable parameters of the demo.
const int min_point_num = 15;          // lower bound for the number of sample points
const int max_point_num = 65;          // upper bound for the number of sample points
const int min_x = 0;                   // coordinate range of the sample points
const int min_y = 0;
const int max_x = 80;
const int max_y = 40;
const int max_iteration_times = 1000;  // upper bound on update rounds in main()
const int threshold = 2;               // centres moving less than this in total => converged
const int k = 7;                       // number of clusters
void InitPoint(vector& vec_all_point, vector& vec_central_point);
void UpdatePoint(vector& vec_all_point, vector& vec_central_point);
void PrintPoint(const vector& vec_all_point);
void Sampling(vector data, int size, vector& sample, int n);
void GotoXY(int row, int col);
void SetColor(int color);
int main()
{
vector vec_all_point;
vector vec_central_point;
InitPoint(vec_all_point, vec_central_point);
for (int i = 0; i < max_iteration_times; i++)
{
vector vec_pre_central_point(vec_central_point.begin(), vec_central_point.end());
UpdatePoint(vec_all_point, vec_central_point);
int delta_x_sum = 0;
int delta_y_sum = 0;
for (int j = 0; j < k; j++)
{
delta_x_sum += fabs(vec_central_point[j].x - vec_pre_central_point[j].x);
delta_y_sum += fabs(vec_central_point[j].y - vec_pre_central_point[j].y);
}
if (delta_x_sum < threshold && delta_y_sum < threshold)
{
GotoXY(max_y + 5, 0);
cout << "更新" << i + 1 << "次,中心点不再变化" << endl;
break;
}
}
PrintPoint(vec_all_point);
GotoXY(max_y + 10, 0);
return 0;
}
void InitPoint(vector& vec_all_point, vector& vec_central_point)
{
//随机产生若干个点
srand(unsigned int(time(NULL)));
int point_num = min_point_num + rand() % (max_point_num - min_point_num + 1);
set set_all_point;
while (true)
{
int x = min_x + rand() % (max_x - min_x + 1);
int y = min_y + rand() % (max_y - min_y + 1);
set_all_point.insert(Point(x, y, 0));
if (set_all_point.size() >= point_num)
{
break;
}
}
copy(set_all_point.begin(), set_all_point.end(), back_inserter(vec_all_point));
//随机产生k个中心点
vector data;
for (int i = 0; i < point_num; i++)
{
data.push_back(i);
}
vector sample;
Sampling(data, point_num, sample, k);
for (int i = 0; i < k; i++)
{
int central_point_index = sample[i];
vec_all_point[central_point_index].group_id = i + 1;
vec_central_point.push_back(vec_all_point[central_point_index]);
}
}
void UpdatePoint(vector& vec_all_point, vector& vec_central_point)
{
for (int i = 0; i < vec_all_point.size(); i++)
{
int min_distance = INT_MAX;
int new_group_id = INT_MAX;
for (int j = 0; j < vec_central_point.size(); j++)
{
int distance = (vec_all_point[i].x - vec_central_point[j].x) * (vec_all_point[i].x - vec_central_point[j].x) + (vec_all_point[i].y - vec_central_point[j].y) * (vec_all_point[i].y - vec_central_point[j].y);
if (distance < min_distance)
{
min_distance = distance;
new_group_id = vec_central_point[j].group_id;
}
}
vec_all_point[i].group_id = new_group_id;
}
int sum_x[k] = { 0 };
int sum_y[k] = { 0 };
int avg_x[k] = { 0 };
int avg_y[k] = { 0 };
int count[k] = { 0 };
for (int i = 0; i < vec_all_point.size(); i++)
{
sum_x[vec_all_point[i].group_id - 1] += vec_all_point[i].x;
sum_y[vec_all_point[i].group_id - 1] += vec_all_point[i].y;
count[vec_all_point[i].group_id - 1]++;
}
vec_central_point.clear();
for (int i = 0; i < k; i++)
{
avg_x[i] = sum_x[i] / count[i];
avg_y[i] = sum_y[i] / count[i];
vec_central_point.push_back(Point(avg_x[i], avg_y[i], i + 1));
}
}
void PrintPoint(const vector& vec_all_point)
{
for (int i = 0; i < vec_all_point.size(); i++)
{
int row = max_y - vec_all_point[i].y;
int col = vec_all_point[i].x;
GotoXY(row, col);
SetColor(BLACK + vec_all_point[i].group_id);
cout << vec_all_point[i].group_id;
}
}
// Draws n distinct items from the first `size` entries of data (sampling
// without replacement) and appends them to sample.
//   data   - candidate values; taken by value because it is shuffled in place
//   size   - number of leading entries of data to draw from
//   sample - receives the drawn values
//   n      - number of values to draw; clamped to the number available
void Sampling(std::vector<int> data, int size, std::vector<int>& sample, int n)
{
    // Never look past the end of data, and never draw more items than exist;
    // otherwise rand() % (size - i) would divide by zero or index out of range.
    if (size > static_cast<int>(data.size()))
    {
        size = static_cast<int>(data.size());
    }
    if (n > size)
    {
        n = size;
    }
    if (n <= 0)
    {
        return;
    }

    // Partial shuffle: each round moves one random not-yet-chosen item to the
    // end of the unchosen range, so the last n entries are the sample.
    for (int i = 0; i < n; i++)
    {
        const int last = size - i - 1;
        const int pos = rand() % (last + 1);
        const int t = data[pos];
        data[pos] = data[last];
        data[last] = t;
    }
    sample.insert(sample.end(), data.begin() + (size - n), data.begin() + size);
}
// Moves the terminal cursor to the zero-based cell (row, col) using the ANSI
// cursor-position sequence "ESC [ row ; col H".
void GotoXY(int row, int col)
{
    // The escape sequence counts from 1 and treats 0 as 1, so without the
    // shift rows (and columns) 0 and 1 would be drawn on top of each other.
    printf("\033[%d;%dH", row + 1, col + 1);
}
// Writes the escape sequence "ESC [ <color> m" to stdout, which selects the
// given display attribute (e.g. one of the COLOR values) on ANSI terminals.
void SetColor(int color)
{
    const char* const sgr_format = "\033[%dm";
    printf(sgr_format, color);
}
运行结果如下:
下面把数据量加大10倍,并且动态显示聚类的过程。
// Headers for the names this program uses. The original header names were lost
// when the page was extracted, so the list is reconstructed from the code.
#include <algorithm>  // copy
#include <climits>    // INT_MAX
#include <cmath>      // fabs
#include <cstdio>     // printf
#include <cstdlib>    // rand, srand, abs
#include <ctime>      // time
#include <iostream>   // cout
#include <iterator>   // back_inserter
#include <set>
#include <vector>
#ifdef _WIN32
#include <windows.h>  // Sleep()
#else
#include <unistd.h>   // sleep(), usleep()
// Stand-in for the Win32 Sleep(milliseconds) so the demo also builds on POSIX
// systems. usleep() is only specified for values below one second, hence the split.
static void Sleep(unsigned int milliseconds)
{
    sleep(milliseconds / 1000);
    usleep((milliseconds % 1000) * 1000);
}
#endif
using namespace std;
// A 2-D sample point together with the id of the cluster it is assigned to.
struct Point
{
    int x;         // horizontal coordinate
    int y;         // vertical coordinate
    int group_id;  // cluster label

    Point(int _x, int _y, int _group_id)
        : x(_x), y(_y), group_id(_group_id)
    {
    }

    // Orders by x first, then by y. group_id does not take part in the
    // comparison, so two points at the same coordinates compare equivalent.
    bool operator<(const Point& p) const
    {
        return (x == p.x) ? (y < p.y) : (x < p.x);
    }
};
// Colour codes in the range 30-37, passed to SetColor(). The values are
// consecutive, so a colour can be computed as BLACK + group id.
enum COLOR
{
    BLACK     = 30,
    RED       = 31,
    GREEN     = 32,
    YELLOW    = 33,
    BLUE      = 34,
    PURPLE    = 35,
    DARKGREEN = 36,
    WHITE     = 37
};
// Tunable parameters of the demo (ten times the scale of the previous version).
const int min_point_num = 150;         // lower bound for the number of sample points
const int max_point_num = 650;         // upper bound for the number of sample points
const int min_x = 0;                   // coordinate range of the sample points
const int min_y = 0;
const int max_x = 800;
const int max_y = 400;
const int max_iteration_times = 1000;  // upper bound on update rounds in main()
const int threshold = 5;               // centres moving less than this in total => converged
const int k = 7;                       // number of clusters
void InitPoint(vector& vec_all_point, vector& vec_central_point);
void UpdatePoint(vector& vec_all_point, vector& vec_central_point);
void PrintPoint(const vector& vec_all_point);
void Sampling(vector data, int size, vector& sample, int n);
void GotoXY(int row, int col);
void SetColor(int color);
int main()
{
vector vec_all_point;
vector vec_central_point;
InitPoint(vec_all_point, vec_central_point);
PrintPoint(vec_all_point);
Sleep(3000);
for (int i = 0; i < max_iteration_times; i++)
{
vector vec_pre_central_point(vec_central_point.begin(), vec_central_point.end());
UpdatePoint(vec_all_point, vec_central_point);
int delta_x_sum = 0;
int delta_y_sum = 0;
for (int j = 0; j < k; j++)
{
delta_x_sum += fabs(vec_central_point[j].x - vec_pre_central_point[j].x);
delta_y_sum += fabs(vec_central_point[j].y - vec_pre_central_point[j].y);
}
if (delta_x_sum < threshold && delta_y_sum < threshold)
{
GotoXY(max_y + 50, 0);
cout << "更新" << i + 1 << "次,中心点不再变化,聚类完成" << endl;
break;
}
PrintPoint(vec_all_point);
Sleep(1000);
}
GotoXY(max_y + 100, 0);
return 0;
}
void InitPoint(vector& vec_all_point, vector& vec_central_point)
{
//随机产生若干个点
srand(unsigned int(time(NULL)));
int point_num = min_point_num + rand() % (max_point_num - min_point_num + 1);
set set_all_point;
while (true)
{
int x = min_x + rand() % (max_x - min_x + 1);
int y = min_y + rand() % (max_y - min_y + 1);
set_all_point.insert(Point(x, y, 0));
if (set_all_point.size() >= point_num)
{
break;
}
}
copy(set_all_point.begin(), set_all_point.end(), back_inserter(vec_all_point));
//随机产生k个中心点
vector data;
for (int i = 0; i < point_num; i++)
{
data.push_back(i);
}
vector sample;
Sampling(data, point_num, sample, k);
for (int i = 0; i < k; i++)
{
int central_point_index = sample[i];
vec_all_point[central_point_index].group_id = i + 1;
vec_central_point.push_back(vec_all_point[central_point_index]);
}
}
void UpdatePoint(vector& vec_all_point, vector& vec_central_point)
{
for (int i = 0; i < vec_all_point.size(); i++)
{
int min_distance = INT_MAX;
int new_group_id = INT_MAX;
for (int j = 0; j < vec_central_point.size(); j++)
{
int distance = (vec_all_point[i].x - vec_central_point[j].x) * (vec_all_point[i].x - vec_central_point[j].x) + (vec_all_point[i].y - vec_central_point[j].y) * (vec_all_point[i].y - vec_central_point[j].y);
if (distance < min_distance)
{
min_distance = distance;
new_group_id = vec_central_point[j].group_id;
}
}
vec_all_point[i].group_id = new_group_id;
}
int sum_x[k] = { 0 };
int sum_y[k] = { 0 };
int avg_x[k] = { 0 };
int avg_y[k] = { 0 };
int count[k] = { 0 };
for (int i = 0; i < vec_all_point.size(); i++)
{
sum_x[vec_all_point[i].group_id - 1] += vec_all_point[i].x;
sum_y[vec_all_point[i].group_id - 1] += vec_all_point[i].y;
count[vec_all_point[i].group_id - 1]++;
}
vec_central_point.clear();
for (int i = 0; i < k; i++)
{
avg_x[i] = sum_x[i] / count[i];
avg_y[i] = sum_y[i] / count[i];
vec_central_point.push_back(Point(avg_x[i], avg_y[i], i + 1));
}
}
void PrintPoint(const vector& vec_all_point)
{
//system("cls");
for (int i = 0; i < vec_all_point.size(); i++)
{
int row = max_y - vec_all_point[i].y;
int col = vec_all_point[i].x;
GotoXY(row, col);
if (vec_all_point[i].group_id != 0)
{
SetColor(BLACK + vec_all_point[i].group_id);
cout << vec_all_point[i].group_id;
}
else
{
SetColor(WHITE);
cout << "*";
}
}
}
// Draws n distinct items from the first `size` entries of data (sampling
// without replacement) and appends them to sample.
//   data   - candidate values; taken by value because it is shuffled in place
//   size   - number of leading entries of data to draw from
//   sample - receives the drawn values
//   n      - number of values to draw; clamped to the number available
void Sampling(std::vector<int> data, int size, std::vector<int>& sample, int n)
{
    // Never look past the end of data, and never draw more items than exist;
    // otherwise rand() % (size - i) would divide by zero or index out of range.
    if (size > static_cast<int>(data.size()))
    {
        size = static_cast<int>(data.size());
    }
    if (n > size)
    {
        n = size;
    }
    if (n <= 0)
    {
        return;
    }

    // Partial shuffle: each round moves one random not-yet-chosen item to the
    // end of the unchosen range, so the last n entries are the sample.
    for (int i = 0; i < n; i++)
    {
        const int last = size - i - 1;
        const int pos = rand() % (last + 1);
        const int t = data[pos];
        data[pos] = data[last];
        data[last] = t;
    }
    sample.insert(sample.end(), data.begin() + (size - n), data.begin() + size);
}
// Moves the terminal cursor to the cell for the zero-based position (row, col).
// Both coordinates are divided by 10 to scale the larger coordinate range of
// this version down to terminal cells.
void GotoXY(int row, int col)
{
    // The escape sequence "ESC [ row ; col H" counts from 1 and treats 0 as 1,
    // so without the shift cells 0 and 1 would be drawn on top of each other.
    printf("\033[%d;%dH", row / 10 + 1, col / 10 + 1);
}
// Writes the escape sequence "ESC [ <color> m" to stdout, which selects the
// given display attribute (e.g. one of the COLOR values) on ANSI terminals.
void SetColor(int color)
{
    const char* const sgr_format = "\033[%dm";
    printf(sgr_format, color);
}
程序初始效果(随机选择k个中心点,其它样本点用*表示):
最后聚类效果:
聚类动态过程演示:
kmeans