K-means是机器学习中重要的算法之一,有许多变种版本和应用场景。该算法的执行效率较高,应用结果的可展示性较好。
希望你能按照算法的基本原理,编写一个通用算法,然后用这个算法实现两个应用:(1)在图形界面上打点,找出它们的聚类中心;(2)实现对一张图像像素值的聚类,以此可以实现对图像的有损压缩。
注意,不要调用几行命令就实现的软件包。
首先定义如下接口(或抽象类):
/**
 * Contract for objects that can be clustered by the generic k-means routine.
 *
 * <p>The interface is parameterised as {@code Comparable<KmObj>} so that
 * {@link #compareTo(KmObj)} is the actual {@code Comparable} method rather
 * than an unrelated overload next to {@code compareTo(Object)}.
 */
public interface KmObj extends Comparable<KmObj> {
    /** Returns the distance between this object and {@code other}. */
    public double getDistance(KmObj other);

    /** Orders objects, so that cluster centres can be checked for coincidence. */
    @Override
    public int compareTo(KmObj other);

    /**
     * Returns the centre of the objects in {@code objs} that are selected by
     * the indices listed in {@code serials}.
     */
    public KmObj getCenter(KmObj[] objs, int[] serials);
}
然后实现如下的静态方法:
public static int[][] kmean(KmObj[] objs, int k)
2.输入输出及具体要求
(1) 界面打点聚类
该系统的界面如图1所示,你可以增加另外的辅助功能,如“清除所有的点和聚类中心”等。
(2)图像像素聚类压缩
用同样的算法,实现图像像素的聚类和压缩功能,如图2所示。
我使用python对k-means算法进行了简单的实现。我在p1.py(打点求中心)和p2.py(图像压缩)中分别使用这个算法对二维的坐标数据和三维的RGB数据进行了处理。
首先是p1.py。我使用tkinter包实现了简单的打点界面。具体代码如下:
from tkinter import *
import random
# Initial width and height of the window's drawing canvas
canvas_width = 1000
canvas_height = 500
def paint(event):
    """Draw a dot at the clicked position and record the click.

    The dot is an oval whose bounding box extends one unit either side of
    the click position; the click coordinates are appended to the
    module-level ``points`` list as an ``(x, y)`` tuple.
    """
    dot_colour = "#476042"
    cx, cy = event.x, event.y
    w.create_oval(cx - 1, cy - 1, cx + 1, cy + 1, fill=dot_colour)
    points.append((cx, cy))
def distance(x, y):
    """Return the Euclidean distance between two points.

    Args:
        x: Sequence of numeric coordinates.
        y: Sequence of numeric coordinates, paired positionally with ``x``.

    Returns:
        The square root of the summed squared coordinate differences.  If
        the sequences differ in length, only the leading coordinates they
        have in common are compared.
    """
    # Named ``squared`` rather than ``sum`` so the builtin is not shadowed.
    squared = sum((a - b) ** 2 for a, b in zip(x, y))
    return squared ** 0.5
def k_means(event):
    """Cluster the clicked points and draw the cluster centres on the canvas.

    Reads the requested number of clusters from ``ju_num`` and the points
    from the module-level ``points`` list, runs k-means (at most 50
    iterations, stopping early once the centres no longer change), draws
    each resulting centre on the canvas ``w``, updates the hint label and
    disables the button.

    If the cluster count is not a positive integer, or there are fewer
    points than clusters, a hint is shown and nothing is computed.

    Args:
        event: The Tk event that triggered the handler (unused).
    """
    point = points.copy()

    # Validate the user-supplied cluster count before doing any work.
    try:
        group_num = int(ju_num.get())
    except ValueError:
        group_num = 0
    if group_num < 1:
        hint_str.set('聚类数目必须是正整数!请检查输入是否有误')
        return
    if len(point) < group_num:
        hint_str.set('您的点个数比分类数还少!请检查输入是否有误')
        return

    # Pick ``group_num`` distinct entries of the point list as initial centres.
    center = random.sample(point, group_num)
    new_center = []
    times = 0  # number of iterations performed so far
    # Iterate until the centres stop moving, or 50 iterations at most.
    while times < 50 and center != new_center:
        if times != 0:
            center = new_center
            new_center = []
        times += 1
        print(center)  # debugging aid

        # Assign every point to the group of its nearest centre.
        groups = [[] for _ in center]
        for p in point:
            distances = [distance(p, c) for c in center]
            groups[distances.index(min(distances))].append(p)

        # Recompute each centre as the mean of its group.
        for old, group in zip(center, groups):
            if group:
                size = len(group)
                new_center.append((sum(px for px, _ in group) / size,
                                   sum(py for _, py in group) / size))
            else:
                # An empty group has no mean; keep its previous centre
                # instead of moving it to an unrelated position.
                print('分组出现空,计算可能有误')
                new_center.append(old)

    # Draw the final centres.
    for cx, cy in new_center:
        w.create_oval(cx - 3, cy - 3, cx + 3, cy + 3, fill="#00FF7F")

    # Report completion and disable the button.
    hint_str.set('聚类中心已计算并标出!')
    button['state'] = DISABLED
# Entry point: build the window and run the Tk event loop
if __name__ == '__main__':
    points = []  # every clicked point, stored as an (x, y) tuple

    # Create the window and its drawing canvas
    root = Tk()
    root.title("K-means聚类算法演示系统")
    w = Canvas(root, width=canvas_width, height=canvas_height)
    w.pack(expand=YES, fill=BOTH)
    # Left click: draw a dot and record it in ``points``.  The binding must
    # name the event explicitly; an empty sequence does not bind anything.
    w.bind("<Button-1>", paint)

    # Hint label above the controls
    hint_str = StringVar()
    hint_str.set('点击窗口任意位置以绘制点')
    hint = Label(root, textvariable=hint_str)
    hint.pack(side=TOP)

    # Bottom row: prompt, cluster-count entry and the run button
    bottom_frame = Frame(root)
    bottom_frame.pack(side=BOTTOM)
    message = Label(bottom_frame, text="请输入最终的聚类数目:")
    message.grid(column=0, row=0)
    ju_num = StringVar()
    ju_num.set('3')  # default cluster count
    entry = Entry(bottom_frame, textvariable=ju_num)
    entry.grid(column=1, row=0)
    button = Button(bottom_frame, text='K-means!')
    button.grid(column=2, row=0)
    # Left click on the button runs the clustering
    button.bind('<Button-1>', k_means)

    # Interface is complete; hand control to Tk
    mainloop()
然后是p2.py。我使用PIL包读取了源文件同目录下的trump.jpeg,经过处理后将结果输出到同目录下的result.jpg中。具体代码如下:
from PIL import Image
import numpy as np
import random
def write_image(array, path):
    """Build an image from ``array`` and save it to ``path``."""
    Image.fromarray(array).save(path)
def distance(x, y):
    """Return the Euclidean distance between two colour vectors.

    Args:
        x: Sequence of numeric channel values.
        y: Sequence of numeric channel values, paired positionally with ``x``.

    Returns:
        The square root of the summed squared channel differences, as a
        float.  If the sequences differ in length, only the leading channels
        they have in common are compared.
    """
    # Convert each value with float() before subtracting: this keeps
    # fixed-width integer inputs (e.g. NumPy uint8) from wrapping around,
    # and, unlike int(), does not discard the fractional part of a centroid.
    squared = sum((float(a) - float(b)) ** 2 for a, b in zip(x, y))
    return squared ** 0.5
def k_means(pic_array, group_num):
    """Reduce an image to ``group_num`` colours with k-means clustering.

    Args:
        pic_array: Image pixels indexed as ``pic_array[row][col][channel]``.
            Channels 0, 1 and 2 of every pixel are clustered and then
            overwritten in place.
        group_num: Number of colour clusters; must be at least 1.

    Returns:
        ``pic_array`` itself, with channels 0-2 of every pixel replaced by
        those of its nearest cluster centre (truncated to int).

    Raises:
        ValueError: If ``group_num`` is less than 1, or (raised by
            ``random.sample``) larger than the number of pixels.
    """
    if group_num < 1:
        raise ValueError('group_num must be at least 1')

    # Flatten the image into one list of pixels.  Channels are converted to
    # Python ints so the per-cluster sums below cannot wrap around the way
    # fixed-width integer scalars (e.g. NumPy uint8) can.
    array = [[int(col[0]), int(col[1]), int(col[2])]
             for line in pic_array for col in line]

    # Pick ``group_num`` distinct entries of the pixel list as initial centres.
    center = random.sample(array, group_num)
    print(center)
    new_center = []
    times = 0  # number of iterations performed so far
    # Iterate until the centres stop moving, or 50 iterations at most.
    while times < 50 and center != new_center:
        if times != 0:
            center = new_center
            new_center = []
        times += 1
        print(center)  # debugging aid

        # Assign every pixel to the group of its nearest centre.
        groups = [[] for _ in center]
        for p in array:
            distances = [distance(p, c) for c in center]
            groups[distances.index(min(distances))].append(p)

        # Recompute each centre as the per-channel mean of its group.
        for old, group in zip(center, groups):
            if group:
                size = len(group)
                new_center.append([sum(channel) / size
                                   for channel in zip(*group)])
            else:
                # An empty group has no mean; keep its previous centre
                # instead of resetting it to an arbitrary colour.
                print('分组出现空,计算可能有误')
                new_center.append(list(old))

    # Replace every pixel with its nearest final centre.
    for line in pic_array:
        for col in line:
            pixel = [int(col[0]), int(col[1]), int(col[2])]
            distances = [distance(pixel, c) for c in new_center]
            nearest = new_center[distances.index(min(distances))]
            # The centre holds floats while the image holds integers, so
            # each channel is converted and assigned explicitly.
            col[0] = int(nearest[0])
            col[1] = int(nearest[1])
            col[2] = int(nearest[2])

    print('k—means已完成')
    return pic_array
# Entry point: read the image, quantise its colours, write the result
if __name__ == '__main__':
    image_path = 'trump.jpeg'
    # Normalise to 3-channel RGB so every pixel exposes R, G, B at indices
    # 0, 1, 2 whatever the source mode is, and so the result can be saved
    # as JPEG.  The context manager closes the underlying file once the
    # pixel data has been copied into the array.
    with Image.open(image_path) as image:
        image_array = np.array(image.convert('RGB'))

    group_num = int(input('分组数:'))
    new_array = k_means(image_array, group_num)
    print('生成图片输出中...')
    write_image(new_array, 'result.jpg')
    print('输出完成')