K-Means实验

实验一:聚类

实验代码:
testKMeans.m —— 测试主函数

function testKMeans()
% TESTKMEANS  Demo driver: cluster a synthetic 2-D data set and plot the result.
%
%   Obtains the data from GenerateGaussianMixtureDataset, partitions it into
%   two clusters with K_Means, and draws the labelled points together with
%   the returned centroids in figure 3.

    numClusters = 2;
    samples = GenerateGaussianMixtureDataset();
    [labels, centers] = K_Means(samples, numClusters);

    % Logical masks selecting the members of each cluster.
    inFirst  = (labels == 1);
    inSecond = (labels == 2);

    % show result
    figure(3)
    plot(samples(inFirst,1), samples(inFirst,2), 'ro', 'MarkerFaceColor','r');
    hold on;   % keep the first cluster visible while the other layers are added
    plot(samples(inSecond,1), samples(inSecond,2), 'bo', 'MarkerFaceColor','b');
    plot(centers(:,1), centers(:,2), 'gx', 'LineWidth',4, 'MarkerSize',15);
    title('K-Means Result');
end

GenerateGaussianMixtureDataset.m —— 生成高斯混合数据

function [ X ] = GenerateGaussianMixtureDataset( )
% GENERATEGAUSSIANMIXTUREDATASET  Sample a two-component 2-D Gaussian data set.
%
%   X = GenerateGaussianMixtureDataset() draws 100 points from each of two
%   Gaussians (means [2 3] and [7 8], both with covariance [1 0; 0 2]) via
%   mvnrnd and returns them stacked row-wise, first component on top.
%
%   Side effects:
%     figure 1 - the two components drawn in different colours ('Real Data')
%     figure 2 - the pooled points drawn in a single colour (no labels)

pointsPerComponent = 100;
sharedCov = [1 0; 0 2];   % both components use the same covariance
meanA = [2 3];
meanB = [7 8];

% show real data: one colour per generating component
figure(1)
compA = mvnrnd(meanA, sharedCov, pointsPerComponent);
plot(compA(:,1), compA(:,2), 'rx', 'MarkerFaceColor','r', 'LineWidth',2, 'MarkerSize',10);
hold on;
compB = mvnrnd(meanB, sharedCov, pointsPerComponent);
plot(compB(:,1), compB(:,2), 'bx', 'MarkerFaceColor','b', 'LineWidth',2, 'MarkerSize',10);
title('Real Data');

% Pool the samples; rows of component A come first, then component B.
X = vertcat(compA, compB);

% no label: the same points without any class information
figure(2)
plot(X(:,1), X(:,2), 'mo', 'MarkerFaceColor','m');
title('No label data : X')
end

K_Means.m —— K-Means函数

function [gamma, centroids] = K_Means( X, K )
% K_MEANS  Partition the rows of X into K clusters with Lloyd's algorithm.
%
%   [gamma, centroids] = K_Means(X, K)
%
%   Inputs:
%     X - N-by-D data matrix, one sample per row. Non-floating-point input
%         (e.g. uint8 pixel values) is converted to double first, because
%         integer subtraction saturates and would corrupt the distances.
%     K - number of clusters; an integer with 1 <= K <= N.
%
%   Outputs:
%     gamma     - N-by-1 indicator vector; gamma(i) is the index (1..K) of
%                 the centroid nearest to X(i,:).
%     centroids - K-by-D matrix of cluster centres.
%
%   The initial centroids are K distinct rows of X picked at random, so the
%   result can differ between runs. Iteration stops once the total squared
%   distance to the assigned centroids changes by less than 1e-10 between
%   two consecutive iterations.
%
%   Errors:
%     K_Means:invalidK    - K is not an integer in [1, N].
%     K_Means:invalidData - X contains NaN or Inf (the cost would become
%                           NaN and the convergence test could never pass).

if ~isfloat(X)
    X = double(X);
end
N = size(X, 1);

if ~isscalar(K) || K < 1 || K ~= floor(K) || K > N
    error('K_Means:invalidK', ...
          'K must be an integer between 1 and the number of rows of X (%d).', N);
end
if any(~isfinite(X(:)))
    error('K_Means:invalidData', 'X must not contain NaN or Inf values.');
end

% initialize: use K distinct random data points as the starting centroids
rndp = randperm(N);
centroids = X(rndp(1:K),:);

gamma = zeros(N,1);      % indicator variable (cluster index of each sample)
sqr_dist = zeros(N,K);   % squared distance from every sample to every centroid
precost = 0;             % cost of the previous iteration

while true
    % E step: assign every sample to its nearest centroid.
    for j = 1:K
        sqr_dist(:,j) = sum(bsxfun(@minus, X, centroids(j,:)).^2, 2);
    end
    [min_dist, gamma] = min(sqr_dist, [], 2);

    % Cost function: total squared distance to the centroids the samples
    % were just assigned to (evaluated before the centroids move).
    cost = sum(min_dist);

    % M step: move every centroid to the mean of its members.
    for j = 1:K
        members = (gamma == j);
        if any(members)
            centroids(j,:) = mean(X(members,:), 1);
        end
        % A cluster without members keeps its previous centroid; taking the
        % mean of an empty set would turn it into NaN for good.
    end

    % Leave the loop once the cost has stopped changing.
    if abs(cost - precost) < 1e-10
        break;
    end
    precost = cost;
end

end

实验结果:
(1)真实数据
K-Means实验_第1张图片
(2)不含标签的数据
K-Means实验_第2张图片
(3)K-Means分类结果
K-Means实验_第3张图片

实验二:图像分割

function [ segImg ] = ImageSegmentation( img, K )
% IMAGESEGMENTATION  Segment an image by clustering its pixels with K_Means.
%
%   segImg = ImageSegmentation(img, K) treats every pixel of img as one
%   sample whose features are its channel values, clusters the samples into
%   K groups with K_Means, and returns a double array with the same
%   rows/columns/channels as img in which every pixel is replaced by the
%   centroid of the cluster it was assigned to.

nRows = size(img,1);
nCols = size(img,2);
nChan = size(img,3);

% Flatten the image into one row per pixel, walking it row by row
% (pixel (i,j) lands in row (i-1)*nCols + j). Swapping the first two
% dimensions first makes the column-major reshape produce that order.
pixelRows = double(reshape(permute(img, [2 1 3]), nRows*nCols, nChan));

[gamma, centroids] = K_Means(pixelRows, K);

% Look up each pixel's centroid and fold the list back into image shape,
% undoing the row/column swap from above.
quantised = centroids(gamma,:);
segImg = permute(reshape(quantised, nCols, nRows, nChan), [2 1 3]);
end

实验结果:
这里写图片描述
K=5
这里写图片描述

实验三:图像压缩

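由于K-Means可以对图片进行分割,利用少量存储空间表示K类颜色,而对应像素位置只存储颜色类别号,这样可以使得图像的存储空间大大减小,从而实现图像压缩。例如,对于含 N 个像素的 24 位 RGB 图像,原始存储需要 24N 比特;聚类后只需存储 K 种颜色(24K 比特)以及每个像素的类别号(共 N⌈log₂K⌉ 比特),当 K=5 时每个像素仅需 3 比特。当然K-Means实现的图像压缩是有损压缩。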

理论推导见:《EM算法(期望最大化)——从EM算法角度理解K-Means与GMM的区别》,链接:http://blog.csdn.net/tingyue_/article/category/6850758

你可能感兴趣的:(机器学习基础知识)