k-means是一种无监督学习算法:事先不知道类别,自动将相似的对象归到同一个簇中。它属于聚类分析(cluster analysis)算法,用于计算数据的聚集情况,其核心思想是不断地把每个对象归入离它最近的种子点(聚类中心),再用簇内对象的均值更新种子点。
先随机选取K个对象作为初始的聚类中心。然后计算每个对象与各个种子聚类中心之间的距离,把每个对象分配给距离它最近的聚类中心。聚类中心以及分配给它们的对象就代表一个聚类。一旦全部对象都被分配了,每个聚类的聚类中心会根据聚类中现有的对象被重新计算。经过反复迭代,直到聚类中心变化极小,输出最终的聚类结果。
本文是对图像进行聚类:事先通过GDAL读取图像的数据,然后进行k-means迭代并保存为新图像。话不多说,直接上代码。
2.1 获取Dataset数据
/// <summary>
/// Reads every raster band of the dataset into its own pixel array so the
/// DN values can be processed in memory.
/// </summary>
/// <param name="dataset">Source dataset to read.</param>
/// <returns>
/// One <c>int[]</c> per band, in band order, each holding
/// RasterXSize * RasterYSize values as filled by <c>ReadRaster</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="dataset"/> is null.</exception>
/// <exception cref="InvalidOperationException">A band could not be read.</exception>
public static List<int[]> GetDNdata(Dataset dataset)
{
    if (dataset == null)
    {
        throw new ArgumentNullException("dataset");
    }

    int xSize = dataset.RasterXSize;
    int ySize = dataset.RasterYSize;
    // checked: a very large raster must fail loudly instead of wrapping to a bogus buffer size.
    int pixelCount = checked(xSize * ySize);

    List<int[]> idata = new List<int[]>(dataset.RasterCount);
    for (int i = 0; i < dataset.RasterCount; i++)
    {
        int[] bf = new int[pixelCount];
        // GDAL band numbers are 1-based. ReadRaster reports a CPLErr status; CE_None is 0.
        int status = (int)dataset.GetRasterBand(i + 1).ReadRaster(0, 0, xSize, ySize, bf, xSize, ySize, 0, 0);
        if (status != 0)
        {
            throw new InvalidOperationException("Failed to read raster band " + (i + 1) + ".");
        }
        idata.Add(bf);
    }
    return idata;
}
2.2 k-means静态函数
/// <summary>
/// K-means clustering of a raster image. Every pixel is treated as a spectral
/// vector (one component per band); the resulting class map is written as a
/// single-band grayscale image through <c>SaveFromDataset</c>.
/// </summary>
/// <param name="dt">Source dataset.</param>
/// <param name="threshold">
/// Iteration threshold: iteration stops once the largest movement of any
/// cluster center between two passes is below this value.
/// </param>
/// <param name="K">Number of clusters (must be at least 1).</param>
/// <param name="filepath">Path of the output image.</param>
/// <exception cref="ArgumentNullException"><paramref name="dt"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="K"/> is less than 1.</exception>
/// <exception cref="ArgumentException">The dataset has no raster bands.</exception>
public static void K_means(Dataset dt, double threshold, int K, string filepath)
{
    if (dt == null)
    {
        throw new ArgumentNullException("dt");
    }
    if (K < 1)
    {
        throw new ArgumentOutOfRangeException("K", "The number of clusters must be at least 1.");
    }

    // data[band][pixel]: DN values of every band.
    List<int[]> data = GetDNdata(dt);
    int bands = data.Count;
    if (bands == 0)
    {
        throw new ArgumentException("The dataset contains no raster bands.", "dt");
    }
    int pixels = data[0].Length;

    // Cluster centers [class, band], initialised at random inside each band's value range.
    // The min/max of a band is computed once and reused for all K centers.
    double[,] center = new double[K, bands];
    Random ran = new Random();
    for (int b = 0; b < bands; b++)
    {
        double[] maxandmin = { 0, 0 };
        dt.GetRasterBand(b + 1).ComputeRasterMinMax(maxandmin, 0);
        for (int k = 0; k < K; k++)
        {
            center[k, b] = maxandmin[0] + (maxandmin[1] - maxandmin[0]) * ran.NextDouble();
        }
    }

    // Class label of every pixel. -1 means "not assigned yet", so the first pass
    // always counts as a change.
    int[] mark = new int[pixels];
    for (int p = 0; p < pixels; p++)
    {
        mark[p] = -1;
    }

    bool converged = false;
    while (!converged)
    {
        int[] counts = new int[K];                    // pixels per class
        double[,] centersum = new double[K, bands];   // per-class sum of pixel vectors
        bool changed = false;

        // Assign every pixel to its nearest center. Squared Euclidean distance
        // gives the same ordering as the true distance; ties go to the lowest class index.
        for (int p = 0; p < pixels; p++)
        {
            int best = 0;
            double bestDistance = double.PositiveInfinity;
            for (int k = 0; k < K; k++)
            {
                double d = 0.0;
                for (int b = 0; b < bands; b++)
                {
                    double diff = data[b][p] - center[k, b];
                    d += diff * diff;
                }
                if (d < bestDistance)
                {
                    bestDistance = d;
                    best = k;
                }
            }

            if (mark[p] != best)
            {
                mark[p] = best;
                changed = true;
            }
            counts[best]++;
            for (int b = 0; b < bands; b++)
            {
                centersum[best, b] += data[b][p];
            }
        }

        // Keep a real copy of the previous centers; assigning the array reference
        // would alias the same storage and always yield a shift of zero.
        double[,] oldcenter = (double[,])center.Clone();

        // Move each center to the mean of its pixels. A class without pixels keeps
        // its previous center instead of becoming NaN through 0/0.
        for (int k = 0; k < K; k++)
        {
            if (counts[k] == 0)
            {
                continue;
            }
            for (int b = 0; b < bands; b++)
            {
                center[k, b] = centersum[k, b] / counts[k];
            }
        }

        // Largest movement of any center during this pass.
        double maxShift = 0.0;
        for (int k = 0; k < K; k++)
        {
            double shift = centerdist(oldcenter, center, bands, k, k);
            if (shift > maxShift)
            {
                maxShift = shift;
            }
        }

        // Stop when the centers have settled. "No label changed" also ends the loop,
        // which guarantees termination for a non-positive or NaN threshold.
        converged = !changed || maxShift < threshold;
    }

    // Grayscale value per class, evenly spaced from 0 to 255
    // (K == 2 gives 0 and 255; a single class maps to 0).
    int[] gray = new int[K];
    for (int k = 0; k < K; k++)
    {
        gray[k] = K > 1 ? k * 255 / (K - 1) : 0;
    }

    int[] outdata = new int[pixels];
    for (int p = 0; p < pixels; p++)
    {
        outdata[p] = gray[mark[p]];
    }

    List<int[]> dns = new List<int[]> { outdata };
    SaveFromDataset(dt, filepath, dns, dt.RasterXSize, dt.RasterYSize); // save as a grayscale image
}
2.3 计算上一中心与现在的中心的欧氏距离
/// <summary>
/// Euclidean distance between row <paramref name="j"/> of <paramref name="X"/>
/// and row <paramref name="i"/> of <paramref name="Y"/>, taken over the first
/// <paramref name="bands"/> columns. Used to measure how far a cluster center
/// moved between two iterations.
/// </summary>
/// <param name="X">First set of centers, indexed [class, band].</param>
/// <param name="Y">Second set of centers, indexed [class, band].</param>
/// <param name="bands">Number of band components to include.</param>
/// <param name="j">Row index into <paramref name="X"/>.</param>
/// <param name="i">Row index into <paramref name="Y"/>.</param>
/// <returns>The square root of the summed squared component differences.</returns>
public static double centerdist(double[,] X, double[,] Y, int bands, int j, int i)
{
    double sumOfSquares = 0.0;
    int b = 0;
    while (b < bands)
    {
        double delta = X[j, b] - Y[i, b];
        sumOfSquares += Math.Pow(delta, 2);
        b++;
    }
    return Math.Sqrt(sumOfSquares);
}
2.4 保存为新图像
/// <summary>
/// Writes a single band of pixel values to a new GeoTIFF, copying the
/// geotransform, projection and band-1 data type from the source dataset.
/// </summary>
/// <param name="src">Dataset whose georeferencing and data type are copied.</param>
/// <param name="filePath">Path of the GeoTIFF to create.</param>
/// <param name="band">Pixel buffers; only the first entry is written.</param>
/// <param name="xSize">Width of the output image in pixels.</param>
/// <param name="ySize">Height of the output image in pixels.</param>
/// <exception cref="ArgumentNullException"><paramref name="src"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="band"/> is null or empty, or its first buffer is smaller than xSize * ySize.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The GTiff driver is unavailable or the output file could not be created.
/// </exception>
public static void SaveFromDataset(Dataset src, string filePath, List<int[]> band, int xSize, int ySize)
{
    if (src == null)
    {
        throw new ArgumentNullException("src");
    }
    if (band == null || band.Count == 0 || band[0] == null)
    {
        throw new ArgumentException("At least one pixel buffer is required.", "band");
    }
    // The buffer is handed to native code; refuse a buffer that is too short for the window.
    if (band[0].Length < (long)xSize * ySize)
    {
        throw new ArgumentException("The pixel buffer is smaller than xSize * ySize.", "band");
    }

    Driver driver = Gdal.GetDriverByName("GTiff");
    if (driver == null)
    {
        throw new InvalidOperationException("The GTiff driver is not available; make sure the GDAL drivers are registered.");
    }

    // The using block releases (and thereby flushes) the output dataset even if a call below throws.
    using (Dataset output = driver.Create(filePath, xSize, ySize, 1, src.GetRasterBand(1).DataType, null))
    {
        if (output == null)
        {
            throw new InvalidOperationException("Could not create the output file: " + filePath);
        }

        double[] trans = new double[6];
        src.GetGeoTransform(trans);
        output.SetGeoTransform(trans);
        output.SetProjection(src.GetProjection());

        // Write window matches the size the output was created with, not the source size.
        output.GetRasterBand(1).WriteRaster(0, 0, xSize, ySize, band[0], xSize, ySize, 0, 0);
    }
}
原图 :
效果: