1)任意选取K个对象作为初始聚类中心(O1,O2,…,Oi,…,Ok);
2)将余下的对象分到各个类中去(该对象与哪一个聚类中心最近就被分配到哪一个聚类簇中);
3)对于每个类(其中心点为Oi),依次选取该类中的每一个非中心点Or,用Or代替Oi后重复步骤2,并计算误差E(各个点到其所属中心点的欧式距离之和);选择使E最小的那个Or来代替Oi。
4)重复步骤3,直到K个medoids固定下来。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;

namespace K_medoids
{
    class Program
    {
        /// <summary>
        /// Entry point: loads whitespace-separated numeric rows from a text file,
        /// clusters them with PAM (k-medoids) and prints every cluster to the console.
        /// The data file is resolved relative to the application's base directory.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the data file name, <c>args[1]</c> is the number of clusters k.
        /// </param>
        static void Main(string[] args)
        {
            string path;
            int k;
            if (!TryReadArguments(args, out path, out k))
            {
                Console.Write("参数错误");
                return;
            }

            var db = LoadPoints(path);

            // Every cluster needs its own distinct seed, so k may not exceed the
            // number of loaded points (this also rejects an empty data file).
            if (k > db.Count)
            {
                Console.Write("参数错误");
                return;
            }

            var initialCenters = PickInitialCenters(db, k);
            var result = Pam(db, initialCenters);

            PrintClusters(result);
            Console.ReadKey();
        }

        /// <summary>
        /// Validates the command line and extracts the data file path and k.
        /// </summary>
        /// <returns><c>false</c> when an argument is missing, the path is malformed,
        /// or k is not a positive integer.</returns>
        private static bool TryReadArguments(string[] args, out string path, out int k)
        {
            path = string.Empty;
            k = 0;

            if (args == null || args.Length < 2)
            {
                return false;
            }

            try
            {
                path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, args[0]);
            }
            catch (ArgumentException)
            {
                // Path.Combine rejects file names containing invalid characters.
                return false;
            }

            // k == 0 used to cause a division by zero and k < 0 an empty seed list.
            return int.TryParse(args[1], out k) && k > 0;
        }

        /// <summary>
        /// Reads the data file: one point per line, coordinates separated by spaces or tabs.
        /// Blank lines are skipped and repeated separators are tolerated on every line.
        /// </summary>
        /// <param name="path">Full path of the data file.</param>
        /// <returns>All points in file order.</returns>
        private static List<Indivaduls> LoadPoints(string path)
        {
            var separators = new[] { ' ', '\t' };
            var db = new List<Indivaduls>();

            // "using" guarantees the file handle is released even if parsing throws.
            using (var reader = new StreamReader(path, Encoding.Default))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    var fields = line.Trim().Split(separators, StringSplitOptions.RemoveEmptyEntries);
                    if (fields.Length == 0)
                    {
                        continue;
                    }

                    db.Add(new Indivaduls { Numbers = fields.Select(s => Convert.ToDouble(s)).ToList() });
                }
            }

            return db;
        }

        /// <summary>
        /// Picks k evenly spaced points from the data set as the initial medoids.
        /// Requires <c>1 &lt;= k &lt;= db.Count</c>.
        /// </summary>
        private static List<Indivaduls> PickInitialCenters(List<Indivaduls> db, int k)
        {
            // Keep the historical stride whenever all k indices fit; otherwise fall
            // back to floor(n / k), whose largest index (k - 1) * floor(n / k) is
            // always below n. The historical stride alone overran the list
            // (for example n = 6, k = 3 produced index 6).
            var gap = db.Count / k + 1;
            if ((long)(k - 1) * gap >= db.Count)
            {
                gap = db.Count / k;
            }

            var centers = new List<Indivaduls>(k);
            for (var i = 0; i < k; i++)
            {
                centers.Add(db[i * gap]);
            }

            return centers;
        }

        /// <summary>
        /// Writes each cluster to the console: the medoid on the first line, then one
        /// line per member point, then a separator line.
        /// </summary>
        private static void PrintClusters(List<Crows> clusters)
        {
            foreach (var crow in clusters)
            {
                WritePoint(crow.CenterPoint);
                foreach (var point in crow.CrowsPoint)
                {
                    WritePoint(point);
                }

                Console.WriteLine("========================");
            }
        }

        /// <summary>
        /// Writes the coordinates of one point followed by a CR/LF.
        /// </summary>
        private static void WritePoint(Indivaduls point)
        {
            foreach (var number in point.Numbers)
            {
                // NOTE(review): "\0" is a NUL character, not a space. It is kept
                // unchanged so the output stays byte-identical; confirm whether a
                // visible separator was intended.
                Console.Write(number + "\0");
            }

            Console.Write("\r\n");
        }

        /// <summary>
        /// PAM (Partitioning Around Medoids). Starting from the given medoids, tries to
        /// replace each medoid with every non-medoid point of its own cluster, keeps the
        /// replacement that lowers the total cost the most, and repeats until a full
        /// sweep over all clusters yields no further improvement.
        /// </summary>
        /// <param name="indivadulses">All points to cluster, including the initial medoids.</param>
        /// <param name="centerPoints">Initial medoids; this list is not modified.</param>
        /// <returns>The final clusters, in the same order as <paramref name="centerPoints"/>.</returns>
        /// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
        public static List<Crows> Pam(List<Indivaduls> indivadulses, List<Indivaduls> centerPoints)
        {
            if (indivadulses == null)
            {
                throw new ArgumentNullException("indivadulses");
            }

            if (centerPoints == null)
            {
                throw new ArgumentNullException("centerPoints");
            }

            // Work on a copy so the caller's seed list is never touched.
            var medoids = new List<Indivaduls>(centerPoints);
            var clusters = K_medoids(indivadulses, medoids);
            var bestCost = TotalCost(clusters);

            // Swaps are applied one at a time against the CURRENT medoids and are only
            // accepted when they strictly lower the total cost. The cost therefore
            // decreases monotonically, which guarantees termination.
            bool improved;
            do
            {
                improved = false;
                for (var i = 0; i < medoids.Count; i++)
                {
                    Indivaduls bestCandidate = null;
                    foreach (var candidate in clusters[i].CrowsPoint)
                    {
                        var trial = new List<Indivaduls>(medoids);
                        trial[i] = candidate;

                        var cost = TotalCost(K_medoids(indivadulses, trial));
                        if (cost < bestCost)
                        {
                            bestCost = cost;
                            bestCandidate = candidate;
                        }
                    }

                    if (bestCandidate != null)
                    {
                        medoids[i] = bestCandidate;
                        clusters = K_medoids(indivadulses, medoids);
                        improved = true;
                    }
                }
            }
            while (improved);

            return clusters;
        }

        /// <summary>
        /// Sum of the point-to-medoid distances over all clusters.
        /// </summary>
        private static double TotalCost(List<Crows> clusters)
        {
            var total = 0.0;
            foreach (var crow in clusters)
            {
                total += Distance(crow);
            }

            return total;
        }

        /// <summary>
        /// Single assignment step: creates one cluster per center and attaches every
        /// point to its nearest center. Points whose coordinates equal those of any
        /// center are not added to any cluster. Ties go to the earliest center.
        /// </summary>
        /// <param name="indivadulses">Points to assign, including the centers themselves.</param>
        /// <param name="centerPoints">Cluster centers.</param>
        /// <returns>One cluster per center, in the order of <paramref name="centerPoints"/>.</returns>
        public static List<Crows> K_medoids(List<Indivaduls> indivadulses, List<Indivaduls> centerPoints)
        {
            var resultCrows = new List<Crows>();
            foreach (var center in centerPoints)
            {
                resultCrows.Add(new Crows { CenterPoint = center });
            }

            foreach (var point in indivadulses)
            {
                if (centerPoints.MyContains(point))
                {
                    continue;
                }

                // Linear scan for the nearest center; strict "<" keeps the first
                // center on equal distance.
                var nearest = 0;
                var nearestDistance = P2PDistance(point, resultCrows[0].CenterPoint);
                for (var j = 1; j < resultCrows.Count; j++)
                {
                    var d = P2PDistance(point, resultCrows[j].CenterPoint);
                    if (d < nearestDistance)
                    {
                        nearestDistance = d;
                        nearest = j;
                    }
                }

                resultCrows[nearest].CrowsPoint.Add(point);
            }

            return resultCrows;
        }

        /// <summary>
        /// Absolute error of a clustering: the within-cluster distance sum of
        /// <paramref name="centerCrow"/> plus that of every cluster in
        /// <paramref name="otherPoints"/>.
        /// </summary>
        /// <param name="centerCrow">The cluster under consideration.</param>
        /// <param name="otherPoints">All remaining clusters.</param>
        /// <returns>The total of all point-to-center distances.</returns>
        public static double AbsoluteDiff(Crows centerCrow, List<Crows> otherPoints)
        {
            var distance = Distance(centerCrow);
            foreach (var crow in otherPoints)
            {
                distance += Distance(crow);
            }

            return distance;
        }

        /// <summary>
        /// Sum of the Euclidean distances from each member of a cluster to its center.
        /// </summary>
        /// <param name="crow">The cluster.</param>
        /// <returns>The distance sum; 0 for a cluster without members.</returns>
        public static double Distance(Crows crow)
        {
            var distance = 0.0;
            foreach (var point in crow.CrowsPoint)
            {
                distance += P2PDistance(crow.CenterPoint, point);
            }

            return distance;
        }

        /// <summary>
        /// Euclidean distance between two points.
        /// </summary>
        /// <param name="p1">First point.</param>
        /// <param name="p2">Second point.</param>
        /// <returns>The Euclidean distance.</returns>
        /// <exception cref="ArgumentNullException">A point is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">
        /// The points have different dimensions or have no coordinates.
        /// </exception>
        public static double P2PDistance(Indivaduls p1, Indivaduls p2)
        {
            if (p1 == null)
            {
                throw new ArgumentNullException("p1");
            }

            if (p2 == null)
            {
                throw new ArgumentNullException("p2");
            }

            if (p1.Numbers.Count != p2.Numbers.Count || p1.Numbers.Count == 0)
            {
                throw new ArgumentException("Points must have the same, non-zero number of coordinates.");
            }

            var result = 0.0;
            for (var i = 0; i < p1.Numbers.Count; i++)
            {
                var delta = p1.Numbers[i] - p2.Numbers[i];
                result += delta * delta;
            }

            return Math.Sqrt(result);
        }
    }

    /// <summary>
    /// A single data point: an ordered list of coordinates.
    /// </summary>
    public class Indivaduls
    {
        /// <summary>Coordinates of the point.</summary>
        public List<double> Numbers;

        public Indivaduls()
        {
            this.Numbers = new List<double>();
        }

        /// <summary>
        /// Value comparison: true when both points have the same number of
        /// coordinates and every coordinate is exactly equal.
        /// </summary>
        /// <param name="obj">Point to compare with; <c>null</c> is never equal.</param>
        public bool MyEquals(Indivaduls obj)
        {
            if (obj == null || obj.Numbers.Count != this.Numbers.Count)
            {
                return false;
            }

            for (int i = 0; i < Numbers.Count; i++)
            {
                // Exact comparison is intended: this identifies identical data rows.
                if (this.Numbers[i] != obj.Numbers[i])
                {
                    return false;
                }
            }

            return true;
        }
    }

    /// <summary>
    /// A cluster: its center point (medoid) plus the other points assigned to it.
    /// </summary>
    public class Crows
    {
        public Crows()
        {
            this.CrowsPoint = new List<Indivaduls>();
            this.CenterPoint = new Indivaduls();
        }

        /// <summary>Points of the cluster other than the center.</summary>
        public List<Indivaduls> CrowsPoint;

        /// <summary>Center point (medoid) of the cluster.</summary>
        public Indivaduls CenterPoint;
    }

    public static class ExpandList
    {
        /// <summary>
        /// Extension method: reports whether the list holds a point whose coordinates
        /// equal those of <paramref name="point"/> (see <see cref="Indivaduls.MyEquals"/>).
        /// </summary>
        /// <param name="indivadulses">List to search.</param>
        /// <param name="point">Point to look for.</param>
        /// <returns><c>true</c> when a value-equal point is present.</returns>
        public static bool MyContains(this List<Indivaduls> indivadulses, Indivaduls point)
        {
            foreach (var indivadulse in indivadulses)
            {
                if (point.MyEquals(indivadulse))
                {
                    return true;
                }
            }

            return false;
        }
    }
}