C#实现K-MEDOIDS聚类算法

1)任意选取K个对象作为初始聚类中心(O1,O2,…Oi…Ok)。  
2)将余下的对象分到各个类中去(该对象与哪一个聚类中心最近就被分配到哪一个聚类簇中);  
3)对于每个类(Oi),依次选取该类中的一个非中心点Or,用Or代替Oi后重复步骤2,并计算误差E(各个点到其对应中心点的欧式距离之和);选择使E最小的那个Or来代替Oi。
4)重复步骤3,直到K个medoids固定下来。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;

namespace K_medoids
{
    class Program
    {
        /// <summary>
        /// Entry point: reads whitespace-separated numeric rows from a text file, clusters them
        /// into k groups with <see cref="Pam"/>, and prints every cluster to the console.
        /// The file name is combined with the application's base directory.
        /// </summary>
        /// <param name="args">args[0] is the data file name; args[1] is the number of clusters k.</param>
        static void Main(string[] args)
        {
            var path = string.Empty;
            int k = 0;
            try
            {
                path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, args[0]);
                k = Convert.ToInt32(args[1]);
            }
            catch (Exception)
            {
                Console.Write("参数错误");
                return;
            }

            // Load every row the same way. The reader is disposed even if parsing throws.
            var db = new List<Indivaduls>();
            using (var reader = new StreamReader(path, Encoding.Default))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Tolerate repeated spaces and tabs on every row, not only on the first one.
                    var fields = line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
                    if (fields.Length == 0)
                    {
                        continue; // blank line
                    }
                    db.Add(new Indivaduls() { Numbers = fields.Select(s => Convert.ToDouble(s)).ToList() });
                }
            }

            // k must be usable as a divisor and must not exceed the number of points;
            // this also rejects an empty data file (db.Count == 0).
            if (k < 1 || k > db.Count)
            {
                Console.Write("参数错误");
                return;
            }

            // Pick k evenly spaced points as the initial centers.
            // With gap = Count / k the largest index is (k - 1) * gap, which is always < Count.
            var initialCenters = new List<Indivaduls>();
            var gap = db.Count / k;
            for (int i = 0; i < k; i++)
            {
                initialCenters.Add(db[i * gap]);
            }

            var result = Pam(db, initialCenters);
            foreach (var crow in result)
            {
                // First line of each group is the center, followed by its member points.
                WritePoint(crow.CenterPoint);
                foreach (var point in crow.CrowsPoint)
                {
                    WritePoint(point);
                }
                Console.WriteLine("========================");
            }
            Console.ReadKey();

        }

        /// <summary>
        /// Writes the coordinates of one point on a single console line, separated by spaces.
        /// </summary>
        /// <param name="point">Point whose coordinates are printed.</param>
        private static void WritePoint(Indivaduls point)
        {
            foreach (var number in point.Numbers)
            {
                // A visible space separator; a NUL character is not rendered consistently by terminals.
                Console.Write(number + " ");
            }
            Console.Write("\r\n");
        }
      /// <summary>
      /// Clusters the points around medoids: starting from the given centers, repeatedly tries to
      /// replace each center with a non-center point of its own cluster and keeps a replacement
      /// whenever it lowers the total point-to-center distance. Stops when a full pass over all
      /// clusters produces no improvement.
      /// </summary>
      /// <param name="indivadulses">All points to cluster.</param>
      /// <param name="centerPoints">Initial center points; this list is not modified.</param>
      /// <returns>One cluster per center, in the same order as <paramref name="centerPoints"/>.</returns>
        public static List<Crows> Pam(List<Indivaduls> indivadulses,List<Indivaduls> centerPoints  )
        {
            // Work on a copy so the caller's list of initial centers stays untouched.
            var currentCenters = new List<Indivaduls>(centerPoints);
            var currentCrows = K_medoids(indivadulses, currentCenters);
            var currentCost = TotalDistance(currentCrows);

            // Every accepted swap strictly lowers the cost, so no configuration is visited twice
            // and the loop terminates.
            var improved = true;
            while (improved)
            {
                improved = false;
                for (int i = 0; i < currentCenters.Count; i++)
                {
                    // Snapshot the candidates: currentCrows is replaced when a swap is accepted.
                    var candidates = new List<Indivaduls>(currentCrows[i].CrowsPoint);
                    foreach (var candidate in candidates)
                    {
                        // Replace only center i, keeping its position so cluster order is stable.
                        var trialCenters = new List<Indivaduls>(currentCenters);
                        trialCenters[i] = candidate;

                        var trialCrows = K_medoids(indivadulses, trialCenters);
                        var trialCost = TotalDistance(trialCrows);
                        if (trialCost < currentCost)
                        {
                            currentCenters = trialCenters;
                            currentCrows = trialCrows;
                            currentCost = trialCost;
                            improved = true;
                        }
                    }
                }
            }
            return currentCrows;
        }

        /// <summary>
        /// Sums, over all clusters, the distances from each member point to its cluster center.
        /// </summary>
        /// <param name="crows">Clusters to evaluate.</param>
        /// <returns>Total distance used as the clustering cost.</returns>
        private static double TotalDistance(List<Crows> crows)
        {
            var total = 0.0;
            foreach (var crow in crows)
            {
                total += Distance(crow);
            }
            return total;
        }
        /// <summary>
        /// Performs a single assignment pass: builds one cluster per center and puts every
        /// non-center point into the cluster whose center is nearest (ties go to the earlier center).
        /// </summary>
        /// <param name="indivadulses">Points to assign; expected to include the center points themselves.</param>
        /// <param name="centerPoints">Center points, one per resulting cluster.</param>
        /// <returns>Clusters in the same order as <paramref name="centerPoints"/>.</returns>
        public static List<Crows> K_medoids(List<Indivaduls> indivadulses,List<Indivaduls> centerPoints)
        {
            var resultCrows = new List<Crows>(centerPoints.Count);
            foreach (var center in centerPoints)
            {
                resultCrows.Add(new Crows() { CenterPoint = center });
            }

            // Exclude exactly one individual per center. Excluding by value alone would also drop
            // ordinary data points that merely share a center's coordinates.
            var isCenter = MarkCenterIndividuals(indivadulses, centerPoints);

            for (int i = 0; i < indivadulses.Count; i++)
            {
                if (isCenter[i])
                {
                    continue;
                }

                var point = indivadulses[i];
                int nearest = 0;
                var nearestDistance = P2PDistance(point, resultCrows[0].CenterPoint);
                for (int j = 1; j < resultCrows.Count; j++)
                {
                    var candidateDistance = P2PDistance(point, resultCrows[j].CenterPoint);
                    if (candidateDistance < nearestDistance)
                    {
                        nearestDistance = candidateDistance;
                        nearest = j;
                    }
                }
                resultCrows[nearest].CrowsPoint.Add(point);
            }
            return resultCrows;
        }

        /// <summary>
        /// Flags, for each center, the single individual that represents it: the same object if it
        /// is present in the list, otherwise the first not-yet-flagged individual with equal coordinates.
        /// </summary>
        /// <param name="indivadulses">Points being clustered.</param>
        /// <param name="centerPoints">Center points to locate among the individuals.</param>
        /// <returns>Array parallel to <paramref name="indivadulses"/>; true marks a center.</returns>
        private static bool[] MarkCenterIndividuals(List<Indivaduls> indivadulses, List<Indivaduls> centerPoints)
        {
            var isCenter = new bool[indivadulses.Count];
            foreach (var center in centerPoints)
            {
                var match = -1;
                for (int idx = 0; idx < indivadulses.Count; idx++)
                {
                    if (!isCenter[idx] && ReferenceEquals(indivadulses[idx], center))
                    {
                        match = idx;
                        break;
                    }
                }
                if (match < 0)
                {
                    // The center is not the same object as any individual: fall back to value equality.
                    for (int idx = 0; idx < indivadulses.Count; idx++)
                    {
                        if (!isCenter[idx] && center.MyEquals(indivadulses[idx]))
                        {
                            match = idx;
                            break;
                        }
                    }
                }
                if (match >= 0)
                {
                    isCenter[match] = true;
                }
            }
            return isCenter;
        }
        /// <summary>
        /// Computes the absolute error of a clustering: the distance sum of one cluster plus the
        /// distance sums of all the remaining clusters.
        /// </summary>
        /// <param name="centerCrow">Cluster whose distance sum is taken first.</param>
        /// <param name="otherPoints">All other clusters.</param>
        /// <returns>Sum of <see cref="Distance"/> over every cluster passed in.</returns>
        public static double AbsoluteDiff(Crows centerCrow,List<Crows> otherPoints )
        {
            var positions = Enumerable.Range(0, otherPoints.Count);
            var total = Distance(centerCrow);
            foreach (var position in positions)
            {
                total += Distance(otherPoints[position]);
            }
            return total;
        }
        /// <summary>
        /// Adds up the Euclidean distances from the cluster's center to each of its member points.
        /// </summary>
        /// <param name="crow">Cluster to measure.</param>
        /// <returns>Sum of center-to-member distances; 0 when the cluster has no members.</returns>
        public static double Distance(Crows crow)
        {
            var sum = 0.0;
            foreach (var member in crow.CrowsPoint)
            {
                sum += P2PDistance(crow.CenterPoint, member);
            }
            return sum;
        }
        /// <summary>
        /// Euclidean distance between two points.
        /// </summary>
        /// <param name="p1">First point.</param>
        /// <param name="p2">Second point.</param>
        /// <returns>Square root of the sum of squared coordinate differences.</returns>
        /// <exception cref="ArgumentException">
        /// The points have different numbers of coordinates, or have no coordinates at all.
        /// </exception>
        public static double P2PDistance(Indivaduls p1,Indivaduls p2)
        {
            var dimension = p1.Numbers.Count;
            if (dimension != p2.Numbers.Count || dimension == 0)
            {
                // ArgumentException derives from Exception, so existing catch (Exception) handlers still apply.
                throw new ArgumentException("Points must have the same, non-zero number of coordinates.");
            }

            var sumOfSquares = 0.0;
            for (var i = 0; i < dimension; i++)
            {
                var delta = p1.Numbers[i] - p2.Numbers[i];
                sumOfSquares += delta * delta;
            }
            return Math.Sqrt(sumOfSquares);
        }

    }
    /// <summary>
    /// A single data point, stored as a list of coordinates.
    /// </summary>
    public class Indivaduls
    {
        /// <summary>Coordinates of the point, one entry per dimension.</summary>
        public List<double> Numbers;

        /// <summary>Creates a point with an empty coordinate list.</summary>
        public Indivaduls()
        {
            this.Numbers=new List<double>();
        }

        /// <summary>
        /// Compares two points by value: same number of coordinates and every coordinate exactly equal.
        /// </summary>
        /// <param name="obj">Point to compare with; may be null.</param>
        /// <returns>True when both points have identical coordinates; false otherwise, including for null.</returns>
        public  bool MyEquals(Indivaduls obj)
        {
            // A missing point is never equal to an existing one; report that instead of failing.
            if (obj == null)
            {
                return false;
            }
            if (obj.Numbers.Count != this.Numbers.Count)
            {
                return false;
            }
            for (int i = 0; i < Numbers.Count; i++)
            {
                // Exact comparison is intended: this identifies the same data point, not a nearby one.
                if (this.Numbers[i] != obj.Numbers[i])
                {
                    return false;
                }
            }
            return true;
        }
    }
    /// <summary>
    /// A single cluster: one center point together with the other points assigned to it.
    /// </summary>
    public class Crows
    {
        /// <summary>Points belonging to the cluster, not counting the center.</summary>
        public List<Indivaduls> CrowsPoint = new List<Indivaduls>();

        /// <summary>Center point of the cluster.</summary>
        public Indivaduls CenterPoint = new Indivaduls();

        /// <summary>Creates a cluster with no member points and an empty center point.</summary>
        public Crows()
        {
        }
    }
    public static class ExpandList
    {
        /// <summary>
        /// Extension method that reports whether the list holds a point equal by value to the given one.
        /// </summary>
        /// <param name="indivadulses">List to search.</param>
        /// <param name="point">Point to look for.</param>
        /// <returns>True as soon as an element satisfies <c>point.MyEquals(element)</c>; otherwise false.</returns>
        public static bool MyContains(this List<Indivaduls> indivadulses,Indivaduls point)
        {
            var found = false;
            for (var position = 0; position < indivadulses.Count && !found; position++)
            {
                found = point.MyEquals(indivadulses[position]);
            }
            return found;
        }
    }
}

  

你可能感兴趣的:(C#)