DBScan聚类算法Java实现

DBScan算法流程图

算法:DBScan,基于密度的聚类算法
输入:
   D:一个包含n个数据的数据集
   r:半径参数
   minPts:邻域密度阈值(半径r的邻域内至少应包含的点数)
输出:基于密度的聚类集合
标记D中所有的点为unvisited
for each p in D
    if p.visit == unvisited
        标记点p为visited
        找出与点p距离不大于r的所有点集合N
        If N.size() < minPts
           标记点p为噪声点
        Else
           新建一个簇,并将点p聚到该簇
           for each p' in N
               If p'.visit == unvisited
                   标记点p'为visited
                   找出与点p'距离不大于r的所有点集合N'
                   If N'.size() >= minPts
                       将集合N'加入集合N中去
                   End if
               End if
               If p'未被聚到某个簇
                   将p'聚到当前簇
                   If p'被标记为噪声点
                      将p'取消标记为噪声点
                   End If
               End If
           End for
        End if
    End if
End for

代码实现

Point.java 定义点,其中距离计算采用欧氏距离

package Cluster.DBScan;

/**
 * Created by Jason on 2016/4/17.
 */
public class Point {
    private double x;
    private double y;
    private boolean isVisit;
    private int cluster;
    private boolean isNoised;

    public Point(double x,double y) {
        this.x = x;
        this.y = y;
        this.isVisit = false;
        this.cluster = 0;
        this.isNoised = false;
    }

    public double getDistance(Point point) {
        return Math.sqrt((x-point.x)*(x-point.x)+(y-point.y)*(y-point.y));
    }

    public void setX(double x) {
        this.x = x;
    }

    public double getX() {
        return x;
    }

    public void setY(double y) {
        this.y = y;
    }

    public double getY() {
        return y;
    }

    public void setVisit(boolean isVisit) {
        this.isVisit = isVisit;
    }

    public boolean getVisit() {
        return isVisit;
    }

    public int getCluster() {
        return cluster;
    }

    public void setNoised(boolean isNoised) {
        this.isNoised = isNoised;
    }

    public void setCluster(int cluster) {
        this.cluster = cluster;
    }

    public boolean getNoised() {
        return this.isNoised;
    }

    @Override
    public String toString() {
        return x+" "+y+" "+cluster+" "+(isNoised?1:0);
    }

}

DBScan.java

package Cluster.DBScan;

import java.util.ArrayList;

/**
 * Created by Jason on 2016/4/17.
 */
public class DBScan {
    private double radius;
    private int minPts;

    public DBScan(double radius,int minPts) {
        this.radius = radius;
        this.minPts = minPts;
    }

    public void process(ArrayList points) {
        int size = points.size();
        int idx = 0;
        int cluster = 1;
        while (idx//choose an unvisited point
            if (!p.getVisit()) {
                p.setVisit(true);//set visited
                ArrayList adjacentPoints = getAdjacentPoints(p, points);
                //set the point which adjacent points less than minPts noised
                if (adjacentPoints != null && adjacentPoints.size() < minPts) {
                    p.setNoised(true);
                } else {
                    p.setCluster(cluster);
                    for (int i = 0; i < adjacentPoints.size(); i++) {
                        Point adjacentPoint = adjacentPoints.get(i);
                        //only check unvisited point, cause only unvisited have the chance to add new adjacent points
                        if (!adjacentPoint.getVisit()) {
                            adjacentPoint.setVisit(true);
                            ArrayList adjacentAdjacentPoints = getAdjacentPoints(adjacentPoint, points);
                            //add point which adjacent points not less than minPts noised
                            if (adjacentAdjacentPoints != null && adjacentAdjacentPoints.size() >= minPts) {
                                adjacentPoints.addAll(adjacentAdjacentPoints);
                            }
                        }
                        //add point which doest not belong to any cluster
                        if (adjacentPoint.getCluster() == 0) {
                            adjacentPoint.setCluster(cluster);
                            //set point which marked noised before non-noised
                            if (adjacentPoint.getNoised()) {
                                adjacentPoint.setNoised(false);
                            }
                        }
                    }
                    cluster++;
                }
            }
        }
    }

    private ArrayList getAdjacentPoints(Point centerPoint,ArrayList points) {
        ArrayList adjacentPoints = new ArrayList();
        for (Point p:points) {
            //include centerPoint itself
            double distance = centerPoint.getDistance(p);
            if (distance<=radius) {
                adjacentPoints.add(p);
            }
        }
        return adjacentPoints;
    }

}

Data.java 随机模拟产生数据

package Cluster.DBScan;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Random;

/**
 * Generates sample data sets for the clustering demo and writes points to a text file.
 *
 * Created by Jason on 2016/4/17.
 */
public class Data {
    /**
     * Formatter used to round generated coordinates. It is pinned to the root locale
     * with grouping disabled so that its output always uses '.' as decimal separator
     * and can be parsed back by {@link Double#valueOf(String)} regardless of the
     * JVM's default locale.
     */
    private static final DecimalFormat df = createFormat();

    /** Builds the locale-independent number format used by {@link #format(double)}. */
    private static DecimalFormat createFormat() {
        DecimalFormat rounding = (DecimalFormat) NumberFormat.getInstance(java.util.Locale.ROOT);
        rounding.setGroupingUsed(false);
        return rounding;
    }

    /**
     * Generates two curves of {@code size / 2} points each: the first on
     * {@code y = sin(x)} with x stepping through (0, PI], the second on
     * {@code y = cos(x)} with x stepping through (1.5, 1.5 + PI].
     * Coordinates are rounded by {@link #format(double)}.
     *
     * @param size requested number of points; an odd value yields {@code size - 1} points
     * @return the generated points, sine curve first
     */
    public static ArrayList<Point> generateSinData(int size) {
        ArrayList<Point> points = new ArrayList<Point>(size);
        int half = size / 2;
        for (int i = 0; i < half; i++) {
            double x = format(Math.PI / half * (i + 1));
            double y = format(Math.sin(x));
            points.add(new Point(x, y));
        }
        for (int i = 0; i < half; i++) {
            double x = format(1.5 + Math.PI / half * (i + 1));
            double y = format(Math.cos(x));
            points.add(new Point(x, y));
        }
        return points;
    }

    /**
     * Returns a small fixed data set of 19 hand-picked points.
     *
     * @return a new list containing the fixed points
     */
    public static ArrayList<Point> generateSpecialData() {
        ArrayList<Point> points = new ArrayList<Point>();
        points.add(new Point(2,2));
        points.add(new Point(3,1));
        points.add(new Point(3,4));
        points.add(new Point(3,14));
        points.add(new Point(5,3));
        points.add(new Point(8,3));
        points.add(new Point(8,6));
        points.add(new Point(9,8));
        points.add(new Point(10,4));
        points.add(new Point(10,7));
        points.add(new Point(10,10));
        points.add(new Point(10,14));
        points.add(new Point(11,13));
        points.add(new Point(12,7));
        points.add(new Point(12,15));
        points.add(new Point(14,7));
        points.add(new Point(14,9));
        points.add(new Point(14,15));
        points.add(new Point(15,8));
        return points;
    }

    /**
     * Writes one line per point (the point's {@code toString()} followed by a newline)
     * to the given file, replacing any existing content. I/O failures are reported on
     * standard error and otherwise ignored, so the caller is not interrupted.
     *
     * @param points the points to write
     * @param path   destination file path
     */
    public static void writeData(ArrayList<Point> points,String path) {
        // try-with-resources closes the writer even when a write fails
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(path))) {
            for (Point point : points) {
                bw.write(point.toString()+"\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Rounds a value by formatting it with {@link #df} and parsing the text back.
     *
     * @param x the value to round
     * @return the rounded value
     */
    private static double format(double x) {
        return Double.valueOf(df.format(x));
    }

}

Client.java 运行聚类算法

package Cluster.DBScan;

import java.util.ArrayList;

/**
 * Demo entry point: builds a data set, clusters it with {@link DBScan},
 * prints every point and writes the result to {@code data.txt}.
 *
 * Created by Jason on 2016/4/17.
 */
public class Client {

    /**
     * Runs the clustering demo on the fixed data set with radius 3 and minPts 3.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Alternative demo: Data.generateSinData(200) clustered with new DBScan(0.6,4).
        ArrayList<Point> points = Data.generateSpecialData();
        DBScan dbScan = new DBScan(3,3);
        dbScan.process(points);
        for (Point p : points) {
            System.out.println(p);
        }
        Data.writeData(points,"data.txt");
    }

}

效果展示

数据展示采用MATLAB绘制,脚本如下:

% Plot the clustering result written by the Java program.
% Each row of data.txt is "x y cluster isNoised".
a = importdata('data.txt');
% One colour per cluster id; clusters 1 and 2 keep red and blue, and the
% palette is recycled when there are more clusters than colours.
colors = 'rbgmcy';
m = size(a,1);
for i = 1:1:m
    if a(i,4) == 1 || a(i,3) == 0
        % noise flag set, or never assigned to a cluster: draw as a black star
        plot(a(i,1),a(i,2),'k*');
    else
        c = colors(mod(a(i,3)-1, numel(colors)) + 1);
        plot(a(i,1),a(i,2),[c '.']);
    end
    hold on;
end
grid on;

数据1,Data.generateSinData(200),聚类效果
DBScan聚类算法Java实现_第1张图片
数据2,Data.generateSpecialData(),聚类效果
DBScan聚类算法Java实现_第2张图片
不同颜色代表不同类,*代表噪声点

所有代码下载:https://github.com/lincolnmi/algorithms/tree/master/src/Cluster/DBScan

参考链接:
http://www.cnblogs.com/aijianiula/p/4339960.html
http://www.dataguru.cn/thread-18180-1-1.html

你可能感兴趣的:(算法)