聚类算法之单链接算法java实现

聚类算法中基于链接的算法大致有三种:单链接算法(single link)、平均链接算法(average link)和最小生成树算法(minimum spanning tree)。这里先实现单链接算法,其他算法以后再续。
       单链接算法的过程是:首先生成各个元素之间的距离矩阵,然后将距离与阈值进行比对,以此控制生成的聚类个数;阈值越大,生成的聚类越少,直到所有元素同属一类。
       下面的例子根据经纬度对城市进行聚类。

 

 

package test.algorithm;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

/**
 * Demo driver: builds a fixed list of ten cities, clusters them with
 * {@link SingleLink} and prints every resulting cluster to standard output.
 */
public class SingleLinkTest {

	// City names, in the order in which they are handed to the clusterer.
	private static final String[] NAMES = {
		"北 京", "上 海", "天 津", "重 庆", "哈尔滨",
		"长 春", "南 京", "武 汉", "台 北", "香 港"
	};

	// {x, y} = {longitude, latitude} of the city at the same index in NAMES.
	private static final double[][] COORDINATES = {
		{116.28, 39.54},
		{121.29, 31.14},
		{117.11, 39.09},
		{106.32, 29.32},
		{126.41, 45.45},
		{125.19, 43.52},
		{118.50, 32.02},
		{114.21, 30.37},
		{121.31, 25.03},
		{114.10, 22.18}
	};

	/**
	 * Entry point; command-line arguments are ignored.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		List<City> cities = new ArrayList<City>();
		for (int idx = 0; idx < NAMES.length; idx++) {
			cities.add(makeCity(NAMES[idx], COORDINATES[idx][0], COORDINATES[idx][1]));
		}

		List<Set<City>> clusters = new SingleLink(cities).compute();
		for (Set<City> cluster : clusters) {
			// A separator line precedes the members of each cluster.
			System.out.println("=============");
			for (City member : cluster) {
				String position = "(" + member.getX() + "," + member.getY() + ")";
				System.out.println(member.getName() + " : " + position);
			}
		}
	}

	// Creates a City and fills in its name and coordinates through the setters.
	private static City makeCity(String name, double x, double y) {
		City created = new City();
		created.setName(name);
		created.setX(x);
		created.setY(y);
		return created;
	}

}

 

 

 

 

package test.algorithm;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Single-link (nearest-neighbour) clustering with a distance threshold.
 *
 * <p>Two cities belong to the same cluster when they are connected by a chain
 * of cities in which every consecutive pair is at most {@code threshold}
 * apart. In other words, the clusters are the connected components of the
 * graph whose edges are the pairs with distance {@code <= threshold}. A larger
 * threshold therefore yields fewer, larger clusters.
 *
 * <p>Distance is the straight-line distance between the {@code (x, y)}
 * coordinate pairs of two cities.
 */
class SingleLink {

	// Threshold used when the caller does not supply one.
	private static final double DEFAULT_THRESHOLD = 8;

	// Points to cluster; the caller's list is referenced, not copied.
	private final List<City> data;

	// Maximum distance at which two cities are considered directly linked.
	private final double threshold;

	/**
	 * Creates a clusterer that uses the default threshold of 8.
	 *
	 * @param list cities to cluster; {@code null} is treated as an empty list
	 */
	public SingleLink(List<City> list) {
		this(list, DEFAULT_THRESHOLD);
	}

	/**
	 * Creates a clusterer with an explicit link threshold.
	 *
	 * @param list      cities to cluster; {@code null} is treated as an empty list
	 * @param threshold maximum distance between two directly linked cities
	 * @throws IllegalArgumentException if {@code threshold} is negative or NaN
	 */
	public SingleLink(List<City> list, double threshold) {
		if (Double.isNaN(threshold) || threshold < 0) {
			throw new IllegalArgumentException(
					"threshold must be a non-negative number, got " + threshold);
		}
		this.data = (list == null) ? new ArrayList<City>() : list;
		this.threshold = threshold;
	}

	/**
	 * Computes the clusters and, as a side effect, prints the distance matrix
	 * (each entry truncated to an int) to standard output.
	 *
	 * @return one set per cluster, ordered by the list position of the first
	 *         city that falls into it; empty when there are no cities
	 */
	public List<Set<City>> compute() {
		final int n = data.size();
		double[][] ds = buildDistanceMatrix(n);
		printMatrix(ds);

		List<Set<City>> clusters = new ArrayList<Set<City>>();
		boolean[] visited = new boolean[n];
		// Explicit stack for the graph traversal. An index is pushed only when
		// it is first marked visited, so n slots are always enough.
		int[] pending = new int[n];

		for (int seed = 0; seed < n; seed++) {
			if (visited[seed]) {
				continue;
			}
			Set<City> cluster = new HashSet<City>();
			int top = 0;
			visited[seed] = true;
			pending[top++] = seed;

			// Expand from every member, not just the seed: this is what makes
			// cluster membership transitive and independent of input order.
			while (top > 0) {
				int current = pending[--top];
				cluster.add(data.get(current));
				for (int other = 0; other < n; other++) {
					if (!visited[other] && ds[current][other] <= threshold) {
						visited[other] = true;
						pending[top++] = other;
					}
				}
			}
			clusters.add(cluster);
		}
		return clusters;
	}

	// Builds the symmetric n x n matrix of pairwise distances (diagonal is 0).
	private double[][] buildDistanceMatrix(int n) {
		double[][] ds = new double[n][n];
		for (int i = 0; i < n; i++) {
			City first = data.get(i);
			for (int j = i + 1; j < n; j++) {
				double d = getDistance(first, data.get(j));
				ds[i][j] = d;
				ds[j][i] = d;
			}
		}
		return ds;
	}

	// Prints the matrix row by row, every entry truncated to int and followed by a comma.
	private void printMatrix(double[][] ds) {
		for (int i = 0; i < ds.length; i++) {
			for (int j = 0; j < ds.length; j++) {
				System.out.print((int) ds[i][j] + ",");
			}
			System.out.println();
		}
	}

	// Straight-line distance between the (x, y) coordinates of two cities.
	private double getDistance(City city1, City city2) {
		return Math.hypot(city1.getX() - city2.getX(), city1.getY() - city2.getY());
	}

}

 

 

 

package test.algorithm;

/**
 * 城市
 */
class City {

	private String name;
	// 经度
	private double x;

	// 纬度
	private double y;

	public double getX() {
		return x;
	}

	public void setX(double x) {
		this.x = x;
	}

	public double getY() {
		return y;
	}

	public void setY(double y) {
		this.y = y;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public boolean equals(Object obj) {
		if (obj == null) {
			return false;
		}
		if (this == obj) {
			return true;
		}
		City other = (City) obj;
		if (this.getX() == other.getX() && this.getY() == other.getY()) {
			return true;
		}
		return false;
	}
}

 

你可能感兴趣的:(java,聚类算法)