Stanford Algorithms学习:Clustering

第二周的第一个编程作业,是利用贪婪算法来实现一个clustering的问题,和ML里学的unsupervised learning差不多。

Question 1

In this programming problem and the next you'll code up the clustering algorithm from lecture for computing a max-spacing  k -clustering. Download the text file  here. This file describes a distance function (equivalently, a complete graph with edge costs). It has the following format:

[number_of_nodes]
[edge 1 node 1] [edge 1 node 2] [edge 1 cost]
[edge 2 node 1] [edge 2 node 2] [edge 2 cost]
...
There is one edge $(i,j)$ for each choice of $1 \le i < j \le n$, where $n$ is the number of nodes. For example, the third line of the file is "1 3 5250", indicating that the distance between nodes 1 and 3 (equivalently, the cost of the edge (1,3)) is 5250. You can assume that distances are positive, but you should NOT assume that they are distinct.

Your task in this problem is to run the clustering algorithm from lecture on this data set, where the target number  k  of clusters is set to 4. What is the maximum spacing of a 4-clustering?

这个算法的实现和Kruskal's MST算法很像:先对所有边按cost从小到大排序,然后利用Union-Find数据结构依次合并每条边两端所在的cluster;区别只是不需要一直合并到只剩一个连通分量,而是在cluster的数量降到要求的k值(这里是4)时就结束循环。要求的距离(maximum spacing)就是此后扫描到的第一条两个端点不在同一个cluster里的边的cost。

这里的Union-Find和Edge数据结构都是Algorithms fourth edition by Sedgewick里面的:

/**
 * Disjoint-set (union-find) structure over the integers 0 .. N-1.
 * Components are stored as parent-pointer trees; when two trees are merged
 * the smaller one is attached below the root of the larger one.
 */
public class UnionFind {

	private int[] id;		// id[i] is the parent of i; a root is its own parent
	private int[] sizes;	// sizes[r] is the number of nodes in the tree rooted at r
	private int count;		// current number of disjoint components

	/**
	 * Builds a structure in which each of the N nodes is its own component.
	 * @param N number of nodes
	 */
	public UnionFind(int N) {
		count = N;
		id = new int[N];
		sizes = new int[N];
		for (int node = 0; node < N; node++) {
			sizes[node] = 1;
			id[node] = node;
		}
	}

	/**
	 * @return the number of components currently in the structure
	 */
	public int count() {
		return count;
	}

	/**
	 * Tells whether two nodes belong to the same component.
	 * @param u first node
	 * @param v second node
	 * @return true if both nodes have the same root
	 */
	public boolean connected(int u, int v) {
		int rootOfU = find(u);
		int rootOfV = find(v);
		return rootOfU == rootOfV;
	}

	/**
	 * Follows parent links from u up to the root of its tree.
	 * @param u node to look up
	 * @return the root, used as the identifier of u's component
	 */
	private int find(int u) {
		int root = u;
		while (id[root] != root) {
			root = id[root];
		}
		return root;
	}

	/**
	 * Merges the components containing u and v; does nothing if they are
	 * already the same component.
	 * @param u a node of the first component
	 * @param v a node of the second component
	 */
	public void union(int u, int v) {
		int rootOfU = find(u);
		int rootOfV = find(v);
		if (rootOfU != rootOfV) {
			// On a size tie the root of u's tree stays the root.
			int larger = rootOfU;
			int smaller = rootOfV;
			if (sizes[rootOfU] < sizes[rootOfV]) {
				larger = rootOfV;
				smaller = rootOfU;
			}
			id[smaller] = larger;
			sizes[larger] += sizes[smaller];
			count--;
		}
	}
}

public class Edge implements Comparable<Edge>{
	private final int u;	//the first vertex
	private final int v;	//the other vertex
	private final int cost;	//edge cost
	
	public Edge(int u, int v, int cost) {
		this.u = u;
		this.v = v;
		this.cost = cost;
	}
	
	public int cost(){
		return cost;
	}
	/**
	 * returns one vertex of the edge
	 */
	public int either(){
		return u;
	}
	/**
	 * given one vertex, returns the other vertex of that edge
	 * @param vertex
	 * @return
	 */
	public int other(int vertex) {
		if (vertex == u) {
			return v;
		}else if (vertex == v) {
			return u;
		}
		else {
			throw new RuntimeException("Inconsistent edge");
		}
	}
	
	@Override
	public int compareTo(Edge arg0) {
		if (this.cost() < arg0.cost()) {
			return -1;
		}
		else if (this.cost() > arg0.cost) {
			return 1;
		}
		else {
			return 0;
		}
		
	}

}

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.StringTokenizer;



/**
 * Computes the maximum spacing of a k-clustering with the greedy,
 * Kruskal-style algorithm: edges are scanned in increasing order of cost and
 * the clusters at their endpoints are merged until exactly k clusters remain.
 * The answer is the cost of the cheapest edge that still joins two different
 * clusters at that point.
 */
public class Clustering {
	/** Number of clusters used when none is given on the command line. */
	private static final int DEFAULT_CLUSTER_COUNT = 4;

	private ArrayList<Edge> edges;			//stores all edges
	private UnionFind uf;
	private int N;							//number of nodes (first line of the input file)
	private String fileName = "/Users/Zhusong/Documents/Study/AlgorithmsDesignAndAnalysis/Assignments/Ass2/Ass2Prob1/clustering1.txt";

	/**
	 * Reads the graph and prints the maximum spacing of a k-clustering.
	 * @param args optional; args[0] overrides the input file path and
	 *             args[1] overrides k (default 4)
	 */
	public static void main(String[] args) {
		Clustering cl = new Clustering();
		int k = DEFAULT_CLUSTER_COUNT;
		if (args.length > 0) {
			cl.fileName = args[0];
		}
		if (args.length > 1) {
			k = Integer.parseInt(args[1]);
		}
		cl.run(k);
	}

	/**
	 * Runs the clustering with the default number of clusters.
	 */
	private void run(){
		run(DEFAULT_CLUSTER_COUNT);
	}

	/**
	 * Loads the graph, sorts its edges and prints the maximum spacing.
	 * @param k target number of clusters
	 */
	private void run(int k){
		edges = new ArrayList<Edge>();
		readGraph();
		sortEdges();
		System.out.println(maxSpacing(k));
	}

	/**
	 * Merges clusters along the sorted edges until k clusters remain and
	 * returns the cost of the next edge whose endpoints are in different
	 * clusters. Expects the edges to be sorted by increasing cost already.
	 * @param k target number of clusters, between 2 and the number of nodes
	 * @return the maximum spacing of a k-clustering
	 * @throws IllegalArgumentException if k is smaller than 2 or larger than the number of nodes
	 * @throws IllegalStateException if the edges run out before a separating edge is found
	 */
	private int maxSpacing(int k){
		if (k < 2 || k > N) {
			throw new IllegalArgumentException(
					"k must be between 2 and the number of nodes (" + N + "), got " + k);
		}
		uf = new UnionFind(N);
		Iterator<Edge> iterator = edges.iterator();
		while (iterator.hasNext()) {
			Edge edge = iterator.next();
			int u = edge.either();
			int v = edge.other(u);
			if (uf.connected(u, v)) {
				continue;	// edge lies inside one cluster: neither merges nor separates
			}
			if (uf.count() == k) {
				// cheapest edge between two of the k final clusters
				return edge.cost();
			}
			uf.union(u, v);
		}
		throw new IllegalStateException(
				"Ran out of edges before finding one that separates " + k + " clusters");
	}

	/**
	 * Reads the input file: the first line holds the number of nodes, every
	 * following non-blank line holds "node1 node2 cost" with 1-based node
	 * labels, which are stored 0-based. Blank lines are skipped.
	 * @throws IllegalStateException if the file cannot be read, is empty, or
	 *         contains a malformed or out-of-range edge line
	 */
	private void readGraph() {
		File file = new File(fileName);
		// try-with-resources closes the reader even when parsing fails half-way
		try (BufferedReader rd = new BufferedReader(new FileReader(file))) {
			String line = rd.readLine();
			if (line == null) {
				throw new IllegalStateException("Graph file is empty: " + fileName);
			}
			N = Integer.parseInt(line.trim());
			while((line = rd.readLine()) != null){
				StringTokenizer tokenizer = new StringTokenizer(line);
				if (!tokenizer.hasMoreTokens()) {
					continue;	// tolerate blank lines, e.g. a trailing newline
				}
				if (tokenizer.countTokens() < 3) {
					throw new IllegalStateException("Malformed edge line: \"" + line + "\"");
				}
				int u = Integer.parseInt(tokenizer.nextToken()) - 1;
				int v = Integer.parseInt(tokenizer.nextToken()) - 1;
				int cost = Integer.parseInt(tokenizer.nextToken());
				if (u < 0 || u >= N || v < 0 || v >= N) {
					throw new IllegalStateException("Node label out of range in line: \"" + line + "\"");
				}
				addEdge(u, v, cost);
			}
		} catch (IOException e) {
			// Fail loudly instead of continuing with an empty graph
			throw new IllegalStateException("Cannot read graph file: " + fileName, e);
		}
	}

	/**
	 * Appends an edge to the edge list.
	 * @param u first endpoint (0-based)
	 * @param v second endpoint (0-based)
	 * @param cost cost of the edge
	 */
	private void addEdge(int u, int v, int cost){
		edges.add(new Edge(u, v, cost));
	}

	/**
	 * Sorts the edge list by increasing cost, using Edge's natural ordering.
	 */
	private void sortEdges(){
		Collections.sort(edges);
	}
}


你可能感兴趣的:(java,数据结构,Union-Find)