topk算法

从1亿个数中,取出前100个最大数。

最小堆排序。
1.首先读入前100个数,自底向上调整为最小堆,时间复杂度为O(k)(k为堆的大小,即100)。
2.然后遍历后续的数字,并与堆顶(最小)数字进行比较。如果不大于堆顶数字,则继续读取后续数字;如果比堆顶数字大,则替换堆顶元素并重新调整堆为最小堆(每次调整的时间复杂度为O(logk))。整个过程直至1亿个数全部遍历完为止。
该算法的时间复杂度为O(nlogk)(n为数字总数,k为堆的大小),额外空间复杂度为O(k)(k=100,可视为常数)。

package com.study.demo;

import java.util.Random;

/**
 * Selects the k largest values from an int array using a fixed-size min heap.
 *
 * <p>The heap holds the k largest values seen so far, with the smallest of them
 * at the root. Each remaining input value is compared against the root only, so
 * the selection costs O(n log k) time and O(k) extra space.
 *
 * <p>The demo in {@link #main(String[])} picks the top 100 out of 100,000,000
 * random values.
 */
public class TopHundredHeap {

    /**
     * Demo entry point: fills a large array with random values, selects the
     * top k with {@link #topHundred(int[], int)} and prints the elapsed time
     * followed by the selected values (in heap order, not sorted).
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // the size of the array
        int number = 100000000;
        // the top k values
        int k = 100;
        // exclusive upper bound of the random values in the array
        int range = 1000000001;

        // input for the min-heap based method
        int[] array = new int[number];

        Random random = new Random();
        for (int i = 0; i < number; i++) {
            array[i] = random.nextInt(range);
        }

        TopHundredHeap thh = new TopHundredHeap();

        // nanoTime is monotonic, which makes it the right clock for elapsed time
        long start = System.nanoTime();
        int[] top = thh.topHundred(array, k);
        long elapsedMillis = (System.nanoTime() - start) / 1000000L;

        System.out.println("The total execution time of "
                + "min-heap based method is " + elapsedMillis + " milliseconds!");

        // print out the selected values; iterate over the result's own length
        System.out.println("The top " + k + " largest values are:");
        for (int i = 0; i < top.length; i++) {
            System.out.println(top[i]);
        }
    }

    /**
     * Returns the k largest values of {@code array}.
     *
     * <p>The result is laid out as a min heap (smallest selected value at
     * index 0); it is not sorted. The input array is not modified.
     *
     * @param array the values to select from
     * @param k     how many of the largest values to return
     * @return a new array of length {@code min(k, array.length)} holding the
     *         largest values; empty when {@code k} is 0 or {@code array} is empty
     * @throws NullPointerException     if {@code array} is null
     * @throws IllegalArgumentException if {@code k} is negative
     */
    public int[] topHundred(int[] array, int k) {
        if (array == null) {
            throw new NullPointerException("array must not be null");
        }
        if (k < 0) {
            throw new IllegalArgumentException("k must be non-negative: " + k);
        }

        // Never allocate a heap larger than the input; otherwise the initial
        // copy would read past the end of the array.
        int heapSize = Math.min(k, array.length);
        int[] top = new int[heapSize];
        if (heapSize == 0) {
            // Nothing to select; also avoids touching top[0] on an empty heap.
            return top;
        }

        System.arraycopy(array, 0, top, 0, heapSize);
        buildMinHeap(top);

        for (int i = heapSize; i < array.length; i++) {
            // Only a value strictly larger than the current minimum can
            // displace it from the top-k set.
            if (top[0] < array[i]) {
                top[0] = array[i];
                minHeapify(top, 0, top.length);
            }
        }

        return top;
    }

    /**
     * Rearranges {@code array} in place into a min heap.
     *
     * @param array the array to heapify
     */
    public void buildMinHeap(int[] array) {
        int heapSize = array.length;
        // Start at the last node that has a child and sift each node down.
        for (int i = heapSize / 2 - 1; i >= 0; i--) {
            minHeapify(array, i, heapSize);
        }
    }

    /**
     * Sifts the element at {@code position} down until neither child within
     * the first {@code heapSize} elements is smaller than it.
     *
     * @param array    the array backing the heap
     * @param position index of the element to sift down
     * @param heapSize number of leading elements of {@code array} in the heap
     */
    public void minHeapify(int[] array, int position, int heapSize) {
        int current = position;
        while (true) {
            int left = left(current);
            int right = right(current);
            int smallest = current;

            if (left < heapSize && array[left] < array[smallest]) {
                smallest = left;
            }
            if (right < heapSize && array[right] < array[smallest]) {
                smallest = right;
            }
            if (smallest == current) {
                return;
            }

            swap(array, current, smallest);
            current = smallest;
        }
    }

    /**
     * Exchanges the elements at indices {@code i} and {@code j}.
     *
     * @param array the array to modify
     * @param i     first index
     * @param j     second index
     */
    public void swap(int[] array, int i, int j) {
        int temp = array[i];
        array[i] = array[j];
        array[j] = temp;
    }

    /**
     * Returns the index of the left child of node {@code i}.
     *
     * @param i index of the parent node
     * @return {@code 2 * i + 1}
     */
    public int left(int i) {
        return 2 * i + 1;
    }

    /**
     * Returns the index of the right child of node {@code i}.
     *
     * @param i index of the parent node
     * @return {@code 2 * i + 2}
     */
    public int right(int i) {
        return 2 * i + 2;
    }
}

你可能感兴趣的:(算法)