最常使用的k个单词(Map Reduce)

http://www.lintcode.com/zh-cn/problem/top-k-frequent-words-map-reduce/

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

/**
 * Definition of OutputCollector:
 * class OutputCollector<K, V> {
 * public void collect(K key, V value);
 * // Adds a key/value pair to the output buffer
 * }
 * Definition of Document:
 * class Document {
 * public int id;
 * public String content;
 * }
 */
public class TopKFrequentWords {

    public static class Map {
        public void map(String _, Document value,
                        OutputCollector output) {
            // Write your code here
            // Output the results into output buffer.
            // Ps. output.collect(String key, int value);
            if (value != null) {
                String content = value.content;
                if (content != null && content.length() > 0) {
                    String[] split = content.split(" ");
                    for (int i = 0; i < split.length; i++) {
                        String s = split[i];
                        if (s != null && s.length() > 0) {
                            output.collect(s, value.id);
                        }
                    }
                }

            }
        }
    }

    public static class Reduce {
        private int k;
        private HashMap map = new HashMap<>();

        public void setup(int k) {
            // initialize your data structure here
            this.k = k;
        }

        public void reduce(String key, Iterator values) {
            // Write your code here
            int res = 0;
            if (values != null) {
                while (values.hasNext()) {
                    Integer next = values.next();
                    res++;
                }
            }
            Integer integer = map.get(key);
            if (integer == null) {
                integer = res;
            } else {
                integer += res;
            }
            map.put(key, integer);
        }

        public void cleanup(OutputCollector output) {
            // Output the top k pairs  into output buffer.
            // Ps. output.collect(String key, Integer value);
            Set> entries = map.entrySet();
            List> list = new ArrayList<>();
            list.addAll(entries);
            Collections.sort(list, new Comparator>() {
                @Override
                public int compare(java.util.Map.Entry o1, java.util.Map
                        .Entry o2) {
                    if (o1.getValue() == o2.getValue()) {
                        return o1.getKey().compareTo(o2.getKey());
                    }
                    return o2.getValue() - o1.getValue();
                }
            });
            for (int i = 0; i < k; i++) {
                java.util.Map.Entry stringIntegerEntry = list.get(i);

                output.collect(stringIntegerEntry.getKey(), stringIntegerEntry.getValue());
            }
        }
    }
}

你可能感兴趣的:(最常使用的k个单词(Map Reduce))