聪明的代码

Code1 词法分析:邻域为 n 的所有频度计数

在《数据算法Hadoop/Spark大数据处理技巧》一书第五章“反转排序”中,有一段映射器代码能够得到邻域为2的所有频度计数。代码量不大,但思路令人敬佩。

Input:java is a great language
output: (java,*) 2
        (java,is) 1
        (java,a) 1
        (is,*) 3
        (is,java) 1
        (is,a)  1
        (is,great) 1
        (a,*) 4
        (a,java) 1
        (a,is) 1
        (a,great) 1
        (a,language) 1
        (great,*) 3
        (great,is) 1
        (great,a) 1
        (great,language) 1
        (language,*) 2
        (language,a) 1
        (language,great) 1
package com.jimmy.chap05;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import edu.umd.cloud9.io.pair.PairOfStrings;

public class RelativeFrequencyMapper extends Mapper{

    private int neighborWindow=2;
    private PairOfStrings pair=new PairOfStrings();
    //jimmy, it is very easy to lose "Context" and throws "Exception"
    @Override
    public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{
        String[] tokens=StringUtils.split(value.toString(), " ");//jimmy,pay attention to the type cast,value is a Text.
        if(tokens.length<2){
            return;
        }
        for(int i=0;i
            String word=tokens[i];
            int start=(i-neighborWindow<0)?0:i-neighborWindow;
            int end=(i+neighborWindow>=tokens.length)?tokens.length-1:i+neighborWindow;
            for(int j=start;j<=end;j++){
                if(i==j){
                    continue;
                }
                pair.set(word, tokens[j]);
                context.write(pair,new LongWritable(1));
            }
            int totalCount=end-start;
            pair.set(word, "*");
            context.write(pair, new LongWritable(totalCount))
        }
    }
}

Code2 移动平均数

在《数据算法Hadoop/Spark大数据处理技巧》一书第六章“移动平均”中,有一段使用数组的解决方案。代码中的求余运算符(%)使数组得以循环利用,是点睛之笔。

package com.jimmy.chap06;

/**
 * Simple moving average over the most recent {@code period} numbers,
 * backed by a fixed-size array used as a circular buffer.
 */
public class SimpleMovingAverageUsingArray {
    /** Running sum of the values currently held in the window. */
    private double sum = 0.0;
    /** Maximum number of values in the window. */
    private final int period;
    /** Circular buffer holding the last {@code period} values. */
    private double[] window = null;
    /** Index of the slot that will receive the next value. */
    private int pointer = 0;
    /** Number of values currently stored (never exceeds {@code period}). */
    private int size = 0;

    /**
     * Creates a moving average over a window of the given length.
     *
     * @param period window length; must be at least 1
     * @throws IllegalArgumentException if {@code period < 1}
     */
    public SimpleMovingAverageUsingArray(int period) {
        if (period < 1) {
            throw new IllegalArgumentException("period must be >0");
        }
        this.period = period;
        window = new double[period];
    }

    /**
     * Adds a number to the window, evicting the oldest one once the window is full.
     *
     * @param number the new value
     */
    public void addNewNumber(double number) {
        sum += number;
        if (size < period) {
            // Window not yet full: just append.
            window[pointer++] = number;
            size++;
        } else {
            // Window full: wrap the pointer so the oldest slot is overwritten,
            // and remove the evicted value from the running sum.
            pointer = pointer % period;
            sum -= window[pointer];
            window[pointer++] = number;
        }
    }

    /**
     * Returns the average of the values currently in the window.
     *
     * @return the running sum divided by the number of stored values
     * @throws IllegalArgumentException if no number has been added yet
     */
    public double getMovingAverage() {
        if (size == 0) {
            throw new IllegalArgumentException("average is undefined");
        }
        // Divide by the number of stored values, not the window capacity,
        // so the average is correct while the window is still filling up.
        return sum / size;
    }
}

Code3

使用递归的方法返回列表中的组合

package com.jimmy.util;

import java.util.List;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;

/**
 * Find unique combinations of a given collection of objects.
 *
 * @author Mahmoud Parsian
 *
 */
public class Combination {

    /**
     * Returns combinations of all sizes, from 0 up to {@code elements.size()}.
     * If elements = { a, b, c }, then findSortedCombinations(elements)
     * will return all unique combinations of elements as:
     *
     *    { [], [a], [b], [c], [a, b], [a, c], [b, c], [a, b, c] }
     *
     * @param <T> element type; must be comparable so each combination can be sorted
     * @param elements a collection of type T elements
     * @return unique combinations of elements, grouped by increasing size
     */
    public static <T extends Comparable<? super T>> List<List<T>> findSortedCombinations(Collection<T> elements) {
        List<List<T>> result = new ArrayList<List<T>>();
        for (int i = 0; i <= elements.size(); i++) {
            result.addAll(findSortedCombinations(elements, i));
        }
        return result;
    }


    /**
     * Returns unique combinations of size=n, built recursively from the
     * combinations of size n-1.
     * If elements = { a, b, c }, then findSortedCombinations(elements, 2) will return:
     *
     *     { [a, b], [a, c], [b, c] }
     *
     * @param <T> element type; must be comparable so each combination can be sorted
     * @param elements a collection of type T elements
     * @param n size of combinations; must not be negative
     * @return unique combinations of elements of size = n (empty when n exceeds
     *         the number of distinct elements)
     * @throws IllegalArgumentException if {@code n} is negative
     *
     */
    public static <T extends Comparable<? super T>> List<List<T>> findSortedCombinations(Collection<T> elements, int n) {
        if (n < 0) {
            // Without this guard the recursion below would never reach n == 0.
            throw new IllegalArgumentException("n must be >= 0: " + n);
        }

        List<List<T>> result = new ArrayList<List<T>>();

        if (n == 0) {
            // Exactly one combination of size 0: the empty one.
            result.add(new ArrayList<T>());
            return result;
        }

        List<List<T>> combinations = findSortedCombinations(elements, n - 1);
        for (List<T> combination : combinations) {
            for (T element : elements) {
                if (combination.contains(element)) {
                    continue;
                }

                List<T> list = new ArrayList<T>(combination);
                list.add(element);
                //sort items not to duplicate the items
                //   example: (a, b, c) and (a, c, b) might become
                //   different items to be counted if not sorted
                Collections.sort(list);

                if (result.contains(list)) {
                    continue;
                }

                result.add(list);
            }
        }

        return result;
    }


    /**
     * Demo: prints all 2-element combinations of six letters.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        List<String> elements = Arrays.asList("a", "b", "c", "d", "e", "f");
        List<List<String>> combinations = findSortedCombinations(elements, 2);
        System.out.println(combinations);
    }

}

你可能感兴趣的:(Hadoop,java)