mapreduce编程(二)- 大象书中求每一年的最高温度

书上的例子是为了取出一年当中气温最高的值,那么将年份和气温做了一个复合的key.

1 通过设置partitioner来进行分区。因为分区是按照年份进行的,所以同一年的数据会被分到同一个reducer中。

2 自定义key比较器,按照年份升序,温度值降序。这样map输出的所有kv对就是按照年份升序,温度值降序排列的。
3 自定义分组比较器,所有同一年的数据属于同一个组。由于同一组内的key已经按温度值降序排列,reduce被调用时拿到的key就是该组的第一个key,因此只需要输出这个key中的年份和温度(value是NullWritable,不携带数据),就能达到输出一年最高气温的目的。

代码:


view plaincopy to clipboardprint?
package temperature; 
import java.io.DataInput; 
import java.io.DataOutput; 
import java.io.IOException; 
import java.util.StringTokenizer; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.NullWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.io.WritableComparable; 
import org.apache.hadoop.io.WritableComparator; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Partitioner; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 
public class Temperature { 
    // 自己定义的key类应该实现WritableComparable接口 
    public static class IntPair implements WritableComparable<IntPair> { 
        int first; 
        int second; 
        /**
         * Set the left and right values.
         */ 
        public void set(int left, int right) { 
            first = left; 
            second = right; 
        } 
        public int getFirst() { 
            return first; 
        } 
        public int getSecond() { 
            return second; 
        } 
        @Override 
        // 反序列化 
        public void readFields(DataInput in) throws IOException { 
            // TODO Auto-generated method stub 
            first = in.readInt(); 
            second = in.readInt(); 
        } 
        @Override 
        // 序列化 
        public void write(DataOutput out) throws IOException { 
            // TODO Auto-generated method stub 
            out.writeInt(first); 
            out.writeInt(second); 
        } 
        @Override 
        // key的比较 
        public int compareTo(IntPair o) { 
            // TODO Auto-generated method stub 
            if (first != o.first) { 
                return first < o.first ? -1 : 1; 
            } else if (second != o.second) { 
                return second < o.second ? -1 : 1; 
            } else { 
                return 0; 
            } 
        } 
        // 新定义类应该重写的两个方法 
        @Override 
        public int hashCode() { 
            return first * 157 + second; 
        } 
        @Override 
        public boolean equals(Object right) { 
            if (right == null) 
                return false; 
            if (this == right) 
                return true; 
            if (right instanceof IntPair) { 
                IntPair r = (IntPair) right; 
                return r.first == first && r.second == second; 
            } else { 
                return false; 
            } 
        } 
    } 
    /**
     * 分区函数类。根据first确定Partition。
     */ 
    public static class FirstPartitioner extends 
            Partitioner<IntPair, NullWritable> { 
        @Override 
        public int getPartition(IntPair key, NullWritable value, 
                int numPartitions) { 
            return Math.abs(key.getFirst() * 127) % numPartitions; 
        } 
    } 
    /**
     * key比较函数类。first升序,second降序。
     */ 
    public static class KeyComparator extends WritableComparator { 
        protected KeyComparator() { 
            super(IntPair.class, true); 
        } 
        @Override 
        public int compare(WritableComparable w1, WritableComparable w2) { 
            IntPair ip1 = (IntPair) w1; 
            IntPair ip2 = (IntPair) w2; 
            int l = ip1.getFirst(); 
            int r = ip2.getFirst(); 
            int cmp = (l == r ? 0 : (l < r ? -1 : 1)); 
            if (cmp != 0) { 
                return cmp; 
            } 
            l = ip1.getSecond(); 
            r = ip2.getSecond(); 
            return l == r ? 0 : (l < r ? 1 : -1); // reverse 
        } 
    } 
    /**
     * 分组函数类。属于同一个组的value会放到同一个迭代器中,而比较是否是同一组需要使用GroupingComparator比较器。
     */ 
    // 第二种方法,继承WritableComparator 
    public static class GroupingComparator extends WritableComparator { 
        protected GroupingComparator() { 
            super(IntPair.class, true); 
        } 
        @Override 
        // Compare two WritableComparables. 
        public int compare(WritableComparable w1, WritableComparable w2) { 
            IntPair ip1 = (IntPair) w1; 
            IntPair ip2 = (IntPair) w2; 
            int l = ip1.getFirst(); 
            int r = ip2.getFirst(); 
            return l == r ? 0 : (l < r ? -1 : 1); 
        } 
    } 
    // 自定义map 
    public static class Map extends 
            Mapper<LongWritable, Text, IntPair, NullWritable> { 
        private final IntPair intkey = new IntPair(); 
        public void map(LongWritable key, Text value, Context context) 
                throws IOException, InterruptedException { 
            String line = value.toString(); 
            StringTokenizer tokenizer = new StringTokenizer(line); 
            int left = 0; 
            int right = 0; 
            if (tokenizer.hasMoreTokens()) { 
                left = Integer.parseInt(tokenizer.nextToken()); 
                if (tokenizer.hasMoreTokens()) 
                    right = Integer.parseInt(tokenizer.nextToken()); 
                intkey.set(left, right); 
                context.write(intkey, NullWritable.get()); 
            } 
        } 
    } 
    // 自定义reduce 
    // 
    public static class Reduce extends 
            Reducer<IntPair, NullWritable, IntWritable, IntWritable> { 
        private final IntWritable left = new IntWritable(); 
        private final IntWritable right = new IntWritable(); 
        public void reduce(IntPair key, Iterable<NullWritable> values, 
                Context context) throws IOException, InterruptedException { 
            left.set(key.getFirst()); 
            right.set(key.getSecond()); 
            context.write(left, right); 
        } 
    } 
    /**
     * @param args
     */ 
    public static void main(String[] args) throws IOException, 
            InterruptedException, ClassNotFoundException { 
        // TODO Auto-generated method stub 
        // 读取hadoop配置 
        Configuration conf = new Configuration(); 
        // 实例化一道作业 
        Job job = new Job(conf, "temperature"); 
        job.setJarByClass(Temperature.class); 
        // Mapper类型 
        job.setMapperClass(Map.class); 
        // 不再需要Combiner类型,因为Combiner的输出类型<Text, 
        // IntWritable>对Reduce的输入类型<IntPair, IntWritable>不适用 
        // job.setCombinerClass(Reduce.class); 
        // Reducer类型 
        job.setReducerClass(Reduce.class); 
        // 分区函数 
        job.setPartitionerClass(FirstPartitioner.class); 
        // key比较函数 
        job.setSortComparatorClass(KeyComparator.class); 
        // 分组函数 
        job.setGroupingComparatorClass(GroupingComparator.class); 
        // map 输出Key的类型 
        job.setMapOutputKeyClass(IntPair.class); 
        // map输出Value的类型 
        job.setMapOutputValueClass(NullWritable.class); 
        // rduce输出Key的类型,是Text,因为使用的OutputFormatClass是TextOutputFormat 
        job.setOutputKeyClass(IntWritable.class); 
        // rduce输出Value的类型 
        job.setOutputValueClass(IntWritable.class); 
        // 将输入的数据集分割成小数据块splites,同时提供一个RecordReder的实现。 
        job.setInputFormatClass(TextInputFormat.class); 
        // 提供一个RecordWriter的实现,负责数据输出。 
        job.setOutputFormatClass(TextOutputFormat.class); 
        // 输入hdfs路径 
        FileInputFormat.setInputPaths(job, new Path(args[0])); 
        // 输出hdfs路径 
        FileOutputFormat.setOutputPath(job, new Path(args[1])); 
        // 提交job 
        System.exit(job.waitForCompletion(true) ? 0 : 1); 
    } 


你可能感兴趣的:(apache,mapreduce,编程,hadoop)