Without a Combiner or the in-mapper design pattern

```java
import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class digitaver1 {

    // Each input line has the form "key:value"; emit (key, value) unchanged.
    public static class mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] ss = value.toString().split(":");
            context.write(new Text(ss[0]), new IntWritable(Integer.parseInt(ss[1])));
        }
    }

    // Sum and count all values of a key, then emit the average.
    public static class reducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> value, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            int cnt = 0;
            // Iterate once with for-each; the original called value.iterator() in both the
            // loop condition and body, which only works because Hadoop hands back the same
            // iterator instance every time.
            for (IntWritable v : value) {
                sum += v.get();
                cnt += 1;
            }
            context.write(key, new DoubleWritable((double) sum / (double) cnt));
        }
    }

    public static void main(String[] args) {
        try {
            Job job = new Job();  // deprecated in Hadoop 2; Job.getInstance() is the modern equivalent
            job.setJarByClass(digitaver1.class);
            job.setJobName("digitaver1");

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            job.setMapperClass(mapper.class);
            job.setReducerClass(reducer.class);

            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(DoubleWritable.class);

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (IOException | InterruptedException | ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
}
```
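For concreteness, the mapper assumes one key:value record per input line. With the hypothetical input below, the job emits one average per key; TextOutputFormat writes key and value separated by a tab.

```
a:1
a:3
b:4
```

would produce (tab-separated):

```
a    2.0
b    4.0
```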
Using a Combiner

```java
// The map output value becomes a (sum, count) pair so that partial results
// can be merged associatively before they reach the reducer.
public static class mapper extends Mapper<LongWritable, Text, Text, pair> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] ss = value.toString().split(":");
        pair p = new pair(Integer.parseInt(ss[1]), 1);
        context.write(new Text(ss[0]), p);
    }
}

// Pre-aggregates the pairs emitted by one map task; a combiner's input and
// output types must both match the map output types.
public static class combiner extends Reducer<Text, pair, Text, pair> {
    @Override
    protected void reduce(Text key, Iterable<pair> value, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        int cnt = 0;
        for (pair p : value) {
            sum += p.getLeft().get();
            cnt += p.getRight().get();
        }
        context.write(key, new pair(sum, cnt));
    }
}

public static class reducer extends Reducer<Text, pair, Text, DoubleWritable> {
    @Override
    protected void reduce(Text key, Iterable<pair> value, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        int cnt = 0;
        for (pair p : value) {
            sum += p.getLeft().get();
            cnt += p.getRight().get();
        }
        context.write(key, new DoubleWritable((double) sum / (double) cnt));
    }
}
```

The driver is essentially the same as in the first version; the two settings that do change are sketched after the next section.

Using the in-mapper design pattern

```java
// additionally requires: import java.util.HashMap; import java.util.Map;
public static class mapper extends Mapper<LongWritable, Text, Text, pair> {
    // Per-task state: key -> "sum:count", accumulated across all map() calls.
    private Map<String, String> map;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        map = new HashMap<String, String>();
    }

    // All of this task's input is processed before anything is handed to the
    // combiner/reducer. Questions from the original post: map output used to become
    // available to reducers as the job progressed, so does delaying emission hurt
    // efficiency? The data volume shrinks, but output starts later; is shuffle
    // congestion lower? Load balancing? Less total data on the network?
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] ss = value.toString().split(":");
        if (!map.containsKey(ss[0])) {
            map.put(ss[0], ss[1] + ":" + 1);
        } else {
            String tmp = map.get(ss[0]);
            String[] tt = tmp.split(":");
            int ta = Integer.parseInt(ss[1]) + Integer.parseInt(tt[0]);
            int tb = Integer.parseInt(tt[1]) + 1;
            map.put(ss[0], ta + ":" + tb);
        }
    }

    // Emit one (sum, count) pair per distinct key once the whole split has been seen.
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<String, String> e : map.entrySet()) {
            String[] tt = e.getValue().split(":");
            pair p = new pair(Integer.parseInt(tt[0]), Integer.parseInt(tt[1]));
            context.write(new Text(e.getKey()), p);
        }
    }
}

public static class reducer extends Reducer<Text, pair, Text, DoubleWritable> {
    @Override
    protected void reduce(Text key, Iterable<pair> value, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        int cnt = 0;
        for (pair p : value) {
            sum += p.getLeft().get();
            cnt += p.getRight().get();
        }
        context.write(key, new DoubleWritable((double) sum / (double) cnt));
    }
}
```

To the questions in the comments above: a reduce task can only fetch a map task's output after that map task finishes, so deferring emission to cleanup() does not delay the shuffle for that task. What in-mapper combining buys is less data serialized, spilled to local disk, and copied across the network; the cost is that the per-key state must fit in the mapper's memory (if it might not, the state has to be flushed periodically). The driver again matches the first version, except that the map output value class must be pair and no combiner is registered.
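Both the combiner and the in-mapper versions rely on a pair class that the original post never shows. A minimal Writable consistent with how it is used above (getLeft() holds the running sum, getRight() the running count) might look like this:

```java
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

// Hypothetical reconstruction of the (sum, count) value type used above.
public class pair implements Writable {
    private final IntWritable left = new IntWritable();   // running sum
    private final IntWritable right = new IntWritable();  // running count

    public pair() { }  // no-arg constructor required by Hadoop's Writable serialization

    public pair(int left, int right) {
        this.left.set(left);
        this.right.set(right);
    }

    public IntWritable getLeft()  { return left; }
    public IntWritable getRight() { return right; }

    @Override
    public void write(DataOutput out) throws IOException {
        left.write(out);
        right.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        left.readFields(in);
        right.readFields(in);
    }
}
```

For the combiner version, the two driver settings that differ from the first program would be (a sketch; these lines are not shown in the original):

```java
job.setCombinerClass(combiner.class);    // ask the framework to pre-aggregate map output locally
job.setMapOutputValueClass(pair.class);  // map now emits pair instead of IntWritable
```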
To summarize the difference between the two approaches:

in-mapper design pattern: aggregation happens inside a single map task, explicitly in user code, so it is guaranteed to run.
Combiner: aggregation is likewise applied to each map task's output, but by the framework during the spill/merge phase; Hadoop may invoke the combiner zero, one, or several times, so it is an optimization hint rather than a guarantee. In both designs it is the reducer that finally merges the results of all mappers.
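As a closing aside, the in-mapper accumulator does not have to round-trip through strings. Here is a sketch of the same pattern with an int[] holding {sum, count} (a hypothetical variant, not from the original post; mapper2 is an invented name, and the same imports as above plus java.util.HashMap and java.util.Map apply):

```java
public static class mapper2 extends Mapper<LongWritable, Text, Text, pair> {
    private Map<String, int[]> state;  // key -> {sum, count}

    @Override
    protected void setup(Context context) {
        state = new HashMap<String, int[]>();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) {
        String[] ss = value.toString().split(":");
        int[] sc = state.get(ss[0]);
        if (sc == null) {
            state.put(ss[0], new int[] { Integer.parseInt(ss[1]), 1 });
        } else {
            sc[0] += Integer.parseInt(ss[1]);  // update in place, no reparsing
            sc[1] += 1;
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<String, int[]> e : state.entrySet()) {
            int[] sc = e.getValue();
            context.write(new Text(e.getKey()), new pair(sc[0], sc[1]));
        }
    }
}
```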