Custom Partitioner:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class MyPartitioner extends Partitioner<LongWritable, IntWritable> {

    @Override
    public int getPartition(LongWritable key, IntWritable value, int numPartitions) {
        /**
         * Partition by line number: even-numbered lines go to reducer 0 and
         * odd-numbered lines go to reducer 1. The key is also rewritten to 0 or 1
         * so that, on the reduce side, all even-line values and all odd-line values
         * each end up in a single iterable and can be summed together.
         */
        if (key.get() % 2 == 0) {
            key.set(0);
            return 0;
        } else {
            key.set(1);
            return 1;
        }
    }
}
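As a quick sanity check, the routing logic can be exercised directly, outside of a job. This is only an illustrative sketch; the class name PartitionerCheck and the dummy values are made up here:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;

public class PartitionerCheck {
    public static void main(String[] args) {
        MyPartitioner partitioner = new MyPartitioner();
        // Line numbers 1..4 with dummy values and 2 partitions, matching setNumReduceTasks(2).
        for (long lineNum = 1; lineNum <= 4; lineNum++) {
            LongWritable key = new LongWritable(lineNum);
            int partition = partitioner.getPartition(key, new IntWritable(0), 2);
            // Expected: even line numbers -> partition 0, odd line numbers -> partition 1,
            // and the key is rewritten in place to the partition number.
            System.out.println("line " + lineNum + " -> partition " + partition
                    + ", key becomes " + key.get());
        }
    }
}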
Map phase:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyMapper extends Mapper<LongWritable, Text, LongWritable, IntWritable> {

    private long lineNum = 0;
    private LongWritable okey = new LongWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        lineNum++;
        okey.set(lineNum);
        /**
         * Emit the line number as the key and the line's numeric content as the value.
         * This example only illustrates how a custom Partitioner is wired in, so it
         * does not handle line numbering across multiple mappers; assume a single
         * mapper (a single input split).
         */
        context.write(okey, new IntWritable(Integer.parseInt(value.toString())));
    }
}
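For intuition, assume a hypothetical input file with one integer per line, say 3, 5, 8, 13 on lines 1 through 4. The mapper emits (1, 3), (2, 5), (3, 8), (4, 13). MyPartitioner then rewrites the keys and routes the records: lines 2 and 4 (even) become (0, 5) and (0, 13) and go to reducer 0, while lines 1 and 3 (odd) become (1, 3) and (1, 8) and go to reducer 1.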
Reduce phase:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MyReducer extends Reducer<LongWritable, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(LongWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        // The partitioner rewrote every key to 0 (even lines) or 1 (odd lines),
        // so each reducer sees exactly one key group.
        if (key.get() == 0) {
            context.write(new Text("Sum of even-numbered lines:"), new IntWritable(sum));
        } else if (key.get() == 1) {
            context.write(new Text("Sum of odd-numbered lines:"), new IntWritable(sum));
        }
    }
}
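Continuing the hypothetical input above: reducer 0 receives key 0 with values {5, 13} and writes "Sum of even-numbered lines:" 18, while reducer 1 receives key 1 with values {3, 8} and writes "Sum of odd-numbered lines:" 11.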
Driver:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobMain {

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration, "partitioner-job");
        job.setJarByClass(JobMain.class);

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Use the custom Partitioner to split the map output.
        job.setPartitionerClass(MyPartitioner.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // The job needs exactly two reducers, one per partition.
        job.setNumReduceTasks(2);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path outputDir = new Path(args[1]);
        FileSystem fs = FileSystem.get(configuration);
        if (fs.exists(outputDir)) {
            fs.delete(outputDir, true);
        }
        FileOutputFormat.setOutputPath(job, outputDir);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Run results:

The job produces two output files: part-r-00000 (reducer 0) holds the sum of the even-numbered lines, and part-r-00001 (reducer 1) holds the sum of the odd-numbered lines.

Conclusion:

A custom Partitioner gives full control over which reducer each map output record goes to. Here the line number decides the partition (even lines to reducer 0, odd lines to reducer 1), and rewriting the key to the partition number means each reducer receives a single key group whose values can be summed in one pass.
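For reference, once the classes are packaged into a jar (the jar name and HDFS paths here are placeholders), the job can be submitted with something like: hadoop jar partitioner-demo.jar JobMain /data/numbers /data/partitioner-out, where the two arguments become args[0] and args[1] in JobMain. The two sums can then be read back with: hdfs dfs -cat /data/partitioner-out/part-r-00000 and /data/partitioner-out/part-r-00001.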