MapReduce Top-N
package mr.topN; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class TopNJob extends Configuration implements Tool{ private String input =null; private String output =null; private String N = null; @Override public Configuration getConf() { return new Configuration(); } @Override public void setConf(Configuration conf) { } @Override public int run(String[] arg0) throws Exception { setArgs(arg0); checkParam(); Configuration conf = new Configuration(); if(N==null||"".equals(N.trim())){ conf.set("N", N); } Job job = new Job(conf, "TopNJob"); job.setJarByClass(TopNJob.class); job.setMapperClass(TopNMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(TopNReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(input)); Path path = new Path(output); FileSystem fs = FileSystem.get(conf); if(fs.exists(path)){ fs.delete(path,true); } FileOutputFormat.setOutputPath(job, path); return job.waitForCompletion(true) ? 
0 : 1; } private void checkParam() { if(input==null||"".equals(input.trim())){ System.out.println("no input path!"); userMaunel(); System.exit(-1); } if(output==null||"".equals(output.trim())){ System.out.println("no output path!"); userMaunel(); System.exit(-1); } } private void userMaunel() { System.out.println("<args> eg -i input -o output [-n top defalt=10]"); } //-i xx -o xxx -n xx private void setArgs(String[] args) { for(int i=0;i<args.length;i++){ if("-i".equals(args[i])){ input = args[++i]; } if("-o".equals(args[i])){ output = args[++i]; } if("-n".equals(args[i])){ N = args[++i]; } } } public static void main(String[] args) throws Exception { ToolRunner.run(new TopNJob(), args); } }
package mr.topN; import java.io.IOException; import java.util.Arrays; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class TopNMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> { int len; int[] top; @Override protected void cleanup( Mapper<LongWritable, Text, IntWritable, IntWritable>.Context context) throws IOException, InterruptedException { for(int x=1;x<len+1;x++){ context.write(new IntWritable(top[x]), new IntWritable(top[x])); } } @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, IntWritable, IntWritable>.Context context) throws IOException, InterruptedException { String line = value.toString().trim(); if(line.length()>0){ int payment = Integer.parseInt(line); add(payment); } } private void add(int payment) { top[0] = payment; Arrays.sort(top); } @Override protected void setup( Mapper<LongWritable, Text, IntWritable, IntWritable>.Context context) throws IOException, InterruptedException { len = context.getConfiguration().getInt("N", 10); top = new int[len+1]; } }
package mr.topN; import java.io.IOException; import java.util.Arrays; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.mapreduce.Reducer; public class TopNReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> { int len; int[] top; @Override protected void cleanup( Reducer<IntWritable, IntWritable, IntWritable, IntWritable>.Context context) throws IOException, InterruptedException { for(int i=len;i>0;i--){ context.write(new IntWritable(len-i+1), new IntWritable(top[i])); } } @Override protected void reduce( IntWritable arg0, Iterable<IntWritable> arg1, Reducer<IntWritable, IntWritable, IntWritable, IntWritable>.Context arg2) throws IOException, InterruptedException { add(arg0.get()); } private void add(int payment) { top[0] = payment; Arrays.sort(top); } @Override protected void setup( Reducer<IntWritable, IntWritable, IntWritable, IntWritable>.Context context) throws IOException, InterruptedException { len = context.getConfiguration().getInt("N", 10); top = new int[len+1]; } }