MR topN

MapReduce topN

package mr.topN;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


/**
 * Command-line driver for the TopN MapReduce job.
 *
 * <p>Usage: {@code -i <input> -o <output> [-n <top>]}. The driver wires
 * {@link TopNMapper} and {@link TopNReducer} together, forwards the optional
 * {@code -n} value to the tasks through the job property {@code "N"}, and
 * deletes an already existing output directory before submitting the job.
 *
 * <p>The job is forced to a single reduce task: the reducer keeps its
 * candidates in memory, so a global ranking is only produced when every
 * mapper's candidates reach the same reducer.
 */
public class TopNJob extends Configuration implements Tool{

	private String input =null;
	private String output =null;
	private String N = null;
	/** Configuration injected through {@link #setConf}; created lazily if never injected. */
	private Configuration conf;

	/**
	 * Returns the configuration this tool runs with.
	 *
	 * @return the configuration passed to {@link #setConf}, or a fresh default
	 *         one (created once and then reused) if none was set
	 */
	@Override
	public Configuration getConf() {
		if (conf == null) {
			conf = new Configuration();
		}
		return conf;
	}

	/**
	 * Stores the configuration so that {@link #run} uses it, including any
	 * options the caller already applied to it.
	 *
	 * @param conf configuration to run with
	 */
	@Override
	public void setConf(Configuration conf) {
		this.conf = conf;
	}

	/**
	 * Parses the arguments, configures the job and waits for it to finish.
	 *
	 * @param arg0 command-line arguments ({@code -i}, {@code -o}, optional {@code -n})
	 * @return 0 if the job succeeded, 1 if it failed, -1 if the arguments were invalid
	 * @throws Exception if job setup or execution fails
	 */
	@Override
	public int run(String[] arg0) throws Exception {
		if (!setArgs(arg0) || !checkParam()) {
			userMaunel();
			return -1;
		}
		Configuration conf = getConf();
		// Forward N only when the user supplied one; otherwise the tasks use their default.
		if(N!=null&&!"".equals(N.trim())){
			conf.set("N", N.trim());
		}
		Job job = new Job(conf, "TopNJob");
		job.setJarByClass(TopNJob.class);

		job.setMapperClass(TopNMapper.class);
		job.setMapOutputKeyClass(IntWritable.class);
		job.setMapOutputValueClass(IntWritable.class);

		job.setReducerClass(TopNReducer.class);
		job.setOutputKeyClass(IntWritable.class);
		job.setOutputValueClass(IntWritable.class);
		// A global top-N needs all candidates in one reducer.
		job.setNumReduceTasks(1);

		FileInputFormat.addInputPath(job, new Path(input));
		Path path = new Path(output);
		// Resolve the filesystem from the path itself so a fully qualified
		// output URI on a non-default filesystem is handled correctly.
		FileSystem fs = path.getFileSystem(conf);
		if(fs.exists(path)){
			fs.delete(path,true);
		}
		FileOutputFormat.setOutputPath(job, path);
		return job.waitForCompletion(true) ? 0 : 1;
	}

	/**
	 * Validates the parsed arguments and prints the reason for any failure.
	 *
	 * @return {@code true} if input and output are present and {@code -n},
	 *         when given, is a positive integer
	 */
	private boolean checkParam() {
		if(input==null||"".equals(input.trim())){
			System.out.println("no input path!");
			return false;
		}
		if(output==null||"".equals(output.trim())){
			System.out.println("no output path!");
			return false;
		}
		if(N!=null&&!"".equals(N.trim())&&!isPositiveInt(N.trim())){
			System.out.println("-n must be a positive integer!");
			return false;
		}
		return true;
	}

	/** Returns whether {@code text} parses as an {@code int} greater than zero. */
	private static boolean isPositiveInt(String text) {
		try {
			return Integer.parseInt(text) > 0;
		} catch (NumberFormatException e) {
			return false;
		}
	}

	/** Prints the command-line usage. */
	private void userMaunel() {
		System.out.println("<args> eg -i input -o output [-n top defalt=10]");
	}

	/**
	 * Reads {@code -i}, {@code -o} and {@code -n} together with their values.
	 * Unknown tokens are skipped.
	 *
	 * @param args command-line arguments, e.g. {@code -i xx -o xxx -n xx}
	 * @return {@code false} if a recognised flag is the last token and
	 *         therefore has no value
	 */
	private boolean setArgs(String[] args) {
		for(int i=0;i<args.length;i++){
			String flag = args[i];
			boolean known = "-i".equals(flag)||"-o".equals(flag)||"-n".equals(flag);
			if(!known){
				continue;
			}
			if(i+1>=args.length){
				System.out.println("missing value for " + flag);
				return false;
			}
			// Consume the value here so it is never re-interpreted as a flag.
			String value = args[++i];
			if("-i".equals(flag)){
				input = value;
			}else if("-o".equals(flag)){
				output = value;
			}else{
				N = value;
			}
		}
		return true;
	}

	/**
	 * Runs the tool through {@link ToolRunner} and exits with its return code.
	 *
	 * @param args command-line arguments
	 * @throws Exception if the job throws
	 */
	public static void main(String[] args) throws Exception {
		System.exit(ToolRunner.run(new TopNJob(), args));
	}
}

package mr.topN;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that keeps the N largest integers seen in its input split and emits
 * them once, in {@link #cleanup}, with the value used as both key and value.
 *
 * <p>Each non-blank input line is parsed with {@link Integer#parseInt}; a line
 * that is not a valid integer makes the task fail with a
 * {@link NumberFormatException}. N is read from the job property {@code "N"}
 * and defaults to 10.
 */
public class TopNMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

	/** Number of values to keep. */
	int len;
	/**
	 * Ascending-sorted buffer of {@code len + 1} slots. Slot 0 is scratch space
	 * for the incoming value; the kept values live at the upper end.
	 */
	int[] top;
	/** How many of the upper slots hold real input values (never more than {@code len}). */
	private int filled;

	/**
	 * Reads N and allocates the buffer.
	 *
	 * @throws IllegalArgumentException if the configured N is negative
	 */
	@Override
	protected void setup(Context context)
			throws IOException, InterruptedException {
		len = context.getConfiguration().getInt("N", 10);
		if (len < 0) {
			throw new IllegalArgumentException("N must not be negative: " + len);
		}
		top = new int[len+1];
		// Empty slots must sort below every real value; a zero-filled buffer
		// would shadow negative inputs.
		Arrays.fill(top, Integer.MIN_VALUE);
		filled = 0;
	}

	/**
	 * Parses one line as an integer and offers it to the buffer. Blank lines
	 * are ignored.
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String line = value.toString().trim();
		if(line.length()>0){
			add(Integer.parseInt(line));
		}
	}

	/**
	 * Inserts {@code payment} if it belongs to the N largest values seen so far.
	 *
	 * @param payment candidate value
	 */
	private void add(int payment) {
		if (len == 0) {
			return;
		}
		if (filled < len) {
			// Still filling: slot 0 holds an empty marker, so nothing real is lost.
			filled++;
		} else if (payment <= top[1]) {
			// Buffer full and the candidate does not beat the smallest kept
			// value: skip the sort, the kept set would not change.
			return;
		}
		top[0] = payment;
		Arrays.sort(top);
	}

	/**
	 * Emits the kept values in ascending order. Only slots that hold real input
	 * are written, so fewer than N records yield fewer than N outputs.
	 */
	@Override
	protected void cleanup(Context context)
			throws IOException, InterruptedException {
		for(int x=len-filled+1;x<=len;x++){
			context.write(new IntWritable(top[x]), new IntWritable(top[x]));
		}
	}
}

package mr.topN;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer that merges the candidates emitted by the mappers into the N largest
 * values and writes them in {@link #cleanup} as {@code (rank, value)} pairs,
 * rank 1 being the largest.
 *
 * <p>N is read from the job property {@code "N"} and defaults to 10. The
 * ranking only covers the keys routed to this reducer instance.
 */
public class TopNReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

	/** Number of values to keep. */
	int len;
	/**
	 * Ascending-sorted buffer of {@code len + 1} slots. Slot 0 is scratch space
	 * for the incoming value; the kept values live at the upper end.
	 */
	int[] top;
	/** How many of the upper slots hold real values (never more than {@code len}). */
	private int filled;

	/**
	 * Reads N and allocates the buffer.
	 *
	 * @throws IllegalArgumentException if the configured N is negative
	 */
	@Override
	protected void setup(Context context)
			throws IOException, InterruptedException {
		len = context.getConfiguration().getInt("N", 10);
		if (len < 0) {
			throw new IllegalArgumentException("N must not be negative: " + len);
		}
		top = new int[len+1];
		// Empty slots must sort below every real value; a zero-filled buffer
		// would shadow negative inputs.
		Arrays.fill(top, Integer.MIN_VALUE);
		filled = 0;
	}

	/**
	 * Offers every value of the group to the buffer. Iterating the values
	 * (rather than adding the key once) keeps duplicates: the same number
	 * emitted several times occupies several ranks.
	 *
	 * @param arg0 the grouped key
	 * @param arg1 all values emitted for that key
	 * @param arg2 task context (unused here; output is written in cleanup)
	 */
	@Override
	protected void reduce(
			IntWritable arg0,
			Iterable<IntWritable> arg1,
			Context arg2)
			throws IOException, InterruptedException {
		for (IntWritable candidate : arg1) {
			add(candidate.get());
		}
	}

	/**
	 * Inserts {@code payment} if it belongs to the N largest values seen so far.
	 *
	 * @param payment candidate value
	 */
	private void add(int payment) {
		if (len == 0) {
			return;
		}
		if (filled < len) {
			// Still filling: slot 0 holds an empty marker, so nothing real is lost.
			filled++;
		} else if (payment <= top[1]) {
			// Buffer full and the candidate does not beat the smallest kept value.
			return;
		}
		top[0] = payment;
		Arrays.sort(top);
	}

	/**
	 * Writes the kept values from largest to smallest, keyed by their 1-based
	 * rank. Only slots that hold real values are written, so fewer than N
	 * candidates yield fewer than N output rows.
	 */
	@Override
	protected void cleanup(Context context)
			throws IOException, InterruptedException {
		for(int i=len;i>len-filled;i--){
			context.write(new IntWritable(len-i+1), new IntWritable(top[i]));
		}
	}

}


你可能感兴趣的:(我的代码)