Hadoop学习笔记之数据排序

package cn.yws;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;



/**
 * MapReduce job that sorts a text file of integers (one per line) in
 * ascending order and prefixes every value with a running rank.
 *
 * <p>The sort itself is done by the framework's shuffle phase: the mapper
 * emits each number as the map output key, and the reducer receives the keys
 * in ascending order. Duplicated numbers are kept (one output record each).
 */
public class MySort {

	/**
	 * Parses every input line as an integer and emits it as the key, so the
	 * shuffle phase orders the records numerically.
	 */
	public static class MyMap extends Mapper<Object, Text, IntWritable, IntWritable>
	{
		/** Placeholder value; only its multiplicity per key matters downstream. */
		private static final IntWritable ONE=new IntWritable(1);

		/** Reused output key; safe because context.write serializes it immediately. */
		private final IntWritable data=new IntWritable();

		/**
		 * Emits {@code (number, 1)} for one input line.
		 *
		 * @param key     input key supplied by the input format (unused)
		 * @param value   one line of the input file, expected to hold a single integer
		 * @param context sink for the map output
		 * @throws NumberFormatException if a non-blank line is not a valid integer
		 */
		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			// Trim so that trailing whitespace (e.g. '\r' from Windows files) does not break parsing.
			String line=value.toString().trim();
			if(line.isEmpty())
			{
				// Blank lines (such as a trailing newline at end of file) carry no data.
				return;
			}
			data.set(Integer.parseInt(line));
			context.write(data, ONE);
		}
	}

	/**
	 * Writes every received number together with a running rank. The rank
	 * starts at 0 and is incremented once per output record.
	 *
	 * <p>Must NOT be used as a combiner: its output {@code (rank, number)} has
	 * different semantics from the map output {@code (number, 1)}.
	 */
	public static class MyReduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable>
	{
		/** Running rank of the next record written by this reducer instance. */
		private final IntWritable linenum=new IntWritable();

		/**
		 * Emits {@code (rank, key)} once for every occurrence of {@code key}.
		 *
		 * @param key     the number being ranked
		 * @param values  one element per occurrence of {@code key} in the input
		 * @param context sink for the job output
		 */
		@Override
		protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
				throws IOException, InterruptedException {
			// Only the number of values matters: one output line per duplicate.
			for(IntWritable ignored:values)
			{
				context.write(linenum, key);
				linenum.set(linenum.get()+1);
			}
		}
	}

	/**
	 * Configures and runs the sort job.
	 *
	 * @param args optional Hadoop generic options followed by either no paths
	 *             (defaults {@code sort_in} / {@code sort_out} are used) or
	 *             exactly {@code <in> <out>}
	 * @throws Exception if the job cannot be configured or submitted
	 */
	public static void main(String[] args) throws Exception {

		Configuration configuration=new Configuration();
		configuration.set("mapred.job.tracker", "192.168.1.9:9001");

		// Let the parser consume generic options (-D, -fs, ...) from the real
		// command line; whatever is left over are the input/output paths.
		String otherArgs[]=new GenericOptionsParser(configuration,args).getRemainingArgs();
		if(otherArgs.length==0)
		{
			otherArgs=new String[]{"sort_in","sort_out"};
		}

		if(otherArgs.length!=2)
		{
			System.err.println("Usage: Data MySort <in> <out>");
			System.exit(2);
		}

		Job job=new Job(configuration,"data sort");

		job.setJarByClass(MySort.class);

		job.setMapperClass(MyMap.class);
		// No combiner on purpose: MyReduce turns (number, 1) into (rank, number),
		// which would corrupt the reducer's input if applied on the map side.
		job.setReducerClass(MyReduce.class);

		// A single reduce task is required for a globally sorted output and a
		// single consistent rank counter with the default hash partitioner.
		job.setNumReduceTasks(1);

		job.setOutputKeyClass(IntWritable.class);
		job.setOutputValueClass(IntWritable.class);

		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

		System.exit(job.waitForCompletion(true)?0:1);
	}

}

输入文件 sort_in 的内容（每行一个整数）：

12
55
122
4564
12
54
152
79
32
458
452464
1
23
45
45


你可能感兴趣的:(hadoop学习笔记之数据排序)