// 我的 Hadoop 初学程序:简单数据排序(Sort)

package bin;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;



public class Sort {

//	对输入文件中数据进行排序。输入文件中的每行内容均为一个数字,即一个数据。
//	要求在输出中每行有两个间隔的数字,其中,第一个代表原始数据在原始数据集中的位次,第二个代表原始数据。
//	private static int a;
	public static class SortMap extends Mapper {
		//这个参数第二个为甚么要用Text而不是IntWritable呢??
		private static IntWritable data = new IntWritable();
	
		public void map(Object key,Text value,Context context) {
			String line = value.toString();
			data.set(Integer.parseInt(line));//sting类型转换为int类型,要求string文本中前缀不能有空格,而且也因此要求txt文件中不能有空行
			try {
				context.write(data, new IntWritable(1));
			} catch (IOException | InterruptedException e) {
				// TODO Auto-generated catch block
				e.printStackTrace();
			}
		}
	}
	
	public static class SortReduce extends Reducer{
		private static IntWritable lineNumber=new IntWritable(1);
		@SuppressWarnings("unused")
		public void reduce(IntWritable key,Iterable values,Context context) {
			IntWritable previous =new IntWritable(key.get()-1);
			for (IntWritable value : values) {
				if (key.get()!=previous.get()) {
					try {
						context.write(lineNumber, key);
					} catch (IOException | InterruptedException e) {
						// TODO Auto-generated catch block
						e.printStackTrace();
					}
				}
				lineNumber =new IntWritable(lineNumber.get()+1);
				previous=key;
			}
		}
	}
	
	/**
	 * @param args
	 * @throws IOException 
	 * @throws InterruptedException 
	 * @throws ClassNotFoundException 
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		// TODO Auto-generated method stub
		Configuration configuration=new Configuration();
		String[] otherArgs=new GenericOptionsParser(configuration, args).getRemainingArgs();
		if (otherArgs.length!=2) {
			System.out.println("Usage: Sort  ");
			System.exit(2);
		}
		Job job=new Job(configuration, "Tacert Sort");
		job.setJarByClass(Sort.class);
		job.setMapperClass(SortMap.class);
//		job.setCombinerClass(SortReduce.class);   这里加上combiner之后结果是错的
		job.setReducerClass(SortReduce.class);
		job.setOutputKeyClass(IntWritable.class);
		job.setOutputValueClass(IntWritable.class);
		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
		System.exit(job.waitForCompletion(true)? 0 : 1);
	}

}


// 你可能感兴趣的:(我的 Hadoop 初学程序:简单数据排序 Sort)