MapReduce Programming: Custom Sorting


Input data:

[root@baolibin hadoop]# hadoop fs -text /input/haha
Warning: $HADOOP_HOME is deprecated.

2       1
3       2
1       3



Code:

package hadoop_2_6_0;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SortText {
	
	public static class NewkWritable implements WritableComparable<NewkWritable>{
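		//Composite key: "first" and "second" hold the two columns of a line; records sort by the sum first+second.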
		long first;
		long second;
		
		public NewkWritable(){
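			//Intentionally empty: Hadoop instantiates keys via reflection during deserialization, so a public no-arg constructor is required.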
			
		}
		
		public NewkWritable(long first,long second){
			this.set(first, second);
		}
		
		public void set(long first,long second){
			this.first=first;
			this.second=second;
		}
		
		@Override
		public void readFields(DataInput in) throws IOException {
			this.first=in.readLong();
			this.second=in.readLong();
		}

		@Override
		public void write(DataOutput out) throws IOException {
			out.writeLong(first);
			out.writeLong(second);
		}

		@Override
		public int compareTo(NewkWritable o) {
			//return (int) ((this.first+this.second)-(o.first+o.second));
			//Sort in descending order by the sum of the two fields.
			return (int) ((o.first+o.second)-(this.first+this.second));
		}
		
		@Override
		public String toString() {
			return first+"";
		}
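
		//Note: with more than one reducer, a key type should also override hashCode() so the default HashPartitioner sends equal keys to the same reducer; this example relies on the default single reducer.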
		
	}
	
	public static class MyMapper extends Mapper<LongWritable, Text, NewkWritable, LongWritable>{
		NewkWritable k2=new NewkWritable();
		LongWritable v2=new LongWritable();
		@Override
		protected void map(LongWritable key,Text value,Mapper<LongWritable, Text, NewkWritable, LongWritable>.Context context)throws IOException, InterruptedException {
			String line=value.toString();
			String[] splited=line.split("\t");
			k2.set(Long.parseLong(splited[0]),Long.parseLong(splited[1]));
			v2.set(Long.parseLong(splited[1]));
			//k2's toString() only prints first, so the output key matches the original first column.
			context.write(k2, v2);
		}
	}
	
	
	public static class MyReducer extends Reducer<NewkWritable, LongWritable, NewkWritable, LongWritable>{
		@Override
		protected void reduce(NewkWritable k2,Iterable<LongWritable> v2s,Reducer<NewkWritable, LongWritable, NewkWritable, LongWritable>.Context context)throws IOException, InterruptedException {
			Iterator<LongWritable> iterator=v2s.iterator();
			//Emit every value in the group; guard next() with hasNext().
			while(iterator.hasNext()){
				context.write(k2, iterator.next());
			}
		}
	}
	
	public static void main(String[] args) throws Exception {
		//String INPUT_PATH=args[0];
		//String OUTPUT_PATH=args[1];
		
		String INPUT_PATH="/input/haha";
		String OUTPUT_PATH="/sort_out2";
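		//Note: the output directory must not already exist, or the job fails with FileAlreadyExistsException.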
		
		Configuration conf=new Configuration();
		Job job=Job.getInstance(conf, SortText.class.getSimpleName());
		
		job.setJarByClass(SortText.class);
		//1.1
		FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.1.100:9000"+INPUT_PATH));
		job.setInputFormatClass(TextInputFormat.class);
		//1.2
		job.setMapperClass(MyMapper.class);
		job.setMapOutputKeyClass(NewkWritable.class);
		job.setMapOutputValueClass(LongWritable.class);
		
		//1.3
		//1.4
		//1.5
		
		//2.2
		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(NewkWritable.class);
		job.setOutputValueClass(LongWritable.class);
		
		//2.3
		FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.100:9000"+OUTPUT_PATH));
		job.setOutputFormatClass(TextOutputFormat.class);
		
		System.exit(job.waitForCompletion(true) ? 0 : 1);
		
	}
	
}
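
To run the job (a typical invocation, assuming the class above is packaged into a jar named SortText.jar; the input and output paths are hardcoded in main):

[root@baolibin hadoop]# hadoop jar SortText.jar hadoop_2_6_0.SortText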



Result:

[root@baolibin hadoop]# hadoop fs -text /sort_out2/part-r*
Warning: $HADOOP_HOME is deprecated.

3       2
1       3
2       1


Analysis:

The output is sorted by the sum of the two numbers on each line, in descending order: 3+2=5, 1+3=4, 2+1=3.

We define a custom key type that implements the WritableComparable interface: write and readFields handle serialization, and compareTo defines the sort order.


Writing the comparison as a subtraction is not safe:

return (int) ((o.first+o.second)-(this.first+this.second));

The subtraction can overflow a long, and the narrowing cast to int can flip the sign, either of which would corrupt the sort order for large values. It works in this example only because the data set is tiny and the sums are small.
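
A safer variant, shown as a minimal sketch, uses Long.compare, which avoids both the subtraction overflow and the narrowing cast (it still assumes each per-line sum fits in a long); putting o's sum first keeps the descending order:

@Override
public int compareTo(NewkWritable o) {
	//Long.compare cannot overflow; comparing o's sum against this sum sorts descending.
	return Long.compare(o.first + o.second, this.first + this.second);
}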


The map method splits each line on the tab character and picks out the fields to compare:

k2.set(Long.parseLong(splited[0]),Long.parseLong(splited[1]));
v2.set(Long.parseLong(splited[1]));
context.write(k2, v2);

Reusing the k2 and v2 objects across map calls is safe because context.write serializes their current contents immediately.


The reduce method only needs to iterate over the grouped values and emit them:

Iterator<LongWritable> iterator=v2s.iterator();
while(iterator.hasNext()){
	context.write(k2, iterator.next());
}

Keys whose sums compare equal are grouped into a single reduce call, so the loop matters whenever two input lines share the same sum.


