上一篇文章实现的二次排序中,key 和 value 都是数字;本文接着实现 key 为字母或单词、value 为数字的二次排序。
hadoop fs -put secondsort2 /secondsort2
MyKey类:
package com.mapreduce.secondarysort2;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Composite map-output key used for the secondary sort: a textual first part
 * and an integer second part.
 *
 * <p>Keys are ordered by the first part and, for equal first parts, by the
 * second part in ascending order. {@link #hashCode()} is derived from the
 * first part only, so a hash-based partitioner places every key that shares a
 * first part in the same partition.
 */
public class MyKey implements WritableComparable<MyKey> {
    private final Text first = new Text();
    private int second = 0;

    /**
     * Sets both parts of the key.
     *
     * @param first  the primary (grouping) part; its content is copied, so the
     *               caller may reuse or modify the argument afterwards
     * @param second the secondary (ordering) part
     */
    public void set(Text first, int second) {
        // Copy the bytes instead of keeping the caller's mutable Text instance.
        this.first.set(first);
        this.second = second;
    }

    /** @return the primary part of the key (the internal instance, not a copy) */
    public Text getFirst() {
        return first;
    }

    /** @return the secondary part of the key */
    public int getSecond() {
        return second;
    }

    /**
     * Orders keys by the first part, then by the second part.
     *
     * @param o the key to compare against
     * @return a negative, zero, or positive value as this key is less than,
     *         equal to, or greater than {@code o}
     */
    @Override
    public int compareTo(MyKey o) {
        int byFirst = first.compareTo(o.first);
        if (byFirst != 0) {
            return byFirst;
        }
        // Integer.compare avoids the overflow that "second - o.second" can hit.
        return Integer.compare(second, o.second);
    }

    /** Serializes the first part followed by the second part. */
    @Override
    public void write(DataOutput out) throws IOException {
        first.write(out);
        out.writeInt(second);
    }

    /** Restores both parts in the same order in which {@link #write} emits them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        first.readFields(in);
        second = in.readInt();
    }

    @Override
    public String toString() {
        return "<" + first.toString() + ", " + second + ">";
    }

    /**
     * Hashes on the first part only. This is still consistent with
     * {@link #equals(Object)} (equal keys have equal first parts) and keeps
     * keys with the same first part together under hash partitioning.
     */
    @Override
    public int hashCode() {
        return first.hashCode();
    }

    /** Two keys are equal when both their first and second parts are equal. */
    @Override
    public boolean equals(Object right) {
        if (this == right) {
            return true;
        }
        if (!(right instanceof MyKey)) {
            return false;
        }
        MyKey other = (MyKey) right;
        // Compare Text by content; "==" would only compare references.
        return second == other.second && first.equals(other.first);
    }
}
MyMapper类:
package com.mapreduce.secondarysort2;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper for the secondary sort. Each input line is expected to hold two
 * tab-separated fields: a word and an integer. The mapper emits a
 * {@link MyKey} built from both fields as the key and the integer as the value.
 */
public class MyMapper extends Mapper<LongWritable, Text, MyKey, IntWritable> {
    private final MyKey key = new MyKey();
    private final IntWritable value = new IntWritable();

    /**
     * Converts one input line into a (MyKey, IntWritable) pair.
     *
     * @param inkey   the input key supplied by the input format (not used here)
     * @param invalue the text of the line
     * @param context the task context used to emit output and update counters
     * @throws NumberFormatException if the second field is not a valid integer
     */
    @Override
    public void map(LongWritable inkey, Text invalue, Context context)
            throws IOException, InterruptedException {
        String[] strs = invalue.toString().split("\t");
        if (strs.length < 2) {
            // A blank or tab-less line has no second field; count it and skip
            // it instead of failing the task with ArrayIndexOutOfBoundsException.
            context.getCounter("MyMapper", "MALFORMED_LINES").increment(1);
            return;
        }

        // Parse once and reuse the result for the key, the value and the trace.
        int number = Integer.parseInt(strs[1]);
        System.out.println(strs[0] + "\t" + number);

        key.set(new Text(strs[0]), number);
        value.set(number);
        System.out.println("MyMappr : ");
        System.out.println("key = " + key + ", value = " + value);
        context.write(key, value);
    }
}
GroupingComparator类:
package com.mapreduce.secondarysort2;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Grouping comparator for {@link MyKey}: two keys compare as equal when their
 * first parts are equal, regardless of the second part.
 */
public class GroupingComparator extends WritableComparator {

    /** Registers {@link MyKey} as the key class and asks the base class to create key instances. */
    public GroupingComparator() {
        super(MyKey.class, true);
    }

    /**
     * Compares two keys by their first part only.
     *
     * @param a the left key, expected to be a {@link MyKey}
     * @param b the right key, expected to be a {@link MyKey}
     * @return the result of comparing the two first parts; keys that yield 0
     *         land in the same group, and the reducer is invoked once per group
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final MyKey left = (MyKey) a;
        final MyKey right = (MyKey) b;
        return left.getFirst().compareTo(right.getFirst());
    }
}
MyReducer类:
package com.mapreduce.secondarysort2;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for the secondary sort. For every group it first emits a separator
 * line, then one (first part, value) pair per value in the order the values
 * are delivered.
 */
public class MyReducer extends Reducer<MyKey, IntWritable, Text, IntWritable> {
    /** Marker written before each group. */
    private final Text groupSeparator = new Text("********************");
    /** Reusable output key holding the first part of the current group's key. */
    private final Text outputKey = new Text();

    /**
     * Writes the separator followed by every value of the group.
     *
     * @param key     the key of the current group
     * @param values  the values of the group
     * @param context the task context used to emit output
     */
    @Override
    public void reduce(MyKey key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        System.out.println("Reducer : ");
        System.out.print("MyKey = " + key.toString() + "values = ");

        // Group separator: emitted with a null value.
        context.write(groupSeparator, null);

        // Copy the first part of the key into the reusable output key.
        outputKey.set(key.getFirst());

        // NOTE: values are expected to arrive already ordered, since the mapper
        // stores the same number in the key's second part and in the value.
        for (IntWritable current : values) {
            System.out.print(current + " ");
            context.write(outputKey, current);
        }
        System.out.println();
    }
}
SecondarySortApp2类:
package com.mapreduce.secondarysort2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import java.net.URI;
/**
 * Driver for the secondary-sort job: wires {@link MyMapper}, {@link MyReducer}
 * and {@link GroupingComparator} together and submits the job.
 */
public class SecondarySortApp2 {
    /** Default input location, used when no first command-line argument is given. */
    private static final String INPUT_PATH = "hdfs://master002:9000/secondsort2";
    /** Default output location, used when no second command-line argument is given. */
    private static final String OUTPUT_PATH = "hdfs://master002:9000/outputsecondsort2";

    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * @param args optional overrides: {@code args[0]} is the input path and
     *             {@code args[1]} is the output path; missing arguments fall
     *             back to {@link #INPUT_PATH} and {@link #OUTPUT_PATH}
     * @throws Exception if the file system cannot be accessed or the job fails to run
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        Configuration conf = new Configuration();

        final Path inputPath = new Path(args.length > 0 ? args[0] : INPUT_PATH);
        final Path outputPath = new Path(args.length > 1 ? args[1] : OUTPUT_PATH);

        // Remove a leftover output directory so the job can be re-run.
        // The file system is resolved from the output path itself, so the
        // cleanup targets the right file system even when input and output differ.
        final FileSystem fileSystem = outputPath.getFileSystem(conf);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        Job job = Job.getInstance(conf, "SecondarySort2App");
        // Class used to locate the job jar.
        job.setJarByClass(SecondarySortApp2.class);

        // Map side.
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce side.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Grouping: keys that share the same first part go to one reduce call.
        job.setGroupingComparatorClass(GroupingComparator.class);

        // Input format and location.
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, inputPath);

        // Output format and location.
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Submit the job and wait for it to finish.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
MapReduce 基础编程到这里也就基本结束了,接下来……