A worked example: given one text file of student IDs and names and another of student IDs and scores, the first MapReduce job (MapReduce1) joins the two files on student ID, and the second job (MapReduce2) sorts the joined records by score.
student.txt
2017111111 一凡
2017222222 张三
2017333333 李四
2017444444 王五
2017555555 赵刘
score.txt
2017111111 100
2017222222 99
2017333333 98
2017444444 99
2017555555 97
MultiMapperJoinReducer2.java (the join classes plus the driver main method)
package mywork03;
import java.io.IOException;
import mywork03.mapreduce.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MultiMapperJoinReducer2 {
    public static class MapA extends Mapper<LongWritable, Text, Text, Text> {
        public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
            // student.txt lines look like "2017111111 一凡": split into id and name
            String[] str = values.toString().split(" ");
            // Tag the value with "MapA" so the reducer can tell the two sources apart
            context.write(new Text(str[0]), new Text("MapA" + str[1]));
        }
    }
    public static class MapB extends Mapper<Text, Text, Text, Text> {
        public void map(Text key, Text values, Context context) throws IOException, InterruptedException {
            // KeyValueTextInputFormat already splits score.txt into (id, score), so no manual split is needed
            context.write(key, new Text("MapB" + values));
        }
    }
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            StringBuilder data1 = new StringBuilder();
            StringBuilder data2 = new StringBuilder();
            for (Text val : values) {
                if (val.find("MapA") == 0) {
                    data1.append(val);
                } else {
                    data2.append(val);
                }
            }
            // Strip the 4-character "MapA"/"MapB" tags and join name and score with a tab,
            // so the second job can split each line on "\t" into exactly three fields
            context.write(key, new Text(data1.substring(4) + "\t" + data2.substring(4)));
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job 1: join the two input files on student ID
        Job job = Job.getInstance(conf);
        job.setJobName("Table Join");
        job.setJarByClass(MultiMapperJoinReducer2.class);
        job.setNumReduceTasks(1);
        MultipleInputs.addInputPath(job, new Path("F:/xxx/student.txt"), TextInputFormat.class, MapA.class);
        MultipleInputs.addInputPath(job, new Path("F:/xxx/score.txt"), KeyValueTextInputFormat.class, MapB.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:8020/output"));
        job.waitForCompletion(true);
        // Job 2: sort the joined records by score
        String inputPath = "hdfs://localhost:8020/output/";
        String outputPath = "hdfs://localhost:8020/output1";
        args = new String[] { inputPath, outputPath };
        Configuration conf2 = new Configuration();
        Job job2 = Job.getInstance(conf2);
        // Important: pass this driver class (the one containing main) here, not the
        // mapreduce class that holds the map and reduce methods, so the job jar can be located
        job2.setJarByClass(MultiMapperJoinReducer2.class);
        job2.setOutputKeyClass(Bean.class);
        job2.setOutputValueClass(NullWritable.class);
        job2.setMapperClass(Map.class);                 // mywork03.mapreduce.Map, imported above
        job2.setReducerClass(mapreduce.Reduce.class);   // the sort job's Reduce, not the join's
        FileInputFormat.addInputPath(job2, new Path(args[0]));
        FileOutputFormat.setOutputPath(job2, new Path(args[1]));
        job2.waitForCompletion(true);
    }
}
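For reference, with the two sample files above, the join job's part-r-00000 under /output should contain one tab-separated record per student ID, with keys in lexicographic (here: ascending ID) order. This is a derived sketch, assuming the tab separator written by the reducer:
2017111111	一凡	100
2017222222	张三	99
2017333333	李四	98
2017444444	王五	99
2017555555	赵刘	97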
mapreduce.java (the sort job: mapper, reducer, and a standalone driver)
package mywork03;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class mapreduce {
    public static class Map extends Mapper<LongWritable, Text, Bean, NullWritable> {
        Bean student = new Bean();
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Each joined line is "id\tname\tscore"
            String[] strs = value.toString().split("\t");
            student.set(strs);
            // The Bean itself is the output key, so the shuffle sorts records by Bean.compareTo (i.e. by score)
            context.write(student, NullWritable.get());
        }
    }
    public static class Reduce extends Reducer<Bean, NullWritable, Bean, NullWritable> {
        protected void reduce(Bean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // Beans with equal scores are grouped together; writing the key once per value
            // preserves every record, because the key object is refilled as the values are iterated
            for (NullWritable value : values) {
                context.write(key, NullWritable.get());
            }
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String inputPath = "hdfs://localhost:8020/output/";
        String outputPath = "hdfs://localhost:8020/output1";
        args = new String[] { inputPath, outputPath };
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(mapreduce.class);
        job.setOutputKeyClass(Bean.class);
        job.setOutputValueClass(NullWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
}
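The sort job should then write the records to /output1 in ascending score order. This is a sketch derived from the sample data; the relative order of the two records with score 99 is not fixed, since compareTo treats equal scores as equal keys:
id:2017555555 name:赵刘 score:97
id:2017333333 name:李四 score:98
id:2017222222 name:张三 score:99
id:2017444444 name:王五 score:99
id:2017111111 name:一凡 score:100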
Bean.java (the custom WritableComparable key used for sorting)
package mywork03;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class Bean implements WritableComparable<Bean> {
    String studentId;
    String name;
    int score;

    public Bean() {
        super();
    }

    public void set(String[] strs) {
        this.studentId = strs[0];
        this.name = strs[1];
        this.score = Integer.parseInt(strs[2]);
    }

    public String getName() {
        return name;
    }
    public void setName(String name) {
        this.name = name;
    }
    public String getStudentId() {
        return studentId;
    }
    public void setStudentId(String studentId) {
        this.studentId = studentId;
    }
    public int getScore() {
        return score;
    }
    public void setScore(int score) {
        this.score = score;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.studentId = in.readUTF();
        this.name = in.readUTF();
        this.score = in.readInt();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(studentId);
        out.writeUTF(name);
        out.writeInt(score);
    }

    // Ascending by score; swap the operands for descending order
    @Override
    public int compareTo(Bean o) {
        return Integer.compare(this.score, o.score);
    }

    // Without this override, the output file would only contain object addresses
    @Override
    public String toString() {
        return "id:" + this.studentId + " name:" + this.name + " score:" + this.score;
    }
}
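To sanity-check Bean's Writable contract outside of Hadoop, a minimal standalone sketch can round-trip one record through write/readFields and compare two scores. BeanCheck is a hypothetical helper for illustration, not part of the original code:
package mywork03;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
public class BeanCheck {
    public static void main(String[] args) throws Exception {
        Bean a = new Bean();
        a.set(new String[] { "2017111111", "一凡", "100" });
        // Serialize and deserialize the record, as the shuffle would
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        a.write(new DataOutputStream(bytes));
        Bean b = new Bean();
        b.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(b);                  // id:2017111111 name:一凡 score:100
        Bean c = new Bean();
        c.set(new String[] { "2017555555", "赵刘", "97" });
        System.out.println(a.compareTo(c) > 0); // true: 100 sorts after 97 (ascending)
    }
}
If the round-tripped record matches the original and the comparison prints true, serialization and the ascending sort order behave as the second job expects.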