MapReduce《案例之两表连接》
数据源:
a.txt内容
addressed addressname
1 Beijing
2 Guangzhou
3 Shenzhen
4 Xian
b.txt内容
factoryname addressed
BeijingRedStar 1
ShenzhenThunder 3
GuangzhouHonda 2
BeijingRising 1
GuangzhouDevelopmentBank 2
Tencent 3
BackofBeijing 1
输出结果为:
factoryName	addressName
BeijingRedStar	Beijing
BeijingRising	Beijing
BackofBeijing	Beijing
GuangzhouHonda	Guangzhou
GuangzhouDevelopmentBank	Guangzhou
ShenzhenThunder	Shenzhen
Tencent	Shenzhen
========================================JAVA CODE============================
package gq;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
*
* Class Description:案例之两表连接测试类
*
* Author:gaoqi
*
* Date:2015年6月5日 下午2:03:08
*
*/
public class LeftJoin {
public static int TIME = 0;
public static class Map extends Mapper<LongWritable, Text, Text, Text>{
public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{
String line = value.toString();
if(line.contains("factoryname") || line.contains("addressed")){
return;
}
String[] ss = line.split(" ");
if(ss[0].charAt(0) >= '0' && ss[0].charAt(0)<='9'){//adressname table
context.write(new Text(ss[0]), new Text(1+"-"+ss[1]));
}else{//factoryname table
context.write(new Text(ss[1]), new Text(2+"-"+ss[0]));
}
}
}
public static class Reduce extends Reducer<Text, Text, Text, Text>{
public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException{
if(0 == TIME){
context.write(new Text("factoryName"), new Text("addressName"));
TIME++;
}
Iterator<Text> its = values.iterator();
int anum =0;
int fnum = 0;
String[] aArray = new String[20];
String[] fArray = new String[20];
while(its.hasNext()){
String value = its.next().toString();
String[] ss = value.split("-");
if(ss[0].equals("1")){
aArray[anum] = ss[1];
anum++;
}else{
fArray[fnum] = ss[1];
fnum++;
}
}
if(anum != 0 && fnum !=0){
for(int m =0;m<fnum;m++){
for(int n=0;n<anum;n++){
context.write(new Text(fArray[m]), new Text(aArray[n]));
}
}
}
}
}
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
Job job = new Job(conf,"LeftJoin");
job.setJarByClass(LeftJoin.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path("hdfs://h0:9000/user/tallqi/in/inputLeftjoin"));
FileOutputFormat.setOutputPath(job, new Path("hdfs://h0:9000/user/tallqi/in/outputLeftjoin"));
System.exit(job.waitForCompletion(true)?0:1);
}
}