MapReduce编程实例

MapReduce编程实例(六)


前提准备:

1.hadoop安装运行正常。Hadoop安装配置请参考:Ubuntu下 Hadoop 1.2.1 配置安装

2.集成开发环境正常。集成开发环境配置请参考 :Ubuntu 搭建Hadoop源码阅读环境


MapReduce编程实例:

MapReduce编程实例(一),详细介绍在集成环境中运行第一个MapReduce程序 WordCount及代码分析

MapReduce编程实例(二),计算学生平均成绩

MapReduce编程实例(三),数据去重

MapReduce编程实例(四),排序

MapReduce编程实例(五),MapReduce实现单表关联

MapReduce编程实例(六),MapReduce实现多表关联


多表关联
描述:
两张表关联,如下:
左表:
factoryname address
BMW Factory 2
Benz Factory 3
Voivo Factory 4
LG Factory 5

右表:
addressID addressname
2 Beijing
3 Guangzhou
4 Shenzhen
5 Sanya

根据左表的address列(存放的是地址ID)与右表的addressID进行关联,求出factoryname-addressname表。做法与单表关联相同:以地址ID作为key,在reduce端把左右两表中key相同的记录连接起来即可。这里不再赘述,需要时可参考单表关联的分析。

[java] view plain copy
  package com.t.hadoop;

  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.List;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.hadoop.mapreduce.Reducer;
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  import org.apache.hadoop.util.GenericOptionsParser;

  /**
   * Multi-table join: joins a "factory" table with an "address" table on the
   * address ID.
   *
   * <p>Left-table lines look like {@code "<factoryname> <addressID>"} and
   * right-table lines look like {@code "<addressID> <addressname>"}. The mapper
   * emits the address ID as the key and the name, prefixed with a one-character
   * table tag, as the value. The reducer then pairs every factory name with
   * every address name that shares the same ID.
   *
   * @author daT [email protected]
   */
  public class MTJoin {

      /**
       * Formerly used to decide whether the header row had been written.
       *
       * @deprecated no longer read by {@link MTReducer}; each reducer instance
       *             tracks its own header state. Kept only so existing
       *             references still compile.
       */
      @Deprecated
      public static int times = 1;

      /** Tag prepended to values that come from the left (factory) table. */
      private static final String LEFT_TAG = "1";

      /** Tag prepended to values that come from the right (address) table. */
      private static final String RIGHT_TAG = "2";

      /** Returns true when {@code c} is one of the ASCII digits '0'..'9'. */
      private static boolean isAsciiDigit(char c) {
          return c >= '0' && c <= '9';
      }

      /**
       * Emits {@code (addressID, tag + name)} for every data line of either table.
       * Header lines, blank lines and lines without a usable ID are skipped.
       */
      public static class MTMapper extends Mapper<Object, Text, Text, Text> {

          /**
           * Parses one input line and writes the tagged record keyed by address ID.
           *
           * @param key     input key supplied by the framework (not used)
           * @param value   one line of either table
           * @param context sink for the {@code (addressID, tag + name)} pair
           * @throws IOException          if writing to the context fails
           * @throws InterruptedException if the task is interrupted while writing
           */
          @Override
          protected void map(Object key, Text value, Context context)
                  throws IOException, InterruptedException {
              String line = value.toString().trim();
              // Skip blank lines and the header row of either table.
              if (line.isEmpty() || line.contains("factoryname") || line.contains("addressID")) {
                  return;
              }

              if (isAsciiDigit(line.charAt(0))) {
                  // Right table: the ID is the leading run of digits.
                  int idEnd = 1;
                  while (idEnd < line.length() && isAsciiDigit(line.charAt(idEnd))) {
                      idEnd++;
                  }
                  // trim() drops the separator so it does not leak into the output.
                  String addressName = line.substring(idEnd).trim();
                  if (addressName.isEmpty()) {
                      return; // ID without a name: nothing to join
                  }
                  context.write(new Text(line.substring(0, idEnd)),
                          new Text(RIGHT_TAG + addressName));
              } else {
                  // Left table: the ID is the trailing run of digits, so digits
                  // inside the factory name are not mistaken for the ID.
                  int idStart = line.length();
                  while (idStart > 0 && isAsciiDigit(line.charAt(idStart - 1))) {
                      idStart--;
                  }
                  if (idStart == line.length()) {
                      return; // no trailing ID: cannot be joined
                  }
                  String factoryName = line.substring(0, idStart).trim();
                  context.write(new Text(line.substring(idStart)),
                          new Text(LEFT_TAG + factoryName));
              }
          }

      }


      /**
       * For each address ID, writes one {@code (factoryName, addressName)} row per
       * combination of left-table and right-table records that share the ID.
       */
      public static class MTReducer extends Reducer<Text, Text, Text, Text> {

          /** Whether this reducer instance has already written the header row. */
          private boolean headerWritten = false;

          /**
           * Joins the tagged values collected for one address ID.
           *
           * @param key     the address ID
           * @param values  tagged names from both tables for this ID
           * @param context sink for the joined rows
           * @throws IOException          if writing to the context fails
           * @throws InterruptedException if the task is interrupted while writing
           */
          @Override
          protected void reduce(Text key, Iterable<Text> values, Context context)
                  throws IOException, InterruptedException {
              // Write the header once, just before the first joined group.
              if (!headerWritten) {
                  context.write(new Text("factoryName"), new Text("Address"));
                  headerWritten = true;
              }

              // Lists grow as needed, so any number of records per ID is accepted.
              List<String> factories = new ArrayList<String>();
              List<String> addresses = new ArrayList<String>();

              for (Text t : values) {
                  String tagged = t.toString();
                  if (tagged.startsWith(LEFT_TAG)) {
                      // left table
                      factories.add(tagged.substring(LEFT_TAG.length()));
                  } else if (tagged.startsWith(RIGHT_TAG)) {
                      // right table
                      addresses.add(tagged.substring(RIGHT_TAG.length()));
                  }
                  // Values carrying neither tag are ignored.
              }

              for (String factory : factories) {
                  for (String address : addresses) {
                      context.write(new Text(factory), new Text(address));
                  }
              }
          }

      }

      /**
       * Configures and runs the join job.
       *
       * @param args generic Hadoop options followed by the input path and the
       *             output path
       * @throws IOException            if the job cannot be set up or submitted
       * @throws ClassNotFoundException if a job class cannot be resolved
       * @throws InterruptedException   if waiting for the job is interrupted
       */
      public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
          Configuration conf = new Configuration();
          String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

          if (otherArgs.length < 2) {
              // Report usage problems on stderr so they are not mixed with normal output.
              System.err.println("Parameters error");
              System.err.println("Usage: MTJoin <in> <out>");
              System.exit(2);
          }

          Job job = new Job(conf, "MTjoin");
          job.setJarByClass(MTJoin.class);
          job.setMapperClass(MTMapper.class);
          job.setReducerClass(MTReducer.class);
          job.setOutputKeyClass(Text.class);
          job.setOutputValueClass(Text.class);

          FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
          FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

          System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
  }


输出结果:
factoryName Address
BMW Factory Beijing
Benz Factory Guangzhou
Voivo Factory Shenzhen
LG Factory Sanya

你可能感兴趣的:(MapReduce编程实例)