MapReduce Case Study: Joining Two Tables

Data sources:

Contents of a.txt:

 

addressID addressName
1 Beijing
2 Guangzhou
3 Shenzhen
4 Xian

 

Contents of b.txt:

factoryName addressID
BeijingRedStar 1
ShenzhenThunder 3
GuangzhouHonda 2
BeijingRising 1
GuangzhouDevelopmentBank 2
Tencent 3
BackofBeijing 1

 

Expected output:

factoryName	addressName
BeijingRedStar	Beijing
BeijingRising	Beijing
BackofBeijing	Beijing
GuangzhouHonda	Guangzhou
GuangzhouDevelopmentBank	Guangzhou
ShenzhenThunder	Shenzhen
Tencent	Shenzhen
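
The job implements a classic reduce-side join. The mapper reads lines from both files, tells the two tables apart (address rows start with a digit), re-keys every record on addressID, and tags each value with its origin: "1-" for address names, "2-" for factory names. The shuffle then delivers all records sharing an addressID to a single reduce() call, where the reducer separates the two lists again and emits their cross product as the joined rows.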

 

========================================JAVA CODE============================

package gq;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Class description: reduce-side join of two tables.
 *
 * Author: gaoqi
 *
 * Date: 2015-06-05 14:03:08
 */

public class LeftJoin {

    public static int TIME = 0;

    public static class Map extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            // Skip the header line of either input file.
            if (line.contains("factoryName") || line.contains("addressID")) {
                return;
            }
            String[] ss = line.split(" ");
            if (ss[0].charAt(0) >= '0' && ss[0].charAt(0) <= '9') {
                // Address table: key = addressID, value tagged "1-" (address name).
                context.write(new Text(ss[0]), new Text(1 + "-" + ss[1]));
            } else {
                // Factory table: key = addressID, value tagged "2-" (factory name).
                context.write(new Text(ss[1]), new Text(2 + "-" + ss[0]));
            }
        }
    }
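    // With the sample inputs, the mapper emits tagged pairs such as
    //   ("1", "1-Beijing"), ("1", "2-BeijingRedStar"), ("3", "2-Tencent"), ...
    // so the shuffle delivers every name that shares an addressID to a
    // single reduce() call.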

 

    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Write the header row exactly once (relies on the job using a single reducer).
            if (0 == TIME) {
                context.write(new Text("factoryName"), new Text("addressName"));
                TIME++;
            }
            Iterator<Text> its = values.iterator();
            int anum = 0;
            int fnum = 0;
            String[] aArray = new String[20];
            String[] fArray = new String[20];
            // Separate the grouped values back into the two source tables by tag.
            while (its.hasNext()) {
                String value = its.next().toString();
                String[] ss = value.split("-");
                if (ss[0].equals("1")) {
                    aArray[anum] = ss[1];
                    anum++;
                } else {
                    fArray[fnum] = ss[1];
                    fnum++;
                }
            }
            // Emit the cross product: every factory at this addressID joins
            // with every address name.
            if (anum != 0 && fnum != 0) {
                for (int m = 0; m < fnum; m++) {
                    for (int n = 0; n < anum; n++) {
                        context.write(new Text(fArray[m]), new Text(aArray[n]));
                    }
                }
            }
        }
    }
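    // Worked example for key "1": the grouped values are
    //   {"1-Beijing", "2-BeijingRedStar", "2-BeijingRising", "2-BackofBeijing"},
    // so aArray = [Beijing] and fArray = [BeijingRedStar, BeijingRising,
    // BackofBeijing]; the nested loops emit the three Beijing rows shown in
    // the expected output above.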

 

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Job.getInstance replaces the deprecated new Job(conf, name) constructor.
        Job job = Job.getInstance(conf, "LeftJoin");
        job.setJarByClass(LeftJoin.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path("hdfs://h0:9000/user/tallqi/in/inputLeftjoin"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://h0:9000/user/tallqi/in/outputLeftjoin"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
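
To sanity-check the mapper's tagging in isolation, a small MRUnit-style harness can drive it directly. This is a minimal sketch, assuming MRUnit 1.x is on the classpath; the test class LeftJoinMapTest is hypothetical and not part of the original job.

package gq;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;

// Hypothetical smoke test for LeftJoin.Map (assumes MRUnit 1.x).
public class LeftJoinMapTest {

    public static void main(String[] args) throws Exception {
        // An address row ("3 Shenzhen") is re-keyed on its ID and tagged "1-".
        MapDriver.newMapDriver(new LeftJoin.Map())
                 .withInput(new LongWritable(0), new Text("3 Shenzhen"))
                 .withOutput(new Text("3"), new Text("1-Shenzhen"))
                 .runTest();

        // A factory row ("Tencent 3") is re-keyed on its addressID and tagged "2-".
        MapDriver.newMapDriver(new LeftJoin.Map())
                 .withInput(new LongWritable(0), new Text("Tencent 3"))
                 .withOutput(new Text("3"), new Text("2-Tencent"))
                 .runTest();
    }
}

To run the actual join, package the class into a jar (the jar name here is hypothetical) and submit it with hadoop jar leftjoin.jar gq.LeftJoin; the joined table appears in part-r-00000 under the output directory.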

 

 
