Hadoop MapReduce:Reducer 端多表合并(reduce-side join)

package cn.nyzc.reducejoin;


import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;


import org.apache.hadoop.io.Writable;


/**
 * 传输的bean对象
 *

Title: OrderBean


 *

Description:


 *

Company: www.xnlc.cn

 
 * @author 黄庭华
 * @date 2016年7月17日下午2:52:36
 * @version 1.0
 */
public class OrderBean implements Writable{

private String oid;//订单id
private String pid;//商品id
private  int amount;//订单中商品的数量
private String pname;//商品名称
private String flag;//标记符号 "0"代表订单表 "1"代表商品表


//输出到文件中的内容由tostring方法控制
@Override
public String toString() {
return oid + "\t" + pname +"\t"+ amount;
}

//方便order表对象存储
public void setOrder(String oid,String pid,int amount){
this.oid=oid;
this.pid=pid;
this.amount=amount;
this.pname="";
this.flag="0";
}

//方便pro表对象存储
public void setPro(String pid,String pname){
this.oid="";
this.pid=pid;
this.amount=0;
this.pname=pname;
this.flag="1";
}




public String getOid() {
return oid;
}


public void setOid(String oid) {
this.oid = oid;
}


public String getPid() {
return pid;
}


public void setPid(String pid) {
this.pid = pid;
}


public int getAmount() {
return amount;
}


public void setAmount(int amount) {
this.amount = amount;
}


public String getPname() {
return pname;
}


public void setPname(String pname) {
this.pname = pname;
}


public String getFlag() {
return flag;
}


public void setFlag(String flag) {
this.flag = flag;
}


@Override
public void readFields(DataInput in) throws IOException {
this.oid=in.readUTF();
this.pid=in.readUTF();
this.pname=in.readUTF();
this.amount=in.readInt();
this.flag=in.readUTF();
}


@Override
public void write(DataOutput out) throws IOException {
out.writeUTF(oid);
out.writeUTF(pid);
out.writeUTF(pname);
out.writeInt(amount);
out.writeUTF(flag);

}


}



//=========================================



package cn.nyzc.reducejoin;


import java.io.IOException;


import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;


/**
 * Map side of the reduce-side join: tags every input record with the table
 * it came from and emits it keyed by product id.
 *
 * <p>Title: ReduceJoinMapper</p>
 * <p>Description: records from files whose name starts with "order" are
 * treated as order rows (oid, pid, amount); records from files whose name
 * starts with "pd" are treated as product rows (pid, pname). Fields are
 * tab-separated.</p>
 * <p>Company: www.xnlc.cn</p>
 *
 * @author 黄庭华
 * @date 2016-07-17 14:52:52
 * @version 1.0
 */
public class ReduceJoinMapper extends Mapper<LongWritable, Text, Text, OrderBean> {

    // Reusable holders for the output value and output key.
    OrderBean bean = new OrderBean();
    Text text = new Text();

    /**
     * Parses one input line and emits {@code (pid, bean)}.
     *
     * <p>Blank lines and records from files that are neither order nor
     * product files are skipped, so the reused {@link #bean} and
     * {@link #text} are never emitted with data left over from a previous
     * record.</p>
     *
     * @param key     the record key supplied by the input format
     * @param value   one line of input
     * @param context the task context used to find the source file and to emit output
     * @throws IOException          if a line has too few fields or a non-numeric amount
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

        // Work out which table the record comes from, based on the file name.
        FileSplit sp = (FileSplit) context.getInputSplit();
        String name = sp.getPath().getName();

        boolean isOrder = name.startsWith("order");
        boolean isProduct = !isOrder && name.startsWith("pd");
        if (!isOrder && !isProduct) {
            // Unknown source: nothing to join, and writing here would re-emit stale data.
            return;
        }

        String line = value.toString();
        if (line.trim().isEmpty()) {
            // Tolerate blank lines (e.g. a trailing newline at the end of the file).
            return;
        }
        String[] values = line.split("\t");

        if (isOrder) {
            requireFields(values, 3, name, key);
            int amount;
            try {
                amount = Integer.parseInt(values[2]);
            } catch (NumberFormatException e) {
                throw new IOException("Invalid amount '" + values[2] + "' in " + name
                        + " (record key " + key + ")", e);
            }
            bean.setOrder(values[0], values[1], amount);
            text.set(values[1]);
        } else {
            requireFields(values, 2, name, key);
            bean.setPro(values[0], values[1]);
            text.set(values[0]);
        }

        // Emit the tagged record keyed by product id.
        context.write(text, bean);
    }

    /** Fails with a descriptive message when a line has fewer than {@code expected} fields. */
    private static void requireFields(String[] values, int expected, String name, LongWritable key)
            throws IOException {
        if (values.length < expected) {
            throw new IOException("Expected at least " + expected + " tab-separated fields but found "
                    + values.length + " in " + name + " (record key " + key + ")");
        }
    }
}



//============================================



package cn.nyzc.reducejoin;


import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;


import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


/**
 * Reduce side of the join: for each product id, fills the product name into
 * every order record that references it.
 *
 * <p>Title: ReduceJoinReducer</p>
 * <p>Description: order records (flag "0") are buffered, the product record
 * (flag "1") supplies the product name, and the completed order records are
 * written out.</p>
 * <p>Company: www.xnlc.cn</p>
 *
 * @author 黄庭华
 * @date 2016-07-17 14:51:48
 * @version 1.0
 */
public class ReduceJoinReducer extends Reducer<Text, OrderBean, OrderBean, NullWritable> {

    // Buffer for the order records of the key currently being reduced.
    List<OrderBean> olist = new ArrayList<>();
    // Product name for the key currently being reduced.
    String pname;

    /**
     * Joins the order and product records that share one product id.
     *
     * <p>The per-key state is reset on every call, so a key with no product
     * record yields orders with an empty product name instead of the name
     * left over from the previous key.</p>
     *
     * @param key     the product id
     * @param values  all order and product beans emitted for {@code key}
     * @param context the task context used to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<OrderBean> values, Context context)
            throws IOException, InterruptedException {
        // Reset per-key state before processing this group.
        olist.clear();
        pname = "";

        for (OrderBean orderBean : values) {
            String flag = orderBean.getFlag();
            if ("0".equals(flag)) {
                // The framework reuses the value object while iterating, so each
                // order must be copied into a fresh bean before being buffered.
                OrderBean buf = new OrderBean();
                buf.setOrder(orderBean.getOid(), orderBean.getPid(), orderBean.getAmount());
                olist.add(buf);
            } else if ("1".equals(flag)) {
                // Remember the field that has to be merged into the orders.
                pname = orderBean.getPname();
            }
        }

        // Fill in the missing product name and emit every buffered order.
        for (OrderBean bean : olist) {
            bean.setPname(pname);
            context.write(bean, NullWritable.get());
        }
    }
}



//======================================



package cn.nyzc.reducejoin;


import java.io.IOException;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * Job driver for the reduce-side join.
 *
 * <p>Title: ReduceJoinDriver</p>
 * <p>Description: configures and submits the job built from
 * {@code ReduceJoinMapper} and {@code ReduceJoinReducer}.</p>
 * <p>Company: www.xnlc.cn</p>
 *
 * @author 黄庭华
 * @date 2016-07-17 14:54:27
 * @version 1.0
 */
public class ReduceJoinDriver {

    /** Input directory used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT = "e:/fortest/input03";
    /** Output directory used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT = "e:/output01";

    /**
     * Configures the job, runs it and exits with status 0 on success or 1 on
     * failure.
     *
     * @param args {@code args[0]} is the input path and {@code args[1]} the
     *             output path; when fewer than two arguments are supplied the
     *             built-in default paths are used
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Fall back to the local test paths only when the caller gave none,
        // instead of unconditionally overriding the command-line arguments.
        if (args == null || args.length < 2) {
            args = new String[] {DEFAULT_INPUT, DEFAULT_OUTPUT};
        }

        // 1. Create the job object.
        Job job = Job.getInstance(new Configuration());

        // 2. Tell Hadoop which jar contains the job classes.
        job.setJarByClass(ReduceJoinDriver.class);

        // 3. Set the mapper and reducer classes.
        job.setMapperClass(ReduceJoinMapper.class);
        job.setReducerClass(ReduceJoinReducer.class);

        // 4. Set the output types of each phase.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(OrderBean.class);
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // 5. Set the input and output paths.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 6. Submit and wait for completion.
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}



你可能感兴趣的:(Hadoop-mapreduce reducer端多表合并)