hadoop 部门职员的join 操作

职员:
1 工号 2 姓名 3 职位 4 上级工号 5 生日 6 工资 7 奖金 8 部门编号 9 工作地点

部门:
1 部门编号 2 部门名称 3 地址

 

数据文件

30 sales chicago
20 research dallas
10 accounting newyork

 

7499 allen salesman 7698 1981-02-20 1600 300 30 
7782 clark managers 7639 1981-06-09 2450  10 
7654 martin salesman 7698 1981-03-20 1250 1400 30 boston
7900 james clerk 7698 1981-01-09 950  30 
7788 scott analyst 7566 1981-09-01 3000 100 20

 

 

[root@master IMFdatatest]#hadoop dfs -cat /library/outputjoin1/part-r-00000
DEPRECATED: Use of this script to execute hdfs command is deprecated.
Instead use the hdfs command for it.

16/02/16 08:46:56 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
10          10  accounting
10      7782   clark 10 
20      7788   scott 20 
20          20  research
30          30  sales
30      7900   james 30 
30      7654   martin 30 
30      7499   allen 30 

 

 

代码

 

package com.dtspark.hadoop.hellomapreduce;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class JoinWorkersInformation {
 
  public static class DataMapper
     extends Mapper<LongWritable, Text, LongWritable, WorkerInformation>{
 
 
  
   public void map(LongWritable key, Text value, Context context
                   ) throws IOException, InterruptedException {
 
    System.out.println("Map Methond Invoked!!!");
  
    String inputData =value.toString();
    String[] data= inputData.split("\t");
    if (data.length <=3){ //department
     WorkerInformation  department=  new WorkerInformation ();
     department.setDepartmentNo(data[0]);
     department.setDepartmentName(data[1]);
     department.setFlag(0);
     context.write(new LongWritable(Long.valueOf(department.getDepartmentNo())), department);
    }else { // worker
    
     WorkerInformation  worker=  new WorkerInformation ();
     worker.setWorkerNo(data[0]);
     worker.setWorkerName(data[1]);
     worker.setDepartmentNo(data[7]);
     worker.setFlag(1);
     context.write(new LongWritable(Long.valueOf(worker.getDepartmentNo())), worker);
    
    
    }
   
    }
 
  }

public static class DataReducer
     extends Reducer<LongWritable,WorkerInformation,LongWritable,Text> {
 

 public void reduce(Text key, Iterable<WorkerInformation> values,
                      Context context
                      ) throws IOException, InterruptedException {
    System.out.println("Reduce Methond Invoked!!!" );
    LongWritable resultKey= new LongWritable(0);
    Text resultValue=new Text();
   
    WorkerInformation department= null;
   List<WorkerInformation>  workerList=new ArrayList<WorkerInformation>();
  
    for (WorkerInformation item : values) {
     if (0 ==item.getFlag()) {
      department=new WorkerInformation(item);
     }else {
      workerList.add(new WorkerInformation(item) );
     
     }
    }
    for (WorkerInformation worker:workerList){
     worker.setDepartmentNo(department.getDepartmentNo());
     worker.setDepartmentName(department.getDepartmentName());
     resultValue.set(worker.toString());
     context.write(resultKey,resultValue );
   
    }
   
   
   }
 
 
}

public static void main(String[] args) throws Exception {
 
 
 
  Configuration conf = new Configuration();

  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: JoinWorkersInformation <in> [<in>...] <out>");
    System.exit(2);
  }

  Job job = Job.getInstance(conf, "URLLog");
  job.setJarByClass(JoinWorkersInformation.class);
  job.setMapperClass(DataMapper.class);
  job.setReducerClass(DataReducer.class);
 
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(WorkerInformation.class);
 
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  FileOutputFormat.setOutputPath(job,
    new Path(otherArgs[otherArgs.length - 1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}

}

class WorkerInformation implements WritableComparable{
    private String workerNo ="";
    private String workerName ="";
    private String departmentNo =""; 
    private String departmentName =""; 
    private int flag = 0; // 0 department,1 worker
   
    public WorkerInformation(){
     
    }
   
 public WorkerInformation(String workerNo, String workerName, String departmentNo, String departmentName, int flag) {
  super();
  this.workerNo = workerNo;
  this.workerName = workerName;
  this.departmentNo = departmentNo;
  this.departmentName = departmentName;
  this.flag = flag;
 }
 public WorkerInformation(WorkerInformation info){
  this.workerNo=info.departmentNo;
  this.workerName = info.workerName;
  this.departmentNo = info.departmentNo;
  this.departmentName = info.departmentName;
  this.flag = info.flag;
 }

 @Override
 public void readFields(DataInput input) throws IOException {
  this.workerNo=input.readUTF();
  this.workerName=input.readUTF();
  this.departmentNo=input.readUTF();
  this.departmentName=input.readUTF();
  this.flag=input.readInt();
 
 }

 @Override
 public void write(DataOutput output) throws IOException {
  output.writeUTF(this.workerNo);
  output.writeUTF(this.workerName);
  output.writeUTF(this.departmentNo);
  output.writeUTF(this.departmentName);
  output.writeInt(this.flag);
 }

 @Override
 public int compareTo(Object o) {
 
  return 0;
 }

 @Override
 public String toString() {
  return  this.workerNo + "   " +this.workerName+" "+this.departmentNo + "  "+this.departmentName;
 }

 public String getWorkerNo() {
  return workerNo;
 }

 public void setWorkerNo(String workerNo) {
  this.workerNo = workerNo;
 }

 public String getWorkerName() {
  return workerName;
 }

 public void setWorkerName(String workerName) {
  this.workerName = workerName;
 }

 public String getDepartmentNo() {
  return departmentNo;
 }

 public void setDepartmentNo(String departmentNo) {
  this.departmentNo = departmentNo;
 }

 public String getDepartmentName() {
  return departmentName;
 }

 public void setDepartmentName(String departmentName) {
  this.departmentName = departmentName;
 }

 public int getFlag() {
  return flag;
 }

 public void setFlag(int flag) {
  this.flag = flag;
 }
 
}

 

 

 

 

 

 


hadoop 部门职员的join 操作_第1张图片 

 

 

你可能感兴趣的:(hadoop 部门职员的join 操作)