Hadoop multipleoutputs使用<转>

package com.loganalysis;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputs;



public class CK {

  public static class Key implements WritableComparable<Key> {
    public String plate_id = "";
    public String hour_id = "";
    
//    public Key(String p, String h){
//      this.plate_id = p;
//      this.hour_id = h;
//    }
    
    public String getPlateid(){
      return this.plate_id;
    }
    
    public String getHourid(){
      return this.hour_id;
    }
    
    public void set(String p, String h){
      this.plate_id = p;
      this.hour_id = h;
    }
    
    @Override
    public void write(DataOutput out) throws IOException {
      out.writeUTF(this.plate_id);
      out.writeUTF(this.hour_id);      
    }
    
    @Override
    public void readFields(DataInput in) throws IOException {
      this.plate_id = in.readUTF();
      this.hour_id = in.readUTF();
    }
    
    @Override
    public boolean equals(Object k) {
      if (k instanceof Key) {
        Key r = (Key) k;
        return this.plate_id.equals(((Key) k).plate_id) && this.hour_id.equals(((Key) k).hour_id);
      } else {
        return false;
      }
    }
    
    public String toString(){
      return this.plate_id +"\t"+ this.hour_id;
    }
    
    @Override
    public int compareTo(Key k) {
      int cmp = this.plate_id.compareTo(k.plate_id);
      
      if(cmp != 0){
        return cmp;
      }
      
      if(this.hour_id.compareTo(k.hour_id) > 0){
        return 1;
      }else if(this.hour_id.compareTo(k.hour_id) < 0){
        return -1;
      }else{
        return 0;
      }
      
    }       
  }
  
  public static class MapClass extends MapReduceBase implements
      Mapper<LongWritable, Text, Key, Text> {
    //private Key plateHour = new Key();
    private Text word = new Text();
    private Key plateHour = new Key();
    @Override
    public void map(LongWritable key,
                    Text value,
                    OutputCollector<Key, Text> output,
                    Reporter reporter) throws IOException {
      String [] line = value.toString().split("\t");
      String cookieId = line[0];
      String hourId = line[1].substring(11, 13);
      String plateId = line[3];      
      
      plateHour.set(plateId,hourId);
      word.set(hourId + "\t" + cookieId);
      output.collect(plateHour, word);      
    }
    
      
  }
  
  public static class Reduce extends MapReduceBase implements Reducer<Key, Text, Text, Text> {
    private Text word = new Text();
    private Text hour_word = new Text();
    private Text hour_total = new Text();
    private Text total_total = new Text();
    private Map<String, Integer> hour_map = new HashMap<String, Integer>();
    private Map<String, Integer> total_map = new HashMap<String, Integer>();
    
    private MultipleOutputs mop;
    
    @Override
    public void configure(JobConf conf){
      mop = new MultipleOutputs(conf);
    }
    
    @Override
    public void close() throws IOException{
      mop.close();
    }
    
    @Override
    public void reduce(Key key,
                       Iterator<Text> values,
                       OutputCollector<Text, Text> output,
                       Reporter reporter) throws IOException {
      String newHour = "";
      int count = 0;
      
      total_map.clear();
      while(values.hasNext()) {        
        String [] value = values.next().toString().split("\t");
        
        //按小時UV輸出
        if(!newHour.equals(value[0])){     
          if(count != 0){
            hour_total.set(Integer.toString(hour_map.size()));
            hour_word.set(key.getPlateid() + "\t" + newHour);
            //output.collect(hour_word, hour_total);        
             mop.getCollector("fix1", "fix1", reporter).collect(hour_word, hour_total);
          }
          newHour = value[0];          
          hour_map.clear();
          count = 0;
          hour_map.put(value[1], 0);
          count ++;
        }else{
          hour_map.put(value[1], 0);
          count ++;
        }
        
        total_map.put(value[1], 0);        
        
      }
      //按小時UV輸出
      hour_total.set(Integer.toString(hour_map.size()));
      hour_word.set(key.getPlateid() + "\t" + newHour);
      //output.collect(hour_word, hour_total);
       mop.getCollector("fix1", "fix1", reporter).collect(hour_word, hour_total);
      
      //總UV輸出
      total_total.set(Integer.toString(total_map.size()));
      word.set(key.getPlateid());
      //output.collect(word, total_total);
       mop.getCollector("fix2", "fix2", reporter).collect(word, total_total);
      
    }
  }

 

  public static class FirstGroupingComparator extends WritableComparator {

    protected FirstGroupingComparator() {
      super(Key.class,true);
    }
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
      return ((Key)a).getPlateid().compareTo(((Key)b).getPlateid());
      //一定通过第一个key进行分组
    }
    
  }

  public static class FirstPartitioner implements Partitioner<Key, Text> {

    @Override
    public int getPartition(Key key, Text value, int numPartitions) {
      return key.getPlateid().hashCode()&Integer.MAX_VALUE%numPartitions;
    }

    @Override
    public void configure(JobConf job) {
      // TODO Auto-generated method stub
      
    }    
  }

  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(CK.class);
    conf.setJobName("Composite key");

    // 设置Map输出的key和value的类型
    conf.setMapOutputKeyClass(Key.class);
    conf.setMapOutputValueClass(Text.class);

    // 设置Reduce输出的key和value的类型
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // 设置Mapper和Reducer
    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(Reduce.class);

    // 设置group函数和分区函数
    conf.setOutputValueGroupingComparator(FirstGroupingComparator.class);
    conf.setPartitionerClass(FirstPartitioner.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // conf.setOutputFormat(SequenceFileOutputFormat.class);
     MultipleOutputs.addMultiNamedOutput(conf, "fix1", TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, "fix2", TextOutputFormat.class, Text.class, Text.class);
    
    // 如果输出目录已经存在,那么先将其删除
    FileSystem fstm = FileSystem.get(conf);
    Path outDir = new Path(args[1]);
    fstm.delete(outDir, true);

    // 设置输入输出目录
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, outDir);

    JobClient.runJob(conf);

    
  }
}

你可能感兴趣的:(Hadoop multipleoutputs使用<转>)