Hadoop example: custom sorting, partitioning, and grouping

data.txt
1949-10-01 14:21:02/t34℃
1949-10-02 14:01:02/t36℃
1950-01-01 11:21:02/t32℃
1950-10-01 12:21:02/t37℃
1951-12-01 12:21:02/t23℃
1950-10-02 12:21:02/t41℃
1950-10-03 12:21:02/t27℃
1951-07-01 12:21:02/t45℃
1951-07-02 12:21:02/t46℃
----------------------------
readme.txt
1. For the years 1949-1955, find the time at which each year's highest temperature occurred.
2. For the years 1949-1955, find the ten hottest days of each year.


Approach:
    1. Sort by year in ascending order and, within each year, by temperature in descending order.
    2. Group by year, so that each year's records go to a single reduce call.
    Mapper output: the key is a composite object wrapping (year, temperature); the value is the original input line. A worked example of the resulting key order follows the goals list below.




Goals:
    Custom sorting
    Custom partitioning
    Custom grouping
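
To make the sort concrete: built from the data.txt sample above, the composite keys sort in the following overall order after the shuffle (ascending by year, descending by temperature within a year; shown as KeyPair.toString() below prints them):

1949	36
1949	34
1950	41
1950	37
1950	32
1950	27
1951	46
1951	45
1951	23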
------------------------------------------------------------------------------------------------------------
KeyPair.java


package com.wd;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class KeyPair implements WritableComparable<KeyPair> {
    private int year;
    private int hot;

    public int getYear() {
        return year;
    }
    public void setYear(int year) {
        this.year = year;
    }
    public int getHot() {
        return hot;
    }
    public void setHot(int hot) {
        this.hot = hot;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Must read the fields in the same order write() emits them.
        this.year = in.readInt();
        this.hot = in.readInt();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);
        out.writeInt(hot);
    }

    @Override
    public int compareTo(KeyPair o) {
        int res = Integer.compare(year, o.getYear());
        if (res != 0) {
            return res;
        }
        return Integer.compare(hot, o.getHot());
    }

    @Override
    public String toString() {
        return year + "\t" + hot;
    }
}
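
As a quick sanity check (my sketch, not part of the original post; it assumes a class placed alongside KeyPair in com.wd), a KeyPair can be round-tripped through its own write()/readFields() pair with plain java.io streams:

package com.wd;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class KeyPairRoundTrip {
    public static void main(String[] args) throws IOException {
        KeyPair before = new KeyPair();
        before.setYear(1950);
        before.setHot(41);

        // Serialize with write(), then deserialize into a fresh object
        // with readFields(); mismatched field order would silently
        // corrupt the key.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        before.write(new DataOutputStream(bos));

        KeyPair after = new KeyPair();
        after.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));

        System.out.println(after); // expected: 1950	41
    }
}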
--------------------------------------------------------------------------------------------------


SortHot.java
package com.wd;


import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class SortHot extends WritableComparator {
    public SortHot() {
        // Register KeyPair and let the parent instantiate keys for comparison.
        super(KeyPair.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        KeyPair o1 = (KeyPair) a;
        KeyPair o2 = (KeyPair) b;
        int res = Integer.compare(o1.getYear(), o2.getYear());
        if (res != 0) {
            return res;
        }
        return -Integer.compare(o1.getHot(), o2.getHot()); // descending by temperature
    }
}
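
The leading minus sign on the last comparison is what produces the descending temperature order: for two 1950 records, Integer.compare(41, 37) is positive, so negating it makes (1950, 41) sort ahead of (1950, 37), while years themselves still compare in ascending order.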


---------------------------------------------------------------------------------------------------------
FirstPartition.java
package com.wd;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class FirstPartition extends Partitioner<KeyPair, Text> {
    @Override
    public int getPartition(KeyPair key, Text value, int num) {
        // Spread the years across the available reduce tasks.
        return (key.getYear() * 127) % num;
    }
}
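
With the sample data and the four reduce tasks configured below (num = 4), this formula places each year in its own partition: 1949 * 127 = 247523, which mod 4 is 3; 1950 * 127 = 247650, mod 4 is 2; 1951 * 127 = 247777, mod 4 is 1. Partition 0 receives nothing, so its output file should come out empty.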




-------------------------------------------------------------------------------------------------------------
GroupHot.java
package com.wd;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class GroupHot extends WritableComparator {
    public GroupHot() {
        super(KeyPair.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Two keys with the same year compare as equal, so all of a
        // year's records are grouped into a single reduce() call.
        KeyPair o1 = (KeyPair) a;
        KeyPair o2 = (KeyPair) b;
        return Integer.compare(o1.getYear(), o2.getYear());
    }
}
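
Combined with SortHot, this is the classic secondary-sort pattern: inside the 1950 group the reducer iterates the values in key order (1950, 41), (1950, 37), (1950, 32), (1950, 27), so the first record of each group is already that year's maximum temperature.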


---------------------------------------------------------------------------------------------------------------
RunJob.java
package com.wd;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


//hadoop jar /home/cloudera/wd.jar com.wd.RunJob
public class RunJob {
    public static final SimpleDateFormat SDF = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    static class HotMapper extends Mapper<LongWritable, Text, KeyPair, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // The sample data separates the timestamp and the temperature
            // with the literal string "/t".
            String[] ss = line.split("/t");
            if (ss.length == 2) {
                try {
                    Date date = SDF.parse(ss[0]);
                    Calendar c = Calendar.getInstance();
                    c.setTime(date);
                    int year = c.get(Calendar.YEAR);
                    String hot = ss[1].substring(0, ss[1].indexOf("℃"));
                    KeyPair kp = new KeyPair();
                    kp.setYear(year);
                    kp.setHot(Integer.parseInt(hot));
                    context.write(kp, value);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    static class HotReduce extends Reducer<KeyPair, Text, KeyPair, Text> {
        @Override
        protected void reduce(KeyPair kp, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            // Within each year's group the values arrive hottest-first.
            for (Text v : value) {
                context.write(kp, v);
            }
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);
            job.setJobName("hot");
            job.setJarByClass(RunJob.class);
            job.setMapperClass(HotMapper.class);
            job.setReducerClass(HotReduce.class);
            job.setMapOutputKeyClass(KeyPair.class);
            job.setMapOutputValueClass(Text.class);

            job.setNumReduceTasks(4); // number of reduce tasks
            job.setPartitionerClass(FirstPartition.class);
            job.setSortComparatorClass(SortHot.class);
            job.setGroupingComparatorClass(GroupHot.class);

            FileInputFormat.addInputPath(job, new Path("hdfs://192.168.1.198:8020/wd/input/"));
            FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.198:8020/wd/output3/"));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
-------------------------------------------------------------------------------------------------------
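
HotReduce above echoes every record, which answers the first question in readme.txt (the first line of each year's group is its maximum). For the second question, the top ten temperatures per year, a small variant (my sketch, not from the original post) can simply stop after ten values, since the grouping comparator already delivers each year's records hottest-first:

static class Top10Reduce extends Reducer<KeyPair, Text, KeyPair, Text> {
    @Override
    protected void reduce(KeyPair kp, Iterable<Text> value, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        // Values arrive hottest-first thanks to SortHot, so the first
        // ten records of a group are the year's top ten.
        for (Text v : value) {
            if (count++ >= 10) {
                break;
            }
            context.write(kp, v);
        }
    }
}

Wiring it in only requires replacing job.setReducerClass(HotReduce.class) with job.setReducerClass(Top10Reduce.class) in main().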