data.txt
1949-10-01 14:21:02/t34℃
1949-10-02 14:01:02/t36℃
1950-01-01 11:21:02/t32℃
1950-10-01 12:21:02/t37℃
1951-12-01 12:21:02/t23℃
1950-10-02 12:21:02/t41℃
1950-10-03 12:21:02/t27℃
1951-07-01 12:21:02/t45℃
1951-07-02 12:21:02/t46℃
----------------------------
readme.txt
1. For the years 1949-1955, find the time of the highest temperature in each year.
2. For the years 1949-1955, find the ten hottest days in each year.
Approach:
1. Sort by year in ascending order and, within each year, by temperature in descending order.
2. Group by year, so that each year is handled by one reduce task.
Mapper output: the key is a composite object (year + temperature) and the value is the original record line; a short walkthrough on the sample data follows below.
Goals:
Custom sorting
Custom partitioning
Custom grouping
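Walkthrough on the sample data (illustrative, not part of the original files): the two 1950-10 records above, "…37℃" and "…41℃", leave the mapper as keys (1950, 37) and (1950, 41), each carrying the original line as the value. FirstPartition sends both (and every other 1950 key) to the same reduce task, SortHot orders (1950, 41) before (1950, 37), and GroupHot compares only the year, so a single reduce() call receives all of 1950's records with the hottest day first.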
------------------------------------------------------------------------------------------------------------
KeyPair.java
package com.wd;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class KeyPair implements WritableComparable<KeyPair>{
    private int year; // year extracted from the record's timestamp
    private int hot;  // temperature in ℃

    public int getYear(){
        return year;
    }
    public void setYear(int year){
        this.year = year;
    }
    public int getHot(){
        return hot;
    }
    public void setHot(int hot){
        this.hot = hot;
    }
    // deserialize the fields in the same order they were written
    public void readFields(DataInput in) throws IOException{
        this.year = in.readInt();
        this.hot = in.readInt();
    }
    public void write(DataOutput out) throws IOException{
        out.writeInt(year);
        out.writeInt(hot);
    }
    // default ordering: year ascending, then temperature ascending
    // (the job overrides this with SortHot, which sorts temperature descending)
    public int compareTo(KeyPair o){
        int res = Integer.compare(year, o.getYear());
        if(res != 0){
            return res;
        }
        return Integer.compare(hot, o.getHot());
    }
    public String toString(){
        return year + "\t" + hot;
    }
}
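A quick, hedged sanity check (the class KeyPairCheck is made up for illustration and is not part of the original project): it serializes a KeyPair to a byte buffer and reads it back with plain java.io streams, which is essentially what the framework does with map output keys.
package com.wd;
import java.io.*;
public class KeyPairCheck{
    public static void main(String[] args) throws IOException{
        KeyPair in = new KeyPair();
        in.setYear(1950);
        in.setHot(41);
        // write the key to a byte buffer, the same way Hadoop serializes map output
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        in.write(new DataOutputStream(bos));
        // read it back into a fresh object
        KeyPair out = new KeyPair();
        out.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        System.out.println(out);               // expected: 1950    41
        System.out.println(in.compareTo(out)); // expected: 0
    }
}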
--------------------------------------------------------------------------------------------------
SortHot.java
package com.wd;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class SortHot extends WritableComparator{
    public SortHot(){
        // register KeyPair so the comparator can instantiate keys for comparison
        super(KeyPair.class, true);
    }
    public int compare(WritableComparable a, WritableComparable b){
        KeyPair o1 = (KeyPair) a;
        KeyPair o2 = (KeyPair) b;
        int res = Integer.compare(o1.getYear(), o2.getYear());
        if(res != 0){
            return res; // years ascending
        }
        return -Integer.compare(o1.getHot(), o2.getHot()); // within a year, temperature descending
    }
}
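For illustration only (SortHotCheck is a made-up name, not part of the original project): with the two 1950-10 keys from the sample data, SortHot returns a negative value for the hotter one, so 41℃ sorts ahead of 37℃ within the same year.
package com.wd;
public class SortHotCheck{
    public static void main(String[] args){
        KeyPair a = new KeyPair();
        a.setYear(1950);
        a.setHot(41);
        KeyPair b = new KeyPair();
        b.setYear(1950);
        b.setHot(37);
        // years are equal, so the hot comparison is negated: prints a negative number,
        // meaning a (41℃) is ordered before b (37℃)
        System.out.println(new SortHot().compare(a, b));
    }
}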
---------------------------------------------------------------------------------------------------------
FirstPartition.java
package com.wd;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class FirstPartition extends Partitioner<KeyPair, Text>{
    public int getPartition(KeyPair key, Text value, int num){
        // all records of one year hash to the same reduce task
        return (key.getYear() * 127) % num;
    }
}
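A quick check of the partition formula (illustrative only; PartitionCheck is a made-up name): with 4 reduce tasks, (year*127)%4 maps 1949, 1950 and 1951 to partitions 3, 2 and 1 respectively, so each year in the sample data lands in its own reducer.
package com.wd;
import org.apache.hadoop.io.Text;
public class PartitionCheck{
    public static void main(String[] args){
        FirstPartition p = new FirstPartition();
        for(int year = 1949; year <= 1951; year++){
            KeyPair kp = new KeyPair();
            kp.setYear(year);
            // same formula the partitioner uses: (year*127)%4 -> 3, 2, 1
            System.out.println(year + " -> partition " + p.getPartition(kp, new Text(), 4));
        }
    }
}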
-------------------------------------------------------------------------------------------------------------
GroupHot.java
package com.wd;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class GroupHot extends WritableComparator{
    public GroupHot(){
        super(KeyPair.class, true);
    }
    public int compare(WritableComparable a, WritableComparable b){
        KeyPair o1 = (KeyPair) a;
        KeyPair o2 = (KeyPair) b;
        // grouping only looks at the year, so one reduce() call receives a whole year
        return Integer.compare(o1.getYear(), o2.getYear());
    }
}
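Illustrative check (GroupHotCheck is a made-up name): two keys from the same year compare as equal under GroupHot even though their temperatures differ, which is what makes all of a year's records arrive in a single reduce() call.
package com.wd;
public class GroupHotCheck{
    public static void main(String[] args){
        KeyPair a = new KeyPair();
        a.setYear(1950);
        a.setHot(41);
        KeyPair b = new KeyPair();
        b.setYear(1950);
        b.setHot(37);
        // prints 0: only the year is compared, so both keys belong to the same group
        System.out.println(new GroupHot().compare(a, b));
    }
}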
---------------------------------------------------------------------------------------------------------------
RunJob.java
package com.wd;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//hadoop jar /home/cloudera/wd.jar com.wd.RunJob
public class RunJob{
    public static final SimpleDateFormat SDF = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    static class HotMapper extends Mapper<LongWritable, Text, KeyPair, Text>{
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
            String line = value.toString();
            // records look like: 1949-10-01 14:21:02/t34℃
            String[] ss = line.split("/t");
            if(ss.length == 2){
                try{
                    Date date = SDF.parse(ss[0]);
                    Calendar c = Calendar.getInstance();
                    c.setTime(date);
                    int year = c.get(Calendar.YEAR);
                    String hot = ss[1].substring(0, ss[1].indexOf("℃"));
                    KeyPair kp = new KeyPair();
                    kp.setYear(year);
                    kp.setHot(Integer.parseInt(hot));
                    // key = (year, temperature), value = the original record line
                    context.write(kp, value);
                }catch(Exception e){
                    e.printStackTrace();
                }
            }
        }
    }

    static class HotReduce extends Reducer<KeyPair, Text, KeyPair, Text>{
        protected void reduce(KeyPair kp, Iterable<Text> value, Context context) throws IOException, InterruptedException{
            // values of one year arrive sorted hottest-first; emit them all
            for(Text v : value){
                context.write(kp, v);
            }
        }
    }

    public static void main(String[] args){
        Configuration conf = new Configuration();
        try{
            Job job = Job.getInstance(conf);
            job.setJobName("hot");
            job.setJarByClass(RunJob.class);
            job.setMapperClass(HotMapper.class);
            job.setReducerClass(HotReduce.class);
            job.setMapOutputKeyClass(KeyPair.class);
            job.setMapOutputValueClass(Text.class);
            job.setNumReduceTasks(4); // number of reduce tasks
            job.setPartitionerClass(FirstPartition.class);
            job.setSortComparatorClass(SortHot.class);
            job.setGroupingComparatorClass(GroupHot.class);
            FileInputFormat.addInputPath(job, new Path("hdfs://192.168.1.198:8020/wd/input/"));
            FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.198:8020/wd/output3/"));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }catch(Exception e){
            e.printStackTrace();
        }
    }
}
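Note that HotReduce writes every record of a year, which answers task 1 (the hottest record is the first one emitted per year) but not task 2. Below is a hedged sketch of a top-10 variant; Top10Reduce is a made-up name, the class would sit inside RunJob next to HotReduce and be registered with job.setReducerClass(Top10Reduce.class).
static class Top10Reduce extends Reducer<KeyPair, Text, KeyPair, Text>{
    protected void reduce(KeyPair kp, Iterable<Text> value, Context context) throws IOException, InterruptedException{
        int count = 0;
        // values arrive already sorted hottest-first by SortHot, so the first 10 are the ten hottest days
        for(Text v : value){
            if(count++ == 10){
                break;
            }
            context.write(kp, v);
        }
    }
}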
-------------------------------------------------------------------------------------------------------