面试题 之 MAPREDUCE

question:

输入数据每行包含五个字段(年级、班级、科目、姓名、分数),以英文逗号分隔,分别对应 grade、class、subject、name、score。
求各年级、各班、各科目中分数最高的 Top10,用 MapReduce 实现。

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class StudentScoreTopN {
 static class StudentScoreTopNMapper extends Mapper<LongWritable, Text, Text, DoubleWritable>{
  Text k = new Text();
  DoubleWritable v = new DoubleWritable() ;
  @Override
  protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, DoubleWritable>.Context context)
    throws IOException, InterruptedException {
   // 处理每行数据
   String line = value.toString();
   String[] split = line.split(","); 
   //将切割后的值进行拼接  作为key
   String str = split[0] + "--" + split[1] + "--" +  split[2] + "-->" + split[3] ;
   //  转换成Double 作为value
   Double d = Double.parseDouble(split[4]);
   k.set(str);
   v.set(d);
   context.write(k, v); 
  }
 }
static class StudentScoreTopNReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable>{
  Map<String, Double> map = new  HashMap<String , Double>();
  @Override
  protected void reduce(Text key, Iterable<DoubleWritable> iters,
    Reducer<Text, DoubleWritable, Text, DoubleWritable>.Context context)
    throws IOException, InterruptedException {
   //遍历迭代器中的value
   for (DoubleWritable doubleWritable : iters) {
    double d = doubleWritable.get();
   //放入map集合中 
    map.put(key.toString(), d);
   }
  }
  @Override
  protected void cleanup(Reducer<Text, DoubleWritable, Text, DoubleWritable>.Context context)
    throws IOException, InterruptedException { 
   //将map集合转成list集合
   Set<Entry<String, Double>> set = map.entrySet();
   ArrayList<Entry<String, Double>> list = new ArrayList<Entry<String, Double>>(set);
   //排序
   Collections.sort(list , new Comparator<Entry<String, Double>>() {
    @Override
    public int compare(Entry<String, Double> o1, Entry<String, Double> o2) { 
     //将题目中的年级,班级,科目分离出来聚合  按照字段升序和成绩倒序排列
     String[] split1 = o1.getKey().split("-->");
     String[] split2 = o2.getKey().split("-->");
     int i = split1[0].compareTo(split2[0]);
     return i == 0 ?Double.compare(o2.getValue(), o1.getValue()) : i;
    }
   });
   //遍历  取出各各年级各班各科目的最高分Top10    不足10的按照最大长度取值
   for(int i = 0 ; i < Integer.min(10, list.size()) ; i++ ) {
    Text k = new Text();
    DoubleWritable v = new DoubleWritable();
    k.set(list.get(i).getKey());
    v.set(list.get(i).getValue());
    context.write(k, v);
   }
  }
 }
public static void main(String[] args) throws Exception {
  // 1 初始化配置对象  默认程序运行在本地  本地模式运行
  Configuration conf = new  Configuration();
  //2 初始化一个Job
  Job job = Job.getInstance(conf);
  //3  执行maptask 类  reducetask类
  job.setMapperClass(StudentScoreTopNMapper.class);
  job.setReducerClass(StudentScoreTopNReducer.class);
  //  设置maptask的输出的kv类型
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);
  //  设置reducetask的输出的kv类型
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  //4  处理数据路径
  FileInputFormat.setInputPaths(job, new Path("D://map//student//1.txt"));
  //5 结果输出
  FileOutputFormat.setOutputPath(job, new Path("D://map//student//stu4"));
  //提交Job  等待执行完毕
  job.waitForCompletion(true);
 }
}

以上为个人发挥,不知是否妥当,欢迎大牛指点一二。

你可能感兴趣的:(面试题 之 MAPREDUCE)