字段如下(年级、班级、科目、姓名、分数),分别对应 grade、class、subject、name、score。
需求:用 MapReduce 求各年级、各班、各科目的最高分 Top 10。
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class StudentScoreTopN {
static class StudentScoreTopNMapper extends Mapper<LongWritable, Text, Text, DoubleWritable>{
Text k = new Text();
DoubleWritable v = new DoubleWritable() ;
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, DoubleWritable>.Context context)
throws IOException, InterruptedException {
// 处理每行数据
String line = value.toString();
String[] split = line.split(",");
//将切割后的值进行拼接 作为key
String str = split[0] + "--" + split[1] + "--" + split[2] + "-->" + split[3] ;
// 转换成Double 作为value
Double d = Double.parseDouble(split[4]);
k.set(str);
v.set(d);
context.write(k, v);
}
}
static class StudentScoreTopNReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable>{
Map<String, Double> map = new HashMap<String , Double>();
@Override
protected void reduce(Text key, Iterable<DoubleWritable> iters,
Reducer<Text, DoubleWritable, Text, DoubleWritable>.Context context)
throws IOException, InterruptedException {
//遍历迭代器中的value
for (DoubleWritable doubleWritable : iters) {
double d = doubleWritable.get();
//放入map集合中
map.put(key.toString(), d);
}
}
@Override
protected void cleanup(Reducer<Text, DoubleWritable, Text, DoubleWritable>.Context context)
throws IOException, InterruptedException {
//将map集合转成list集合
Set<Entry<String, Double>> set = map.entrySet();
ArrayList<Entry<String, Double>> list = new ArrayList<Entry<String, Double>>(set);
//排序
Collections.sort(list , new Comparator<Entry<String, Double>>() {
@Override
public int compare(Entry<String, Double> o1, Entry<String, Double> o2) {
//将题目中的年级,班级,科目分离出来聚合 按照字段升序和成绩倒序排列
String[] split1 = o1.getKey().split("-->");
String[] split2 = o2.getKey().split("-->");
int i = split1[0].compareTo(split2[0]);
return i == 0 ?Double.compare(o2.getValue(), o1.getValue()) : i;
}
});
//遍历 取出各各年级各班各科目的最高分Top10 不足10的按照最大长度取值
for(int i = 0 ; i < Integer.min(10, list.size()) ; i++ ) {
Text k = new Text();
DoubleWritable v = new DoubleWritable();
k.set(list.get(i).getKey());
v.set(list.get(i).getValue());
context.write(k, v);
}
}
}
public static void main(String[] args) throws Exception {
// 1 初始化配置对象 默认程序运行在本地 本地模式运行
Configuration conf = new Configuration();
//2 初始化一个Job
Job job = Job.getInstance(conf);
//3 执行maptask 类 reducetask类
job.setMapperClass(StudentScoreTopNMapper.class);
job.setReducerClass(StudentScoreTopNReducer.class);
// 设置maptask的输出的kv类型
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(DoubleWritable.class);
// 设置reducetask的输出的kv类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
//4 处理数据路径
FileInputFormat.setInputPaths(job, new Path("D://map//student//1.txt"));
//5 结果输出
FileOutputFormat.setOutputPath(job, new Path("D://map//student//stu4"));
//提交Job 等待执行完毕
job.waitForCompletion(true);
}
}
以上为个人实现,不确定是否合理,欢迎大牛指点一二。