This article shows how to group data in the Reduce phase of a MapReduce job, and exercises a few of the new JDK 8 stream features along the way.
Let's start with the raw data.
It is a small, hand-edited txt file:
English,liudehua,80
English,lijing,79
English,nezha,85
English,jinzha,60
English,muzha,71
English,houzi,99
English,libai,88
English,hanxin,66
English,zhugeliang,95
Math,liudehua,74
Math,lijing,72
Math,nezha,95
Math,jinzha,61
Math,muzha,37
Math,houzi,37
Math,libai,84
Math,hanxin,89
Math,zhugeliang,93
Computer,liudehua,54
Computer,lijing,73
Computer,nezha,86
Computer,jinzha,96
Computer,muzha,76
Computer,houzi,92
Computer,libai,73
Computer,hanxin,82
Computer,zhugeliang,100
I. MapReduce: per-student max, min and average score
1. Mapper code
package com.studentExam.avgscore;
/**
 * Description: AvgMapper
 *
 * @author smart-dxw
 * @version 2019/6/19 21:58 v1.0
 */
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class AvgMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input line looks like "subject,name,score"
        String line = value.toString();
        String[] s = line.split(",");
        // Key by student name so all of one student's scores arrive at the same reduce call
        context.write(new Text(s[1]), new IntWritable(Integer.parseInt(s[2])));
    }
}
2. Reducer code
package com.studentExam.avgscore;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.IntSummaryStatistics;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Description: AvgReducer
 *
 * @author smart-dxw
 * @version 2019/6/19 22:01 v1.0
 */
public class AvgReducer extends Reducer<Text, IntWritable, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> value, Context context) throws IOException, InterruptedException {
        // Collect this student's scores
        List<Integer> scores = new ArrayList<>();
        Iterator<IntWritable> it = value.iterator();
        while (it.hasNext()) {
            scores.add(it.next().get());
        }
        // One pass over the collection yields count, sum, min, average and max, e.g.
        // IntSummaryStatistics{count=3, sum=237, min=66, average=79.000000, max=89}
        IntSummaryStatistics score = scores.stream().collect(Collectors.summarizingInt(Integer::intValue));
        // Math.round(double) returns the nearest long, rounding half up
        context.write(key, new Text(score.getMax() + "\t" + score.getMin() + "\t" + Math.round(score.getAverage())));
    }
}
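The JDK 8 feature doing the heavy lifting here is Collectors.summarizingInt, which folds a stream of ints into an IntSummaryStatistics (count, sum, min, average, max) in a single pass. A standalone sketch you can run outside Hadoop, using hanxin's three scores from the sample data:
import java.util.Arrays;
import java.util.IntSummaryStatistics;
import java.util.List;
import java.util.stream.Collectors;

public class SummaryDemo {
    public static void main(String[] args) {
        // hanxin's scores in the sample data: English 66, Math 89, Computer 82
        List<Integer> scores = Arrays.asList(66, 89, 82);
        IntSummaryStatistics stats =
                scores.stream().collect(Collectors.summarizingInt(Integer::intValue));
        // Math.round(double) returns the nearest long, rounding half up
        System.out.println(stats.getMax());                 // 89
        System.out.println(stats.getMin());                 // 66
        System.out.println(Math.round(stats.getAverage())); // 79
    }
}
This matches the hanxin line in the result below.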
3. Job code. The job classes share a common driver (the Drive class, covered in the post linked at the end).
package com.studentExam.avgscore;
import com.Drive;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;
import java.net.URISyntaxException;
/**
 * Description: AvgRun
 *
 * @author smart-dxw
 * @version 2019/6/19 22:02 v1.0
 */
public class AvgRun {
    public static void main(String[] args) throws ClassNotFoundException, URISyntaxException, InterruptedException, IOException {
        // Hard-coded local paths for a quick test run
        args = new String[]{
                "C:\\studentExam\\01\\in",
                "C:\\studentExam\\01\\AvgRun"
        };
        Drive.run(AvgRun.class,
                AvgMapper.class,
                Text.class,        // map output key
                IntWritable.class, // map output value
                AvgReducer.class,
                Text.class,        // final output key
                Text.class,        // final output value
                args[0],
                args[1]);
    }
}
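The Drive helper itself is published in a separate post (see the link at the end). To keep this post readable on its own, here is a minimal sketch of what a driver with this nine-argument signature might look like; the parameter roles are inferred from the Drive.run(...) call above, and details such as the output-directory cleanup are my assumptions, not the author's actual code:
package com;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;

public class Drive {
    public static void run(Class<?> jobClass,
                           Class<? extends Mapper> mapperClass,
                           Class<?> mapOutputKeyClass,
                           Class<?> mapOutputValueClass,
                           Class<? extends Reducer> reducerClass,
                           Class<?> outputKeyClass,
                           Class<?> outputValueClass,
                           String inputPath,
                           String outputPath)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Remove a stale output directory so reruns don't abort (assumption)
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path(outputPath);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        Job job = Job.getInstance(conf);
        job.setJarByClass(jobClass);
        job.setMapperClass(mapperClass);
        job.setMapOutputKeyClass(mapOutputKeyClass);
        job.setMapOutputValueClass(mapOutputValueClass);
        job.setReducerClass(reducerClass);
        job.setOutputKeyClass(outputKeyClass);
        job.setOutputValueClass(outputValueClass);
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, out);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}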
Result (columns: name, max, min, rounded average). As a spot check, hanxin scored 66, 89 and 82, so max 89, min 66, average 237 / 3 = 79:
hanxin 89 66 79
houzi 99 37 76
jinzha 96 60 72
libai 88 73 82
lijing 79 72 75
liudehua 80 54 69
muzha 76 37 61
nezha 95 85 89
zhugeliang 100 93 96
II. Per course, band students into high, medium and low, with head counts
The input is the same raw data as in Part I.
1. Mapper code
package com.studentExam.classscore;
/**
 * Description: AvgClassMapper
 *
 * @author smart-dxw
 * @version 2019/6/19 21:58 v1.0
 */
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class AvgClassMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] s = line.split(",");
        // Key by course; the value carries "name,score" so the reducer can band each student
        context.write(new Text(s[0]), new Text(s[1] + "," + s[2]));
    }
}
2. Reducer code
package com.studentExam.classscore;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Iterator;
/**
 * Description: AvgClassReducer
 *
 * @author smart-dxw
 * @version 2019/6/19 22:01 v1.0
 */
public class AvgClassReducer extends Reducer<Text, Text, Text, Text> {
    Text k = new Text();
    Text v = new Text();

    @Override
    protected void reduce(Text key, Iterable<Text> value, Context context) throws IOException, InterruptedException {
        // Name lists for the high, medium and low bands
        String hStr = "";
        String mStr = "";
        String lStr = "";
        // Walk this course's "name,score" values and band each student
        Iterator<Text> text = value.iterator();
        while (text.hasNext()) {
            String[] split = text.next().toString().split(",");
            int score = Integer.parseInt(split[1]);
            // high: 90 and above; medium: 80 to 89; low: 79 and below
            if (score >= 90) {
                hStr += ", " + split[0];
            } else if (score >= 80) {
                mStr += ", " + split[0];
            } else {
                lStr += ", " + split[0];
            }
        }
        // Strip the leading ", " (every band is non-empty for this dataset)
        hStr = hStr.substring(2);
        mStr = mStr.substring(2);
        lStr = lStr.substring(2);
        // Build the key
        k.set("Course " + key.toString() + ":\n");
        // Build the value; names were joined with ", ", so splitting on ", " gives the head count
        String s = "High:\t" + hStr + "\ttotal: " + hStr.split(", ").length + "\n"
                + "\tMedium:\t" + mStr + "\ttotal: " + mStr.split(", ").length + "\n"
                + "\tLow:\t" + lStr + "\ttotal: " + lStr.split(", ").length + "\n";
        v.set(s);
        context.write(k, v);
    }
}
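Since the post is also showcasing JDK 8, the manual string concatenation above could instead be written with Collectors.groupingBy. A sketch of that alternative (plain Java, outside Hadoop; the data is English's "name,score" values from the sample input, and the class name is mine):
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class BandDemo {
    public static void main(String[] args) {
        // "name,score" values as the reducer would receive them for English
        List<String> values = Arrays.asList(
                "liudehua,80", "lijing,79", "nezha,85", "jinzha,60", "muzha,71",
                "houzi,99", "libai,88", "hanxin,66", "zhugeliang,95");
        // Classify each value into a band, keeping only the name
        Map<String, List<String>> bands = values.stream()
                .collect(Collectors.groupingBy(v -> {
                    int score = Integer.parseInt(v.split(",")[1]);
                    return score >= 90 ? "High" : score >= 80 ? "Medium" : "Low";
                }, Collectors.mapping(v -> v.split(",")[0], Collectors.toList())));
        bands.forEach((band, names) ->
                System.out.println(band + ": " + String.join(", ", names) + "  total: " + names.size()));
    }
}
groupingBy avoids the substring(2) trimming, and the head count comes for free from List.size().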
3. Job code
package com.studentExam.classscore;
import com.Drive;
import org.apache.hadoop.io.Text;
import java.io.IOException;
import java.net.URISyntaxException;
/**
 * Description: AvgClassRun
 *
 * @author smart-dxw
 * @version 2019/6/19 22:02 v1.0
 */
public class AvgClassRun {
    public static void main(String[] args) throws ClassNotFoundException, URISyntaxException, InterruptedException, IOException {
        // Hard-coded local paths for a quick test run
        args = new String[]{
                "C:\\studentExam\\01\\in",
                "C:\\studentExam\\01\\AvgClassRun"
        };
        Drive.run(AvgClassRun.class,
                AvgClassMapper.class,
                Text.class,   // map output key
                Text.class,   // map output value
                AvgClassReducer.class,
                Text.class,   // final output key
                Text.class,   // final output value
                args[0],
                args[1]);
    }
}
Result:
Course Computer:
High: zhugeliang, houzi, jinzha  total: 3
Medium: hanxin, nezha  total: 2
Low: liudehua, lijing, muzha, libai  total: 4
Course English:
High: zhugeliang, houzi  total: 2
Medium: liudehua, libai, nezha  total: 3
Low: hanxin, jinzha, lijing, muzha  total: 4
Course Math:
High: zhugeliang, nezha  total: 2
Medium: hanxin, libai  total: 2
Low: muzha, houzi, jinzha, liudehua, lijing  total: 5
Thanks for the support, folks. The driver class is covered in a separate post:
https://blog.csdn.net/hengyufxh1/article/details/93249741
Keep at it — I'll be waiting for you on the job site!