MapReduce之MR处理:按课程排学生名单以及最高最低平均分数

根据此篇文章可以熟练使用Reduce阶段进行分组,并且了解jdk8新特性代码。

我们先来看下原始数据

某地方编辑的txt文件数据不是太大:

English,liudehua,80
English,lijing,79
English,nezha,85
English,jinzha,60
English,muzha,71
English,houzi,99
English,libai,88
English,hanxin,66
English,zhugeliang,95
Math,liudehua,74
Math,lijing,72
Math,nezha,95
Math,jinzha,61
Math,muzha,37
Math,houzi,37
Math,libai,84
Math,hanxin,89
Math,zhugeliang,93
Computer,liudehua,54
Computer,lijing,73
Computer,nezha,86
Computer,jinzha,96
Computer,muzha,76
Computer,houzi,92
Computer,libai,73
Computer,hanxin,82
Computer,zhugeliang,100

一、MR之学生成绩,最高,最低,平均分数。

    1.Mapper阶段代码

package com.studentExam.avgscore;

/**
 * $功能描述: AvgscoreMapper
 *
 * @author :smart-dxw
 * @version : 2019/6/19 21:58 v1.0
 */

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper: parses one CSV line ("course,name,score") and emits
 * (student name, score) so the reducer can aggregate per student.
 *
 * Fix: the original extended the raw {@code Mapper} type, so the
 * {@code @Override map(LongWritable, Text, Context)} method did not
 * override the erased {@code map(Object, Object, Context)} and the
 * class failed to compile. Generic type parameters added.
 */
public class AvgMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Input line format: course,name,score
        String line = value.toString();
        String[] fields = line.split(",");
        // Key by student name so all of one student's scores meet in a single reduce call
        context.write(new Text(fields[1]), new IntWritable(Integer.parseInt(fields[2])));
    }
}

    2.Reducer阶段代码

package com.studentExam.avgscore;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.IntSummaryStatistics;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;

/**
 * $功能描述: AvgReducer
 *
 * @author :smart-dxw
 * @version : 2019/6/19 22:01 v1.0
 */
/**
 * Reducer: for each student, computes max / min / rounded average score
 * over all courses and emits them tab-separated.
 *
 * Fix: the original used raw {@code Reducer}, {@code Iterable},
 * {@code List} and {@code Iterator}, so {@code it.next()} was an
 * {@code Object} and the call to {@code .get()} did not compile.
 * Generic type parameters added; iteration simplified to for-each.
 */
public class AvgReducer extends Reducer<Text, IntWritable, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> value, Context context) throws IOException, InterruptedException {
        // Collect this student's scores from all courses
        List<Integer> scores = new ArrayList<>();
        for (IntWritable score : value) {
            scores.add(score.get());
        }

        // Summary over the scores, e.g.
        // IntSummaryStatistics{count=3, sum=237, min=66, average=79.000000, max=89}
        IntSummaryStatistics stats = scores.stream().collect(Collectors.summarizingInt(Integer::intValue));
        // Math.round() rounds the average half-up to the nearest long
        context.write(key, new Text(stats.getMax() + "\t" + stats.getMin() + "\t" + Math.round(stats.getAverage())));
    }
}

3.Job类:这里通过一个公共的驱动类(Drive)来提交作业

package com.studentExam.avgscore;

import com.Drive;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;
import java.net.URISyntaxException;

/**
 * $功能描述: run
 *
 * @author :smart-dxw
 * @version : 2019/6/19 22:02 v1.0
 */
/**
 * Driver for the per-student max/min/average job.
 *
 * Fix: the original unconditionally overwrote {@code args}, so paths
 * passed on the command line were silently ignored. The hard-coded
 * local paths are now only a fallback used when no paths are supplied.
 */
public class AvgRun {
    public static void main(String[] args) throws ClassNotFoundException, URISyntaxException, InterruptedException, IOException {
        // Default to local test paths only when input/output were not given
        if (args == null || args.length < 2) {
            args = new String[]{
                    "C:\\studentExam\\01\\in",
                    "C:\\studentExam\\01\\AvgRun"
            };
        }
        Drive.run(AvgRun.class,
                AvgMapper.class,
                Text.class,          // map output key
                IntWritable.class,   // map output value
                AvgReducer.class,
                Text.class,          // final output key
                Text.class,          // final output value
                args[0],             // input path
                args[1]);            // output path
    }
}

结果

hanxin	89	66	79
houzi	99	37	76
jinzha	96	60	72
libai	88	73	82
lijing	79	72	75
liudehua	80	54	69
muzha	76	37	61
nezha	95	85	89
zhugeliang	100	93	96

 

二、根据课程得出高、中、低的学生以及人数。

数据材料还是上边的第一个原始数据:

    1.Mapper阶段代码

package com.studentExam.classscore;

/**
 * $功能描述: AvgscoreMapper
 *
 * @author :smart-dxw
 * @version : 2019/6/19 21:58 v1.0
 */

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper: parses one CSV line ("course,name,score") and emits
 * (course, "name,score") so the reducer can group students by course.
 *
 * Fix: the original extended the raw {@code Mapper} type, so the
 * {@code @Override} on {@code map(LongWritable, Text, Context)} failed
 * to compile. Generic type parameters added.
 */
public class AvgClassMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(",");
        // Key by course; value keeps name and score together for the reducer to split
        context.write(new Text(fields[0]), new Text(fields[1] + "," + fields[2]));
    }
}

    2.Reduce阶段代码

package com.studentExam.classscore;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * $功能描述: AvgReducer
 *
 * @author :smart-dxw
 * @version : 2019/6/19 22:01 v1.0
 */
/**
 * Reducer: for each course, buckets students into high (>=90),
 * mid (80-89) and low (<=79) bands and emits the names plus head-counts.
 *
 * Fixes:
 * - raw {@code Reducer}/{@code Iterable} types did not compile with the
 *   {@code @Override}; generics added;
 * - the low bucket was declared as {@code lgStr} but used as {@code lStr}
 *   (compile error);
 * - {@code else if (score <= 70)} silently dropped scores 71-79 even
 *   though the stated rule is low = 0-79; now a plain {@code else};
 * - {@code substring(2)} threw {@code StringIndexOutOfBoundsException}
 *   when a bucket was empty; lists + {@code String.join} avoid both the
 *   prefix trimming and the fragile {@code split(", ").length} counting.
 */
public class AvgClassReducer extends Reducer<Text, Text, Text, Text> {

    Text k = new Text();
    Text v = new Text();

    @Override
    protected void reduce(Text key, Iterable<Text> value, Context context) throws IOException, InterruptedException {
        // Student names per band: high (>=90), mid (80-89), low (0-79)
        List<String> high = new ArrayList<>();
        List<String> mid = new ArrayList<>();
        List<String> low = new ArrayList<>();

        for (Text record : value) {
            // Each value is "name,score" as emitted by AvgClassMapper
            String[] split = record.toString().split(",");
            int score = Integer.parseInt(split[1]);
            if (score >= 90) {
                high.add(split[0]);
            } else if (score >= 80) {
                mid.add(split[0]);
            } else {
                low.add(split[0]);
            }
        }

        // Key carries the course heading
        k.set("课程" + key.toString() + ":\n");
        // Value lists the names and the exact head-count of each band
        String s = "高:\t" + String.join(", ", high) + "\t总人数:" + high.size() + "人\n"
                + "\t中:\t" + String.join(", ", mid) + "\t总人数:" + mid.size() + "人\n"
                + "\t低:\t" + String.join(", ", low) + "\t总人数:" + low.size() + "人\n";
        v.set(s);
        context.write(k, v);
    }
}

3.Job阶段代码

package com.studentExam.classscore;

import com.Drive;
import org.apache.hadoop.io.Text;

import java.io.IOException;
import java.net.URISyntaxException;

/**
 * $功能描述: run
 *
 * @author :smart-dxw
 * @version : 2019/6/19 22:02 v1.0
 */
/**
 * Driver for the per-course score-band job.
 *
 * Fix: the original unconditionally overwrote {@code args}; the
 * hard-coded local paths are now only a fallback used when no
 * input/output paths are supplied on the command line.
 */
public class AvgClassRun {

    public static void main(String[] args) throws ClassNotFoundException, URISyntaxException, InterruptedException, IOException {
        // Default to local test paths only when input/output were not given
        if (args == null || args.length < 2) {
            args = new String[]{
                    "C:\\studentExam\\01\\in",
                    "C:\\studentExam\\01\\AvgClassRun"
            };
        }
        Drive.run(AvgClassRun.class,
                AvgClassMapper.class,
                Text.class,      // map output key
                Text.class,      // map output value
                AvgClassReducer.class,
                Text.class,      // final output key
                Text.class,      // final output value
                args[0],         // input path
                args[1]);        // output path
    }
}

结果

课程Computer:
	高:	zhugeliang, houzi, jinzha	总人数:3人
	中:	hanxin, nezha	总人数:2人
	低:	liudehua	总人数:1人

课程English:
	高:	zhugeliang, houzi	总人数:2人
	中:	liudehua, libai, nezha	总人数:3人
	低:	hanxin, jinzha	总人数:2人

课程Math:
	高:	zhugeliang, nezha	总人数:2人
	中:	hanxin, libai	总人数:2人
	低:	muzha, houzi, jinzha	总人数:3人

感谢老铁支持:驱动类看另外一个地址

https://blog.csdn.net/hengyufxh1/article/details/93249741

老铁加油:我在工地等你!

 

 

你可能感兴趣的:(大数据,Hadoop,MR,MapReduce)