// wordcount 学生成绩普通版

package com.Practice.StudentScores;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * 题目:学生成绩普通版
 *
 *   computer,huangxiaoming,85
     computer,xuzheng,54
     computer,huangbo,86
     computer,liutao,85
     computer,huanglei,99
     computer,liujialing,85
     computer,liuyifei,75
     computer,huangdatou,48
     computer,huangjiaju,88
     computer,huangzitao,85
     english,zhaobenshan,57
     english,liuyifei,85
     english,liuyifei,76
     english,huangdatou,48
     english,zhouqi,85
     english,huangbo,85
     english,huangxiaoming,96
     english,huanglei,85
     english,liujialing,75
     algorithm,liuyifei,75
     algorithm,huanglei,76
     algorithm,huangjiaju,85
     algorithm,liutao,85
     algorithm,huangdou,42
     algorithm,huangzitao,81
     math,wangbaoqiang,85
     math,huanglei,76
     math,huangjiaju,85
     math,liutao,48
     math,xuzheng,54
     math,huangxiaoming,85
     math,liujialing,85
 *
 * 1、每一个course的最高分,最低分,平均分
 返回结果格式:
 course max=95  min=22  avg=55
 例子:
 computer   max=99  min=48  avg=75

 解题思路:在map以course作为key值,其余部分作为value,在reduce中设置变量max,min,avg,通过累计求出,并设置格式

 2、求该成绩表当中出现了相同分数的分数,还有次数,以及该分数的人
 返回结果的格式:
 科目 分数  次数  该分数的人
 例子:
 computer   85  3   huangzitao,liujialing,huangxiaoming

 解题思路:求某科目中出现相同分数的人数以及分数,map以科目和分数作为key值,进行分组,在reduce中进行计数,当计数结果大于1时,输出分数,人数和人名
 */
public class StudentScores1 {

    /**
     * Configures and submits the student-score MapReduce job, then terminates the
     * JVM with status 0 when the job succeeds and 1 otherwise.
     *
     * Optional positional arguments (each falls back to the original hard-coded value):
     *   args[0] - input path, default "input/studentScores"
     *   args[1] - output path, default "output/studentScores"
     *   args[2] - "1" runs question 1 (per-course max/min/avg); anything else,
     *             or no value, runs question 2 (scores shared by several students)
     *
     * Any existing output path is deleted recursively before submission.
     *
     * @param args optional input path, output path and question selector
     * @throws IOException            if file-system access or job submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for job completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String inputLocation = args.length > 0 ? args[0] : "input/studentScores";
        String outputLocation = args.length > 1 ? args[1] : "output/studentScores";
        boolean runQuestionOne = args.length > 2 && "1".equals(args[2].trim());

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJar("wordcountJar/wordcount.jar");

        if (runQuestionOne) {
            job.setMapperClass(StudentScoresMapper.class);
            job.setReducerClass(StudentScoresReducer.class);
            // The question-1 mapper emits IntWritable while its reducer emits Text,
            // so the map output value class has to be declared separately.
            job.setMapOutputValueClass(IntWritable.class);
        } else {
            job.setMapperClass(StudentScoresMapper2.class);
            job.setReducerClass(StudentScoresReducer2.class);
            job.setMapOutputValueClass(Text.class);
        }
        job.setMapOutputKeyClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        Path inputPath = new Path(inputLocation);
        Path outputPath = new Path(outputLocation);

        // Resolve the file system from the path itself so a fully qualified URI works,
        // and use exists() so a stale output that is a plain file is removed too.
        FileSystem fs = outputPath.getFileSystem(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

    /**
     * Question 1 mapper: converts each "course,student,score" input line into a
     * (course, score) pair.
     */
    public static class StudentScoresMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
        // Single Writable instances, overwritten and re-emitted for every record.
        private final IntWritable outValue = new IntWritable();
        private final Text outKey = new Text();

        /**
         * Emits the trimmed first field as key and the third field, parsed as an int,
         * as value. Lines with fewer than three comma-separated fields or with a
         * non-numeric score are skipped and counted under the
         * "StudentScores" / "MALFORMED_LINES" counter instead of failing the task.
         *
         * @param key     input key supplied by the framework (not used)
         * @param value   one line of input text
         * @param context task context used to emit output and update counters
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] splits = value.toString().split(",");
            if (splits.length < 3) {
                context.getCounter("StudentScores", "MALFORMED_LINES").increment(1);
                return;
            }

            int score;
            try {
                score = Integer.parseInt(splits[2].trim());
            } catch (NumberFormatException e) {
                context.getCounter("StudentScores", "MALFORMED_LINES").increment(1);
                return;
            }

            outKey.set(splits[0].trim());
            outValue.set(score);
            context.write(outKey, outValue);
        }
    }

    /**
     * Question 1 reducer: for each course, writes the highest score, the lowest
     * score and the integer average as "max=.. min=.. avg=..".
     */
    public static class StudentScoresReducer extends Reducer<Text,IntWritable,Text,Text> {
        // Single Writable instance, overwritten and re-emitted for every key.
        private final Text outValue = new Text();

        /**
         * Walks the scores of one course once, tracking running min, max, sum and
         * count, then emits the formatted summary. The average uses integer
         * division, so any fractional part is discarded.
         *
         * @param key     the course name
         * @param values  every score recorded for that course
         * @param context task context used to emit the summary line
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int min = Integer.MAX_VALUE;
            int max = Integer.MIN_VALUE;
            long sum = 0;   // long so a very large group cannot overflow the total
            int count = 0;

            for (IntWritable val : values) {
                int score = val.get();
                min = Math.min(min, score);
                max = Math.max(max, score);
                sum += score;
                count++;
            }

            // Defensive guard: avoids dividing by zero if no value was supplied.
            if (count == 0) {
                return;
            }

            long avg = sum / count;
            String outStr = "max="+max+" min="+min+" avg="+avg;
            outValue.set(outStr);
            context.write(key, outValue);
        }
    }

    /**
     * Question 2 mapper: converts each "course,student,score" input line into a
     * (course TAB score, student) pair so that students sharing a score in the
     * same course are grouped under one key.
     */
    public static class StudentScoresMapper2 extends Mapper<LongWritable,Text,Text,Text>{
        // Single Writable instances, overwritten and re-emitted for every record.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Emits the composite key "course\tscore" with the student name as value.
         * Fields are trimmed so stray whitespace cannot split one logical group
         * into several keys. Lines with fewer than three comma-separated fields
         * are skipped and counted under the "StudentScores" / "MALFORMED_LINES"
         * counter instead of failing the task.
         *
         * @param key     input key supplied by the framework (not used)
         * @param value   one line of input text
         * @param context task context used to emit output and update counters
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] splits = value.toString().split(",");
            if (splits.length < 3) {
                context.getCounter("StudentScores", "MALFORMED_LINES").increment(1);
                return;
            }

            String course = splits[0].trim();
            String student = splits[1].trim();
            String score = splits[2].trim();

            outKey.set(course+"\t"+score);
            outValue.set(student);
            context.write(outKey,outValue);
        }
    }

    /**
     * Question 2 reducer: for each (course, score) key, reports how many students
     * obtained that score and who they are, but only when at least two did.
     */
    public static class StudentScoresReducer2 extends Reducer<Text,Text,Text,Text>{
        // Single Writable instance, overwritten and re-emitted for every key.
        private final Text outValue = new Text();

        /**
         * Joins the student names with commas while counting them, and emits
         * "count names" for keys that have two or more students. Keys with a
         * single student produce no output.
         *
         * @param key     composite "course\tscore" key
         * @param values  names of the students who obtained that score
         * @param context task context used to emit the result line
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            StringBuilder names = new StringBuilder();
            int count = 0;
            for (Text student : values) {
                if (names.length() != 0) {
                    names.append(",");
                }
                names.append(student.toString());
                count++;
            }

            // Two or more students under one key means the score is shared.
            if (count >= 2) {
                outValue.set(count+" "+names.toString());
                context.write(key,outValue);
            }
        }
    }
}

// 你可能感兴趣的:(hadoop-2-x)