Spark 的 TopN 问题

1、按第一个字段从大到小降序排序,取出前三条记录

3,zhangsan
5,lisi
6,wangwu
7,wermaziang
1,bjsxt
4,shsxt
5,xiansxt
6,gzsxt
9,laogao
0,xiaogao
3,laoxiao
// Global Top-N: key every line by its leading integer field, sort by that key
// in descending order, and print the three records with the largest keys.
JavaRDD<String> linesRDD = sc.textFile("top.txt");
JavaPairRDD<Integer, String> pairRDD = linesRDD.mapToPair(new PairFunction<String, Integer, String>() {
    private static final long serialVersionUID = 1L;

    /**
     * Turns one input line of the form "id,name" into an (id, line) pair.
     *
     * @param val one raw input line
     * @return the numeric first field as the key and the whole line as the value
     * @throws Exception if the first field cannot be parsed as an integer
     */
    @Override
    public Tuple2<Integer, String> call(String val) throws Exception {
        String[] splited = val.split(",");
        Integer userId = Integer.valueOf(splited[0]);
        return new Tuple2<Integer, String>(userId, val);
    }
});
// false = descending order, so the largest keys come first.
JavaPairRDD<Integer, String> sortByKeyRDD = pairRDD.sortByKey(false);
// Only the first three sorted records are fetched.
List<Tuple2<Integer, String>> top3 = sortByKeyRDD.take(3);
for (Tuple2<Integer, String> tuple2 : top3) {
    // _2 is the original, unmodified input line.
    System.out.println(tuple2._2);
}

2、取出每门课程分数最高的前三个分数

class1  100
class2 85
class3 70
class1 102
class2 65
class1 45
class2 85
class3 70
class1 16
class2 88
class1 95
class2 37
class3 98
class1 99
class2 23
class4 2
// Grouped Top-N: parse "className score" lines, group the scores by class name,
// and print the three highest scores of every class.
JavaRDD<String> linesRDD = sc.textFile("scores.txt");
JavaPairRDD<String, Integer> pairRDD = linesRDD.mapToPair(new PairFunction<String, String, Integer>() {
    private static final long serialVersionUID = 1L;

    /**
     * Parses one input line into a (className, score) pair.
     *
     * @param s one raw input line: class name, whitespace, integer score
     * @return the class name as the key and the parsed score as the value
     * @throws Exception if the line has fewer than two fields or the score
     *                   is not a valid integer
     */
    @Override
    public Tuple2<String, Integer> call(String s) throws Exception {
        // Split on any run of whitespace so that tab-separated and
        // space-separated files (including doubled spaces) both parse.
        String[] split = s.trim().split("\\s+");
        String className = split[0];
        Integer scores = Integer.parseInt(split[1]);
        return new Tuple2<String, Integer>(className, scores);
    }
});
JavaPairRDD<String, Iterable<Integer>> stringIterableJavaPairRDD = pairRDD.groupByKey();
// NOTE(review): foreach is executed by Spark's workers; when run on a cluster the
// println output is expected in the executor logs, not the driver console.
stringIterableJavaPairRDD.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
    private static final long serialVersionUID = 1L;

    /**
     * Prints the class name followed by its three highest scores, one per line.
     * Slots that could not be filled (fewer than three scores) print as "null".
     *
     * @param tuple the class name and all scores recorded for that class
     */
    @Override
    public void call(Tuple2<String, Iterable<Integer>> tuple) throws Exception {
        String className = tuple._1;
        Iterator<Integer> scores = tuple._2.iterator();
        // Kept sorted in descending order; null marks a slot not yet filled.
        Integer[] scoreTop3 = new Integer[3];
        while (scores.hasNext()) {
            Integer score = scores.next();
            for (int i = 0; i < scoreTop3.length; i++) {
                if (scoreTop3[i] == null) {
                    // First free slot: everything before it is >= score.
                    scoreTop3[i] = score;
                    break;
                } else if (score > scoreTop3[i]) {
                    // Shift the smaller entries one slot down (dropping the last)
                    // to make room for the new score at position i.
                    for (int j = scoreTop3.length - 1; j > i; j--) {
                        scoreTop3[j] = scoreTop3[j - 1];
                    }
                    scoreTop3[i] = score;
                    break;
                }
            }
        }
        System.out.println("class Name:"+className);
        for (Integer sscore : scoreTop3) {
            System.out.println(sscore);
        }
    }
});

你可能感兴趣的:(大数据,spark)