package com.jiangnan.spark
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
/**
* 姓名 班级 分数
* 101 1 90
* 120 1 88
*/
/** One exam record: student name, class number, and the score obtained. */
case class Score(name: String, clazz: Int, score: Int)
object OpenFunction {

  /**
   * Demo entry point: builds a small in-memory score dataset and finds the
   * top-scoring student(s) per class in two ways, to contrast
   *   1. the classic self-join against a grouped sub-query, and
   *   2. the rank() window ("open") function.
   *
   * NOTE: converted from `extends App` to an explicit main method to avoid
   * the App trait's delayed-initialization pitfalls; the runnable interface
   * (`OpenFunction.main`) is unchanged.
   */
  def main(args: Array[String]): Unit = {
    // Local mode with 2 threads; app named so it is identifiable in the Spark UI
    // (was previously the empty string).
    val conf = new SparkConf().setAppName("OpenFunction").setMaster("local[2]")
    val spark = SparkSession.builder().config(conf).getOrCreate()
    import spark.implicits._

    // Generate sample data: (name, class, score).
    val score = spark.sparkContext.makeRDD(List(
      Score("1001", 1, 90),
      Score("1002", 2, 95),
      Score("1003", 3, 90),
      Score("1004", 1, 92),
      Score("1005", 1, 88),
      Score("1005", 2, 66),
      Score("1005", 3, 90),
      Score("1005", 1, 93),
      Score("1005", 3, 99),
      Score("1005", 2, 90),
      Score("1005", 2, 92),
      Score("1005", 3, 90)
    )).toDF("name", "clazz", "score")

    println("---------原始数据----------")
    score.show()

    println("---------求每个班级最高成绩的学生---原始做法------")
    // Register a temp view so the data can be queried with SQL.
    score.createOrReplaceTempView("score")

    println("-----------分组后求出每个班最高分数表结构----------")
    spark.sql("select clazz,max(score) max from score group by clazz").show()

    println("-----------原始做法,最终结果--------------")
    // BUG FIX: the join must match on class AS WELL as score. Joining on
    // score alone would wrongly include any student whose score equals the
    // maximum of a DIFFERENT class.
    spark.sql("select a.name,b.clazz,b.max from score a,(select clazz,max(score) max from score group by clazz) b where a.clazz = b.clazz and a.score = b.max").show()

    println("------------使用开窗函数后的运算过程-------------")
    // rank() over a per-class partition ordered by score descending:
    // rank = 1 marks the highest score(s) in each class (ties share rank 1).
    spark.sql("select name,clazz,score,rank() over(partition by clazz order by score desc) rank from score").show()

    println("------------使用开窗函数后最终结果-------------")
    spark.sql("select * from (select name,clazz,score,rank() over(partition by clazz order by score desc) rank from score) r where r.rank = 1").show()

    spark.stop()
  }