Spark SQL window (analytic) functions and user-defined functions (UDFs)

package com.lenovo.sparkSqlDemo

import org.apache.spark.sql.SparkSession

//spark常用开窗函数
// Demonstrates common Spark SQL window (analytic) functions and UDF registration
// against a small in-memory dataset registered as the temp view "person".
object SparkSqlTest2 {
  // Row schema for the sample data backing the "person" view.
  case class Person(id: Int, name: String, age: Int, salary: Double, sex: Boolean, work: String, dept: Int)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("testSparkSql").getOrCreate()
    // Quiet Spark's verbose logging BEFORE running any queries. The original code
    // set the log level at the very end (after all queries had executed), so the
    // call had no effect on the output.
    spark.sparkContext.setLogLevel("FATAL")

    // Case classes provide a companion apply(); `new` is unnecessary.
    // NOTE: "woker" is kept as-is (likely a typo for "worker") because the
    // commented-out UDF below pattern-matches on it.
    val persons = List(
      Person(1, "suns1", 34, 8022.0, true, "teacher", 1),
      Person(2, "suns2", 33, 8021.0, false, "manager", 2),
      Person(3, "suns3", 32, 8022.0, true, "teacher", 1),
      Person(4, "suns4", 35, 8025.0, false, "manager", 2),
      Person(5, "suns5", 38, 8024.0, true, "teacher", 1),
      Person(6, "suns6", 39, 8026.0, true, "woker", 3)
    )
    spark.createDataFrame(persons).createTempView("person")

    /* // Lowest-paid person per department: rank salaries ascending within each
       // dept via row_number(), then keep only rank < 2 in the outer query.
    spark.sql("select * from (select *,row_number() over(partition by dept order by salary asc) as rank from person) t where t.rank<2").show()*/

    /* // Salary ranking with NO gaps after ties (e.g. 1,2,3,4,4,5).
    spark.sql("select *,DENSE_RANK() over(order by salary DESC) rank from person").show()*/

    /* // Salary ranking WITH gaps after ties (e.g. 1,2,3,4,4,6).
    spark.sql("select *,rank() over(order by salary DESC) rank from person").show()*/

    /* // Defining and using a UDF: annual salary multiplier by job title.
       // Fixed: the original matched on "work", which never occurs in the data;
       // the sample row uses "woker", so that branch was unreachable.
    spark.udf.register("yearSalary",(work:String,salary:Double)=>{
      work match {
        case "teacher" => salary*14
        case "manager" => salary*15
        case "woker" => salary*13
        case _ => salary*12
      }
    })
    spark.sql("select *,yearSalary(work,salary) as yearSalary from person").show()*/

    // Average salary per job title, attached to every row via a window aggregate.
    spark.sql("select *,avg(salary) over(partition by work) as avgSalary from person").show()
    spark.stop()
  }
}

You may also be interested in: (spark, spark)