Spark SQL in IDEA

  • Add the spark-sql dependency to pom.xml

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.12</artifactId>
        <version>3.0.0</version>
    </dependency>
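
If the project is built with sbt instead of Maven, the equivalent dependency is the following one-liner (a sketch assuming Scala 2.12, matching the artifact suffix above):

libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.0.0"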
  • Test 1
package test.wyh.sql

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object TestSQL {

  def main(args: Array[String]): Unit = {
    // Create the Spark SQL runtime environment
    val sparkSQLConf = new SparkConf().setMaster("local[*]").setAppName("testSparkSQL")
    val sparkSession = SparkSession.builder().config(sparkSQLConf).getOrCreate()

    //DataFrame
    val df = sparkSession.read.json("raw_data/student.json")
    df.show()

  }
}

Create student.json:

[Screenshot: contents of student.json]
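
The screenshot is not reproduced here. A minimal student.json that works with the later tests might look like the following (the field names username and age come from the queries in Test 2; the values themselves are made up for illustration). Note that Spark's JSON reader expects one complete JSON object per line:

{"username": "zhangsan", "age": 20}
{"username": "lisi", "age": 30}
{"username": "wangwu", "age": 40}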

Run output:

[Screenshot: df.show() output]
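
These demos leave the SparkSession open and let it shut down with the JVM. In a real application you would release the environment explicitly at the end of main; a minimal sketch:

    // release the Spark SQL runtime environment
    sparkSession.close()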

  • Test 2
package test.wyh.sql

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object TestSQL {

  def main(args: Array[String]): Unit = {
    // Create the Spark SQL runtime environment
    val sparkSQLConf = new SparkConf().setMaster("local[*]").setAppName("testSparkSQL")
    val sparkSession = SparkSession.builder().config(sparkSQLConf).getOrCreate()

    //DataFrame
    val df = sparkSession.read.json("raw_data/student.json")

    //DataFrame SQL
    df.createOrReplaceTempView("student")
    sparkSession.sql("select * from student").show
    sparkSession.sql("select username, age from student").show
    sparkSession.sql("select avg(age) from student").show
  }
}

Run output:

[Screenshot: output of the three SQL queries]
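
createOrReplaceTempView registers a view that is only visible inside the current SparkSession. If the same data should be queryable from other sessions in the application, a global temp view can be used instead; it lives in the reserved global_temp database. A sketch of that variant:

    df.createOrReplaceGlobalTempView("student")
    sparkSession.sql("select * from global_temp.student").show
    // also visible from a new session in the same application
    sparkSession.newSession().sql("select * from global_temp.student").show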

  • Test 3
package test.wyh.sql

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object TestSQL {

  def main(args: Array[String]): Unit = {
    // Create the Spark SQL runtime environment
    val sparkSQLConf = new SparkConf().setMaster("local[*]").setAppName("testSparkSQL")
    val sparkSession = SparkSession.builder().config(sparkSQLConf).getOrCreate()

    //DataFrame
    val df = sparkSession.read.json("raw_data/student.json")

    //DataFrame DSL
    // The DataFrame DSL relies on implicit conversions for expressions such as $"age" below;
    // they are provided by the implicits of the SparkSession instance created above.
    import sparkSession.implicits._
    df.select("username", "age").show
    df.select($"age" + 1).show//需要导入对象的包import sparkSession.implicits._
  }

}

Run output:

[Screenshot: output of the DSL queries]
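
$"age" is only one of several ways to refer to a column in the DSL. The same selection can also be written with the col function, with Scala Symbol syntax (also enabled by sparkSession.implicits._), or by applying the DataFrame itself; a sketch of the equivalent forms:

    import org.apache.spark.sql.functions.col
    df.select(col("age") + 1).show // functions.col
    df.select('age + 1).show       // Symbol syntax via sparkSession.implicits._
    df.select(df("age") + 1).show  // DataFrame.apply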

  • Test 4

A user-defined function (UDF) registered for use in SQL:

package test.wyh.sql

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object TestSQL {

  def main(args: Array[String]): Unit = {
    // Create the Spark SQL runtime environment
    val sparkSQLConf = new SparkConf().setMaster("local[*]").setAppName("testSparkSQL")
    val sparkSession = SparkSession.builder().config(sparkSQLConf).getOrCreate()

    //DataFrame
    val df = sparkSession.read.json("raw_data/student.json")
    df.createOrReplaceTempView("student")

    import sparkSession.implicits._ // not strictly needed here; kept for consistency with the DSL test
    sparkSession.udf.register("prefixName", (name:String) => {
      "Name: " + name
    })
    sparkSession.sql("select age, prefixName(username) from student").show
  }

}

Run output:

[Screenshot: output of the UDF query]
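
A function registered with sparkSession.udf.register is only visible to the SQL engine. To call the same logic from the DataFrame DSL, the closure can instead be wrapped with functions.udf; a minimal sketch:

    import org.apache.spark.sql.functions.udf
    val prefixName = udf((name: String) => "Name: " + name)
    df.select($"age", prefixName($"username")).show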

 
