Spark SQL 聚合统计的两种解决方式：DataFrame API 与 临时视图 + SQL 查询

object Test02 {

  // These types were referenced but never imported in the original file;
  // without this import the object does not compile.
  import org.apache.spark.sql.{DataFrame, SparkSession}

  /** Demonstrates two equivalent ways to compute simple aggregates (row count
    * and column sum) over a JSON dataset with Spark SQL:
    *   1. the DataFrame API (`groupBy().count()`, `summary`, `groupBy().sum()`);
    *   2. raw SQL executed against a temporary view.
    *
    * @param args optional: args(0) overrides the input JSON path
    *             (defaults to the original hard-coded `file:///D:\test\1.json`)
    */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .appName("aa")
      .master("local[2]")
      .getOrCreate()

    import spark.implicits._
    import org.apache.spark.sql.functions._

    // Allow the input path to be supplied on the command line; keep the
    // original hard-coded path as the default for backward compatibility.
    val inputPath: String = args.headOption.getOrElse("file:///D:\\test\\1.json")

    val df: DataFrame = spark.read.json(inputPath)
    df.printSchema()

    // --- Approach 1: DataFrame API ---
    df.groupBy().count().show()                      // total row count
    df.summary("count").show()                       // per-column non-null counts
    df.select("name", "mon").groupBy().sum().show()  // sum of the numeric column(s)

    // --- Approach 2: SQL on a temporary view ---
    // createOrReplaceTempView is safe to call even if "tmp" already exists,
    // unlike createTempView which would throw AnalysisException.
    df.createOrReplaceTempView("tmp")
    spark.sql(
      """
        |select count(1) as sum_alias from tmp
        |""".stripMargin
    ).show()
    spark.sql(
      """
        |select sum(mon) as sum_alias from tmp
        |""".stripMargin
    ).show()

    spark.stop()
  }

}

你可能感兴趣的: (spark, 大数据, 分布式)