SparkSQL Basic Usage
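The example below loads a comma-separated text file of purchase records (buyer name, goods name, amount), converts it to a DataFrame via a case class, and runs a few aggregate queries through Spark SQL. The input file lives on the original author's machine; a hypothetical G:/data/week3one.txt in the expected format might look like:

zhangsan,apple,10
zhangsan,banana,20
lisi,apple,15
wangwu,orange,5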

package com.spark.week3

import org.apache.spark.sql.SparkSession

object One {

  // On Windows, point Spark at a local Hadoop installation (for winutils.exe) before anything else runs.
  System.setProperty("hadoop.home.dir", "D:/soft/hadoop/hadoop-2.7.3")

  def main(args: Array[String]): Unit = {

    // Build a local SparkSession; appName is what shows up in the Spark UI.
    val spark = SparkSession.builder().master("local").appName("Week3One").getOrCreate()

    // Parse each comma-separated line into a Goods record, splitting only once per line.
    val rdd = spark.sparkContext.textFile("G:/data/week3one.txt")
    val rdd1 = rdd.map { line =>
      val fields = line.split(",")
      Goods(fields(0), fields(1), fields(2).toInt)
    }

    // spark.implicits._ provides the toDF conversion on RDDs of case classes.
    import spark.implicits._
    val df = rdd1.toDF("name", "good", "money")
    df.show()
 
    // Register the DataFrame as a temporary view so it can be queried with SQL.
    df.createTempView("goods")

    // Total spend per name, highest first.
    val s1 = spark.sql("select name, sum(money) s1 from goods group by name order by s1 desc")
    s1.show()
    // Average spend per name, highest first.
    val s2 = spark.sql("select name, avg(money) s2 from goods group by name order by s2 desc")
    s2.show()
    // The single biggest spender.
    val s3 = spark.sql("select name, sum(money) s3 from goods group by name order by s3 desc limit 1")
    s3.show()
    // The single smallest spender: ascending order, so limit 1 takes the lowest total.
    val s4 = spark.sql("select name, sum(money) s4 from goods group by name order by s4 limit 1")
    s4.show()

    spark.stop()

  }

  // Schema for one purchase record: buyer name, goods name, amount spent.
  case class Goods(name: String, good: String, money: Int)
}
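For comparison, the same aggregations can be written with the DataFrame API instead of SQL strings. A minimal sketch, reusing the df built above:

import org.apache.spark.sql.functions.{avg, desc, sum}

// Total spend per name, highest first (same result as query s1).
df.groupBy("name").agg(sum("money").as("total")).orderBy(desc("total")).show()

// Average spend per name, highest first (same result as query s2).
df.groupBy("name").agg(avg("money").as("average")).orderBy(desc("average")).show()

// Single biggest spender (same result as query s3).
df.groupBy("name").agg(sum("money").as("total")).orderBy(desc("total")).limit(1).show()

Which style to use is largely a matter of taste; the DataFrame API catches column-name typos at compile time of the query plan, while SQL strings are easier to share with non-Scala users.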
