Spark SQL Local Test Demo

The snippet below runs Spark in local mode, groups fruits by name with `collect_set`, and then uses `array_contains` to flag which names have "Apple" in their set.

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions._

    // Create a local SparkSession (local[*] uses all available cores)
    val spark = SparkSession.builder()
      .appName("SparkSqlLocalDemo")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Build a small sample dataset of (name, fruit) pairs
    val data = Seq(
      ("Alice", "Apple"),
      ("Bob", "Banana"),
      ("Charlie", "Cherry"),
      ("Alice", "Apricot"),
      ("Alice", "Avocado"))

    val df = spark.createDataFrame(data).toDF("name", "fruit")

    // Register a temp view and query it with Spark SQL:
    // collect_set gathers each name's distinct fruits into an array,
    // and array_contains flags whether that array includes "Apple"
    df.createOrReplaceTempView("fruits_table")
    val result = spark.sql("SELECT name, collect_set(fruit) AS fruits FROM fruits_table GROUP BY name")
      .withColumn("contains_apple", array_contains(col("fruits"), "Apple"))

    // Disable truncation so the full fruit arrays are visible
    result.show(truncate = false)

    spark.stop()
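
The same aggregation can also be expressed without SQL, using the DataFrame API directly. A minimal sketch of that equivalent query, reusing the `spark`, `df`, and column names defined above:

    // Equivalent DataFrame API query: groupBy + collect_set, then array_contains
    val resultDf = df.groupBy("name")
      .agg(collect_set("fruit").as("fruits"))
      .withColumn("contains_apple", array_contains(col("fruits"), "Apple"))

    resultDf.show(truncate = false)

Both versions produce one row per name; note that the element order inside the array returned by `collect_set` is not guaranteed.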
