Simulating a Data Table with a File

The Spark application below reads a comma-separated text file, maps each line onto a Person case class, registers the resulting Dataset as a temporary view, and then counts its rows with a SQL query.

import org.apache.spark.sql.SparkSession

object SparkApp {

  def main(args: Array[String]): Unit = {
    // Build a local SparkSession; local[2] runs the job with two worker threads.
    val session = SparkSession.builder()
      .appName("Spark Sql Test")
      .master("local[2]")
      .getOrCreate()

    // Brings in the implicit Encoders needed for Dataset[Person] below.
    import session.implicits._

    // Read the file as a Dataset[String], one element per line.
    val people = session.read.textFile("D:\\resources\\test.txt")

    // Split each comma-separated line and map it onto the Person case class.
    val peopleDs = people
      .map(_.split(","))
      .map(p => Person(p(0), p(1).trim.toInt, p(2)))

    // Register the Dataset as a temporary view so it can be queried with SQL.
    peopleDs.createOrReplaceTempView("peopleTempView")

    // count(1) yields a single row holding one Long column.
    val count = session.sql("SELECT count(1) FROM peopleTempView").first().getLong(0)

    println(count)  // prints 4 for the sample file below

    session.stop()
  }

}


// Defined at the top level (outside SparkApp) so Spark can derive an Encoder for it.
case class Person(name: String, age: Int, addr: String)

Contents of D:\resources\test.txt, one record per line in name,age,addr order:

aa,25,chengdu
bb,23,beijing
cc,16,hangzhou
dd,19,zhejiang
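
Once the Dataset is registered as a temporary view, the file behaves like a real table for any Spark SQL query, not just the count. A minimal sketch, meant to sit before session.stop() in main above; the query itself is illustrative and not from the original post:

// Query the view like an ordinary table; show() prints a small result grid.
val adults = session.sql("SELECT name, addr FROM peopleTempView WHERE age > 20")
adults.show()  // for the sample data above this lists aa and bb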

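As an aside, the manual split-and-map step could be replaced by Spark's built-in CSV reader (Spark 2.3+ for the DDL-string schema); a sketch under that assumption, where the schema string is inferred from the sample file's layout rather than taken from the original post:

// Let Spark parse the CSV; the explicit schema casts age to INT directly.
val peopleDf = session.read
  .schema("name STRING, age INT, addr STRING")
  .csv("D:\\resources\\test.txt")

peopleDf.createOrReplaceTempView("peopleTempView")

Calling .as[Person] on peopleDf (with session.implicits._ in scope) would recover the typed Dataset used above.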