A Small Spark Streaming Demo: Writing to Hive (Socket Data Source)
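The flow: read JSON lines from a socket, extract the nested JSON string stored in the "data" field, parse out Id / Name / phone, and append each micro-batch into a Hive table. Given the parsing logic below, each line sent to the socket is expected to look like the following (the field values here are made up for illustration):

{"data": "{\"Id\": \"1\", \"Name\": \"Tom\", \"phone\": \"13800001111\"}"}

For a quick test, run nc -lk 9999 on 192.168.153.137 and paste lines like the one above.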


import org.apache.spark.SparkContext
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.json.JSONObject

// Despite the object/table name, this demo reads from a socket, not Kafka.
object kafka2HiveStreaming {

  def main(args: Array[String]): Unit = {

    // 1. Build the SparkSession (with Hive support) and wrap its SparkContext
    //    in a StreamingContext with a 5-second batch interval
    val sparkSession = SparkSession
      .builder()
      .master("local[*]")
      .appName("kafka2HiveStreaming")
      .enableHiveSupport()
      .getOrCreate()

    val sc: SparkContext = sparkSession.sparkContext
    val ssc: StreamingContext = new StreamingContext(sc, Seconds(5))
    // Read text lines from the socket source
    val lines = ssc.socketTextStream("192.168.153.137", 9999)

    // 2. Each incoming line is JSON; extract the nested "data" string
    val result: DStream[String] = lines
      .map(line => new JSONObject(line).getString("data"))
    // 3. Parse the inner JSON and pull out the three fields as a tuple
    val dstream2: DStream[(String, String, String)] = result
      .map(t => {
        val json = new JSONObject(t)
        (json.getString("Id"), json.getString("Name"), json.getString("phone"))
      })

    // Needed for rdd.toDF below
    import sparkSession.implicits._

    // 4. For each micro-batch, convert the RDD to a DataFrame and append it into
    //    the Hive table. insertInto resolves columns by position, so the table
    //    must already exist with a matching column order. (foreachRDD returns
    //    Unit, so its result is not assigned to anything.)
    dstream2.foreachRDD { rdd =>
      if (!rdd.isEmpty()) { // skip empty batches to avoid writing empty files
        rdd
          .toDF("ID", "NAME", "PHONE")
          .coalesce(1) // one output file per batch
          .write
          .mode(SaveMode.Append)
          .insertInto("kafka2HiveStreaming")
      }
    }

    // For ad-hoc verification once some batches have landed:
    // sparkSession.sql("select * from kafka2HiveStreaming").show()

    // 5. Start the streaming job; awaitTermination blocks the main thread
    ssc.start()

    ssc.awaitTermination()

  }
}
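insertInto requires the target Hive table to already exist, and it resolves columns by position rather than by name. The original post does not show the table DDL, so the snippet below is only a minimal sketch of a compatible table (three STRING columns, default storage format); run it once, via sparkSession.sql as shown here or equivalently in the Hive CLI, before starting the stream:

sparkSession.sql(
  """CREATE TABLE IF NOT EXISTS kafka2HiveStreaming (
    |  ID    STRING,
    |  NAME  STRING,
    |  PHONE STRING
    |)""".stripMargin)

With the table in place, each 5-second batch appends one file (because of coalesce(1)) under the table's warehouse directory.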