Spark Streaming: Reading Data from Kafka

1. Program entry point

// Build a SparkSession and wrap its SparkContext in a StreamingContext.
val spark = SparkSession.builder().appName(this.getClass.getName).master("local[*]").getOrCreate()
import spark.implicits._
val sc = spark.sparkContext
val batch_time = 1 // batch interval in minutes (example value)
val ssc = new StreamingContext(spark.sparkContext, Minutes(batch_time))
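The entry-point snippet above assumes the standard Spark imports; a minimal set for this step would look roughly like this:

import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Minutes, StreamingContext}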

2. Reading from Kafka (parameter configuration)


val consumerGroup = "demo01_test001"
val template_topic = "cn_number"
val callback_topic = "cn_data"
val topics = Array(template_topic, callback_topic)

val brokers = "hadoop101:9092,hadoop102:9092,hadoop103:9092"
val kafkaParams = Map[String, Object](
  "bootstrap.servers" -> brokers,
  "key.deserializer" -> classOf[StringDeserializer],
  "value.deserializer" -> classOf[StringDeserializer],
  "auto.offset.reset" -> "earliest", // "latest" or "earliest"
  "group.id" -> consumerGroup,
  "enable.auto.commit" -> (false: java.lang.Boolean), // offsets are committed manually in step 3
  "session.timeout.ms" -> 90000.asInstanceOf[Object],
  "heartbeat.interval.ms" -> 9000.asInstanceOf[Object],
  "request.timeout.ms" -> 100000.asInstanceOf[Object],
  "max.partition.fetch.bytes" -> 10240000.asInstanceOf[Object],
  "max.poll.records" -> 800.asInstanceOf[Object]
)

// Subscribe to the configured Kafka topics and create the direct input stream.
val stream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils
  .createDirectStream(ssc, LocationStrategies.PreferConsistent, ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))
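The classes used here come from the spark-streaming-kafka-0-10 integration and the Kafka client library; the imports below are a sketch of what is needed (match the connector artifact to your Spark version):

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{CanCommitOffsets, ConsumerStrategies, HasOffsetRanges, KafkaUtils, LocationStrategies}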

3. Processing the data (manage offsets manually and commit them only after processing)

stream.foreachRDD(rdd1 => {
  // Read the offset ranges of this batch before doing anything else with the RDD.
  val offsetRanges = rdd1.asInstanceOf[HasOffsetRanges].offsetRanges
  // ... process the data in this batch ...
  // Commit the offsets back to Kafka once processing is done.
  stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
})

ssc.start()
ssc.awaitTermination()
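For reference, a fleshed-out sketch of step 3 is shown below; the record count and per-partition offset logging are purely illustrative placeholders standing in for the elided processing, and the commit still happens only after the batch work has finished.

stream.foreachRDD { rdd =>
  // Capture the offset ranges for this batch before any transformation.
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

  // Illustrative processing: count the record values in this batch.
  val count = rdd.map(_.value()).count()
  println(s"processed $count records in this batch")

  // Illustrative bookkeeping: log where each partition's batch started and ended.
  offsetRanges.foreach { o =>
    println(s"${o.topic} partition ${o.partition}: offsets ${o.fromOffset} to ${o.untilOffset}")
  }

  // Commit only after the batch has been processed successfully.
  stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}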

