Spark Streaming + Kafka Integration in Practice: the Direct Approach

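The complete, runnable example below consumes a Kafka topic through the direct (receiver-less) connector from spark-streaming-kafka-0-10: it optionally resumes from explicitly supplied offsets, prints the offset ranges of every batch, and commits the processed offsets back to Kafka manually.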
package com.zholei.kafkaprj

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{CanCommitOffsets, ConsumerStrategies, ConsumerStrategy, HasOffsetRanges, KafkaUtils, LocationStrategies, LocationStrategy, OffsetRange}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaDemo {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setAppName(this.getClass.getSimpleName.stripSuffix("$")) // object class names end in "$"
      .setMaster("local[2]")
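    // One micro-batch every 3 seconds; raise the log level to cut console noise.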
    val ssc = new StreamingContext(sparkConf, Seconds(3))
    ssc.sparkContext.setLogLevel("ERROR")

    val topic = Array("zho_test")
    // Consumer configuration. With enable.auto.commit=false we manage offsets
    // ourselves; auto.offset.reset=latest only applies when neither explicit
    // starting offsets nor previously committed offsets exist for this group.
    val kafkaParams = Map[String, String](
      "bootstrap.servers" -> "IP:PORT", // replace with your broker list
      "group.id" -> "sparkTest",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> "false",
      "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
      "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer"
    )
    // PreferConsistent distributes partitions evenly across available executors.
    val preferConsistent: LocationStrategy = LocationStrategies.PreferConsistent

    // Starting offsets to resume from, hardcoded here for illustration;
    // normally these are loaded from wherever they were saved after the
    // previous batch (see the sketch after the listing).
    val offset: Map[TopicPartition, Long] = Map(new TopicPartition("zho_test", 1) -> 324L)
    // Subscribe with explicit starting offsets when we have them; otherwise
    // fall back to committed offsets or the auto.offset.reset policy.
    val consumerStrategies: ConsumerStrategy[String, String] =
      if (offset.nonEmpty)
        ConsumerStrategies.Subscribe[String, String](topic, kafkaParams, offset)
      else
        ConsumerStrategies.Subscribe[String, String](topic, kafkaParams)
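    // Direct (receiver-less) stream: Kafka partitions map 1:1 onto Spark
    // partitions, and offsets are tracked by the stream itself rather than
    // by ZooKeeper, as in the old receiver-based approach.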
    val stream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(
      ssc,
      preferConsistent,
      consumerStrategies
    )
    stream.foreachRDD(rdd => {
      // Process each partition on the executors. Every ConsumerRecord exposes
      // its topic, partition, offset, key and value.
      rdd.foreachPartition(records => {
        records.foreach(record => {
          println(s"topic=${record.topic()} partition=${record.partition()} " +
            s"offset=${record.offset()} value=${record.value()}")
        })
      })
      // The HasOffsetRanges cast only works on the RDD handed to foreachRDD
      // by the direct stream, before any transformation that changes the
      // partitioning.
      val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      // Each OffsetRange describes the slice of one partition in this batch:
      //   topic       - Kafka topic name
      //   partition   - Kafka partition id
      //   fromOffset  - inclusive starting offset
      //   untilOffset - exclusive ending offset
      offsetRanges.foreach(range => {
        println(s"${range.topic} ${range.partition} ${range.fromOffset} ${range.untilOffset}")
      })
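      // enable.auto.commit is false, so commit the processed offsets back to
      // Kafka ourselves once the batch's output has completed; commitAsync
      // comes from the kafka010 CanCommitOffsets trait.
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)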
    })

    // Without start()/awaitTermination() the job is only defined, never run.
    ssc.start()
    ssc.awaitTermination()
  }
}
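
The listing hardcodes offset 324 for partition 1 purely as a placeholder. In practice you load the last saved offsets at startup and persist each range's untilOffset once the batch's output has succeeded. Below is a minimal sketch of that pattern using a local properties file as the store; FileOffsetStore and its save/load methods are illustrative names for this post, not part of the Kafka integration.

import java.io.{File, FileInputStream, FileOutputStream}
import java.util.Properties

import org.apache.kafka.common.TopicPartition
import org.apache.spark.streaming.kafka010.OffsetRange

import scala.collection.JavaConverters._

object FileOffsetStore {

  // Persist one entry per partition: key "topic,partition", value untilOffset.
  def save(path: String, ranges: Array[OffsetRange]): Unit = {
    val props = new Properties()
    ranges.foreach(r => props.setProperty(s"${r.topic},${r.partition}", r.untilOffset.toString))
    val out = new FileOutputStream(path)
    try props.store(out, "kafka offsets") finally out.close()
  }

  // Load previously saved offsets; an empty map means "no position saved yet",
  // the case where Subscribe(topic, kafkaParams) without offsets is used.
  def load(path: String): Map[TopicPartition, Long] = {
    val file = new File(path)
    if (!file.exists()) {
      Map.empty
    } else {
      val props = new Properties()
      val in = new FileInputStream(file)
      try props.load(in) finally in.close()
      props.stringPropertyNames().asScala.map { key =>
        val Array(topic, partition) = key.split(",")
        new TopicPartition(topic, partition.toInt) -> props.getProperty(key).toLong
      }.toMap
    }
  }
}

With this in place, the hardcoded map becomes FileOffsetStore.load("/tmp/zho_test.offsets"), and FileOffsetStore.save(...) is called at the end of foreachRDD after the output has completed. A file is only safe for a single-instance local demo; a real job would keep offsets in ZooKeeper or a database, or rely on Kafka's own commit as shown with commitAsync above.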
