Spark Streaming with Kafka 0.10 (05-01): Consumer

package Kafka010

import Kafka010.Utils.MyKafkaUtils
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Created by Shi shuai RollerQing on 2019/12/24 19:47
 *
 * Consumer test for the Kafka 0.10 API
 */
//TODO: Consumer test for the Kafka 0.10 API
object Kafka010Demo01 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName(s"${this.getClass.getCanonicalName}")
    val ssc = new StreamingContext(conf, Seconds(5))

    val topic = List("topicA") //后面的ConsumerStrategies的参数要求topic为集合的形式 可能不止一个topic
    val kafkaParams = MyKafkaUtils.getKafkaConsumerParams("SparkKafka010")

    val ds: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topic, kafkaParams)
    )

    ds.foreachRDD(rdd => {
      if (!rdd.isEmpty()) {
        println(rdd.count())
      }
    })
    ssc.start()
    ssc.awaitTermination()

  }
}
   // KafkaUtils.createDirectStream needs the Kafka key/value types specified, and takes three arguments: the ssc plus a location strategy and a consumer strategy (see their docs):
   // PreferConsistent: Use this in most cases, it will consistently distribute partitions across all executors.
   // PreferBrokers: Use this only if your executors are on the same nodes as your Kafka brokers.
   // PreferFixed: Use this to place particular TopicPartitions on particular hosts if your load is uneven.
   // Any TopicPartition not specified in the map will use a consistent location.
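A minimal sketch of PreferFixed, for illustration only: the host names and partition numbers below are assumptions, not from the original demo.

import org.apache.kafka.common.TopicPartition

// Illustrative only: pin partitions 0 and 1 of topicA to particular executor hosts;
// any TopicPartition not listed here falls back to a consistent placement.
val hostMap = Map(
  new TopicPartition("topicA", 0) -> "hadoop01",
  new TopicPartition("topicA", 1) -> "hadoop02"
)
val fixedLocation = LocationStrategies.PreferFixed(hostMap)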

// Assign: consume only specific partitions
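A minimal sketch of the Assign strategy, assuming we only want partitions 0 and 1 of topicA (the partition numbers are illustrative) and reusing the ssc and kafkaParams from the demo above:

import org.apache.kafka.common.TopicPartition

// Assign subscribes to an explicit list of TopicPartitions instead of whole topics.
val partitions = List(
  new TopicPartition("topicA", 0),
  new TopicPartition("topicA", 1)
)
val assignedStream = KafkaUtils.createDirectStream[String, String](
  ssc,
  LocationStrategies.PreferConsistent,
  ConsumerStrategies.Assign[String, String](partitions, kafkaParams)
)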

Utility class

package Kafka010.Utils

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer

/**
 * Created by Shi shuai RollerQing on 2019/12/24 19:20
 */
object MyKafkaUtils {

  def getKafkaConsumerParams(groupId: String = "SparkStreaming010", autoCommit: String = "true"): Map[String, String] = {
    val kafkaParams = Map[String, String] (
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoop01:9092,hadoop02:9092,hadoop03:9092",
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> autoCommit,
      //ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest", // valid values: earliest, latest, none (see the Kafka docs for details)
      ConsumerConfig.GROUP_ID_CONFIG -> groupId,
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer].getName,
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer].getName
    )
    kafkaParams
  }

  /**
   * This is the configuration style shown in the official docs; the first helper above is still
   * preferred, because the ConsumerConfig constants save you from typing the keys by hand and avoid typos.
   *
   * This one has not been tested; to use it, pass parameters in (e.g. a group id like the helper above) or rely on the defaults.
   * @return
   */
  def getKafkaConsumerParams2(): Map[String, Object] = {
    val kafkaParams = Map[String, Object] (
      "bootstrap.servers" -> "hadoop01:9092,hadoop02:9092,hadoop03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "auto.offset.reset" -> "latest",
      "group.id" -> "topicA",
      "enable.auto.commit" -> (true: java.lang.Boolean)
    )
    kafkaParams
  }

  def main(args: Array[String]): Unit = {
    println(classOf[StringDeserializer].getName) //org.apache.kafka.common.serialization.StringDeserializer
    println(classOf[StringDeserializer].getClass) //class java.lang.Class
    println(classOf[StringDeserializer]) //class org.apache.kafka.common.serialization.StringDeserializer

  }
}
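For completeness, here is a sketch of how this second (official-docs style) helper would plug into the same demo; as noted above it is untested, so treat it as an assumption-laden sketch that reuses the ssc from the driver:

val kafkaParams2 = MyKafkaUtils.getKafkaConsumerParams2()
val ds2 = KafkaUtils.createDirectStream[String, String](
  ssc,
  LocationStrategies.PreferConsistent,
  ConsumerStrategies.Subscribe[String, String](List("topicA"), kafkaParams2)
)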

The output is as expected: it is simply the number of records in each 5-second batch.
