示例:Spark Streaming+Kafka整合(spark-streaming-kafka-0-10_2.11)

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Example Spark Streaming job that subscribes to a Kafka topic through
 * `KafkaUtils.createDirectStream` and calls `print()` on the record values
 * of every 5-second batch.
 */
object kafka_Direct_streaming {
  // Executed when the object is initialised: raise the threshold of the "org"
  // logger to WARN so the batch output is not buried under framework logging.
  Logger.getLogger("org").setLevel(Level.WARN)

  /**
   * Builds the streaming context, wires the Kafka source to `print()` and then
   * blocks until the streaming context terminates.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Fall back to local mode only when no master was supplied from outside
    // (e.g. `spark-submit --master ...`). A hard-coded setMaster would take
    // precedence over the externally supplied value.
    val sparkConf = new SparkConf()
      .setAppName("kafka_Direct_streaming")
      .setIfMissing("spark.master", "local[*]")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    // NOTE(review): enable.auto.commit is false and this job never commits
    // offsets itself, so presumably every run starts from the position chosen
    // by auto.offset.reset ("latest") - confirm this is intended.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop01:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Array("kafka_streaming_topic")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    // Keep only the value of each consumer record and hand it to print().
    stream.map(_.value()).print()

    ssc.start()
    ssc.awaitTermination()
  }
}

启动:

  1. 开启Zookeeper -> zkServer.sh start
  2. 开启Kafka -> kafka-server-start.sh -daemon $KAFKA_HOME/config/server.properties
  3. 开启生产者 -> kafka-console-producer.sh --broker-list hadoop01:9092 --topic kafka_streaming_topic
  4. 编写代码并运行
  5. 查看结果

参考官方:https://spark.apache.org/docs/2.2.2/streaming-kafka-0-10-integration.html

你可能感兴趣的:(#,Kafka,#,Spark)