SparkStreaming整合Kafka--Direct方式


    4.0.0
    com.sid.spark
    spark-train
    1.0
    2008
    
        2.11.8
        0.9.0.0
        2.2.0
        2.9.0
        1.4.4
    

    
        
            scala-tools.org
            Scala-Tools Maven2 Repository
            http://scala-tools.org/repo-releases
        
    


    
        
            scala-tools.org
            Scala-Tools Maven2 Repository
            http://scala-tools.org/repo-releases
        
    


    
        
            org.scala-lang
            scala-library
            ${scala.version}
        
        
            org.apache.kafka
            kafka_2.11
            ${kafka.version}
        

        
            org.apache.hadoop
            hadoop-client
            ${hadoop.version}
            
                
                    servlet-api
                    javax.servlet
                
            
        

        
        
        
        
        

        
        
        
        
        

        
            org.apache.spark
            spark-streaming_2.11
            ${spark.version}
        

        
            org.apache.spark
            spark-sql_2.11
            ${spark.version}
        

        
            org.apache.spark
            spark-streaming-flume_2.11
            ${spark.version}
        

        
            org.apache.spark
            spark-streaming-flume-sink_2.11
            ${spark.version}
        

        
            org.apache.spark
            spark-streaming-kafka-0-8_2.11
            2.2.0
        

        
            net.jpountz.lz4
            lz4
            1.3.0
        

        
            mysql
            mysql-connector-java
            5.1.31
        

        
            org.apache.commons
            commons-lang3
            3.5
        

    

    
        src/main/scala
        
        
            
                org.scala-tools
                maven-scala-plugin
                
                    
                        
                            compile
                            testCompile
                        
                    
                
                
                    ${scala.version}
                    
                        -target:jvm-1.5
                    
                
            

            
                org.apache.maven.plugins
                maven-eclipse-plugin
                
                    true
                    
                        ch.epfl.lamp.sdt.core.scalabuilder
                    
                    
                        ch.epfl.lamp.sdt.core.scalanature
                    
                    
                        org.eclipse.jdt.launching.JRE_CONTAINER
                        ch.epfl.lamp.sdt.launching.SCALA_CONTAINER
                    
                
            
        
    
    
        
            
                org.scala-tools
                maven-scala-plugin
                
                    ${scala.version}
                
            
        
    

SparkStreaming整合Kafka--Direct方式_第1张图片

package com.zoujc.sparkstreaming

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}


/**
  * Spark Streaming 基于 Direct 对接Kafka
  */
/**
  * Spark Streaming consuming Kafka via the Direct (receiver-less) approach.
  *
  * Usage: KafkaDirect <brokers> <topics>
  *   brokers - comma-separated Kafka broker list, host1:port1,host2:port2 (NOT ZooKeeper)
  *   topics  - comma-separated list of topic names to consume
  */
object KafkaDirect {
   def main(args: Array[String]): Unit = {
      if (args.length != 2) {
         // The placeholders were missing from the original message; spell them out
         // so the user knows what the two required arguments are.
         System.err.println("Usage: KafkaDirect <brokers> <topics>")
         System.exit(1)
      }
      val Array(brokers, topics) = args

      // App name previously said "KafkaReceiver" (copied from the Receiver-based
      // example); use this object's own name so the job is identifiable in the UI.
      // NOTE(review): setMaster("local[3]") is for local testing only — remove it
      // when submitting to a cluster so spark-submit can supply the master.
      val sparkConf = new SparkConf().setAppName("KafkaDirect").setMaster("local[3]")

      // 5-second micro-batch interval.
      val ssc = new StreamingContext(sparkConf, Seconds(5))

      val topicsSet = topics.split(",").toSet

      // Direct approach talks to Kafka brokers directly: kafkaParams requires
      // "metadata.broker.list" (or "bootstrap.servers") pointing at brokers,
      // NOT ZooKeeper. When not restoring from a checkpoint, "auto.offset.reset"
      // may be set to "largest" (default) or "smallest" to pick the start offset.
      val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)

      // Type params: message key type, value type, key decoder, value decoder.
      // Returns a DStream of (key, value) pairs.
      val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet)
      messages.print()

      // Word count over the message values (keys are ignored).
      messages.map(_._2).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()

      ssc.start()
      ssc.awaitTermination()
   }
}

你可能感兴趣的:(sparkStreaming,kafka)