spark-streaming kafka api(KafkaUtils.createDirectStream)使用

本程序参照spark-streaming kafka官方示例

注意:官方提供的spark-streaming-kafka调用接口,java和scala的使用方式是不同的

1.本次的程序为(以local[2]方式启动)

本次主要学习spark-streaming-kafka接口

KafkaUtils.createDirectStream

/**
 * Word count over a Kafka direct stream.
 *
 * Reads messages from the given Kafka topics through
 * `KafkaUtils.createDirectStream`, splits every message on single spaces and
 * prints the per-batch count of each word.
 *
 * Usage: DirectKafkaWordCount <brokers> <topics>
 *   - `<brokers>` is passed unchanged as the `metadata.broker.list` Kafka parameter
 *   - `<topics>` is a comma-separated list of topic names
 */
object DirectKafkaWordCount {

  /**
   * Program entry point.
   *
   * @param args command-line arguments; the first two are the broker list and
   *             the comma-separated topic list. With fewer than two arguments
   *             the usage text is written to stderr and the JVM exits with
   *             status 1. Arguments beyond the second are ignored.
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("""
        |Usage: DirectKafkaWordCount <brokers> <topics>
        |  <brokers> is a list of one or more Kafka brokers
        |  <topics> is a list of one or more kafka topics to consume from
        |
        """.stripMargin)
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    // The guard above only guarantees "at least two" arguments, while the
    // Array(_, _) extractor matches "exactly two". Taking the first two keeps
    // extra arguments from raising a scala.MatchError.
    val Array(brokers, topics) = args.take(2)

    // Create context with 2 second batch interval; the master is fixed to
    // local[2] and executor memory to 3g for this example.
    val sparkConf = new SparkConf()
      .setAppName("DirectKafkaWordCount")
      .setMaster("local[2]")
      .set("spark.executor.memory", "3g")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Create direct kafka stream with brokers and topics
    val topicsSet = topics.split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, topicsSet)

    // Keep the second element of each record pair, split it into words,
    // count the words and print
    val lines = messages.map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
    wordCounts.print()

    // Start the computation and block until it is terminated
    ssc.start()
    ssc.awaitTermination()
  }
}

2.本程序的maven工程的pom.xml文件为:

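<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.reco</groupId>
    <artifactId>spark-examples</artifactId>
    <version>2.1.1</version>
    <properties>
        <!-- NOTE: the XML tags of this pom were lost when the page was extracted.
             Property names marked "unconfirmed" below are best guesses; only
             scala.binary.version and kafka.version are referenced further down. -->
        <scala.version>2.11.8</scala.version>
        <scala.binary.version>2.11</scala.binary.version>
        <!-- unconfirmed property name -->
        <spark.version>2.1.1</spark.version>
        <jline.version>2.12.1</jline.version>
        <jline.groupid>jline</jline.groupid>
        <!-- unconfirmed property name: original name for the value 1.3.0 is unknown -->
        <unknown.version>1.3.0</unknown.version>
        <kafka.version>0.8.2.2</kafka.version>
    </properties>

    <dependencies>

        <dependency>
            <groupId>org.spark-project.spark</groupId>
            <artifactId>unused</artifactId>
            <version>1.0.0</version>
            <scope>compile</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
            <version>${project.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_${scala.binary.version}</artifactId>
            <version>${project.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-mllib_${scala.binary.version}</artifactId>
            <version>${project.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_${scala.binary.version}</artifactId>
            <version>${project.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_${scala.binary.version}</artifactId>
            <version>${project.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-flume_${scala.binary.version}</artifactId>
            <version>${project.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-8_${scala.binary.version}</artifactId>
            <version>${project.version}</version>
            <scope>compile</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-math3</artifactId>
            <version>3.6.1</version>
        </dependency>

        <dependency>
            <groupId>com.github.scopt</groupId>
            <artifactId>scopt_${scala.binary.version}</artifactId>
            <version>3.3.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_${scala.binary.version}</artifactId>
            <version>${kafka.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>

            <version>16.0.1</version>
        </dependency>
    </dependencies>

    <build>
        <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
        <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-deploy-plugin</artifactId>
                <configuration>
                    <skip>true</skip>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-install-plugin</artifactId>
                <configuration>
                    <skip>true</skip>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <outputDirectory>${jars.target.dir}</outputDirectory>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <profiles>
        <profile>
            <id>kinesis-asl</id>
            <dependencies>
                <dependency>
                    <groupId>org.apache.spark</groupId>
                    <artifactId>spark-streaming-kinesis-asl_${scala.binary.version}</artifactId>
                    <version>${project.version}</version>
                    <scope>provided</scope>
                </dependency>
            </dependencies>
        </profile>
    </profiles>
</project>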

你可能感兴趣的:(kafka)