Analyzing Kafka Data with Spark Streaming

Environment

spark-2.2.0
kafka_2.11-2.3.0
zookeeper-3.5.5
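
The examples below are plain Scala. A minimal build.sbt sketch for this environment might look like the following (the project name is hypothetical, and the exact dependency set may need adjusting to your setup):

name := "spark-streaming-kafka-demo"   // hypothetical project name
scalaVersion := "2.11.12"              // matches the kafka_2.11 binary above

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-streaming" % "2.2.0",
  // direct-stream connector for Kafka 0.10+ brokers
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.2.0",
  // kafka-clients is used by the standalone producer below
  "org.apache.kafka" % "kafka-clients" % "2.3.0"
)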

Kafka Producer Development

package doc

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.Random

/**
  * @Author huangwei
  * @Date 19-10-15 
  * @Comments
  **/
object KafkaProducer extends App {

  // topic
  val topic = "KafkaOperation"
  // brokers
  val brokers = "localhost:9091,localhost:9092,localhost:9093"
  // random generator, used to jitter the send pacing
  val rnd = new Random()
  // producer configuration
  val props = new Properties()
  // broker list
  props.put("bootstrap.servers",brokers)
  // client name
  props.put("client.id","kafkaGenerator")
  // key/value serializers
  props.put("key.serializer","org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer","org.apache.kafka.common.serialization.StringSerializer")
  // create the Kafka producer (establishes the connection)
  val producer = new KafkaProducer[String,String](props)
  val t = System.currentTimeMillis()  // current time, for the throughput report below

  val nameAddrs = Map(
    "bob" -> "shanghai#200000", "amy" -> "beijing#100000", "alice" -> "shanghai#200000",
    "tom" -> "beijing#100000", "lulu" -> "hangzhou#310000", "nick" -> "shanghai#200000")
  val namePhones = Map(
    "bob" -> "15700079421", "amy" -> "18700079458", "alice" -> "17730076427",
    "tom" -> "16700379451", "lulu" -> "18800074423", "nick" -> "14400033426")

  // address records: "name \t city#zip \t 0" (the trailing 0 marks an address record)
  for (nameAddr <- nameAddrs){
    val data = new ProducerRecord[String,String](topic,nameAddr._1,s"${nameAddr._1}\t${nameAddr._2}\t0")
    producer.send(data)
    // short random pause so the records do not arrive in a single burst
    if (rnd.nextInt(100) < 50) Thread.sleep(rnd.nextInt(10))
  }

  // phone records: "name \t phone \t 1" (the trailing 1 marks a phone record)
  for (namePhone <- namePhones){
    val data = new ProducerRecord[String,String](topic,namePhone._1,s"${namePhone._1}\t${namePhone._2}\t1")
    producer.send(data)
    if (rnd.nextInt(100) < 50) Thread.sleep(rnd.nextInt(10))
  }

  // report approximate send throughput (records per second); max(...,1) guards against division by zero
  println("sent per second: " + (nameAddrs.size + namePhones.size) * 1000 / math.max(System.currentTimeMillis() - t, 1))
  producer.close()


}
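
Note that producer.send(data) above is fire-and-forget, so failed sends go unnoticed. As a minimal sketch (the Callback interface is part of the standard kafka-clients API; the logging here is illustrative), either send call could be replaced with:

import org.apache.kafka.clients.producer.{Callback, RecordMetadata}

producer.send(data, new Callback {
  override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
    if (exception != null) exception.printStackTrace()   // the send failed
    else println(s"sent to ${metadata.topic}-${metadata.partition}@${metadata.offset}")
  }
})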

Kafka Consumer Development

package doc

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * @Author huangwei
  * @Date 19-10-16 
  * @Comments
  **/
object KafkaOperation {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("SparkStreaming-Kafka")
      .setMaster("local[*]")
      .set("spark.streaming.kafka.maxRatePerPartition","10")   // cap ingest rate per partition
    val ssc = new StreamingContext(conf,Seconds(3))            // 3-second micro-batches
    // connect directly to the brokers and topic (direct stream, no receiver)
    val kafkaParams = Map[String,Object](
      "bootstrap.servers" -> "localhost:9091,localhost:9092,localhost:9093",   // 服务器地址
      "key.deserializer" -> classOf[StringDeserializer],          // 序列化
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "kafkaOperationGroup",                      // group 设置
      "auto.offset.reset" -> "latest",                          // 从最新offset开始
      "enable.auto.commit" -> (false:java.lang.Boolean)        // 自动提交
    )
    val kafkaDirectStream = KafkaUtils.createDirectStream[String,String](
      ssc,
      PreferConsistent,
      Subscribe[String,String](List("KafkaOperation"),kafkaParams)
    )
    // split the received Kafka records into a (name, address) DStream (type flag 0)
    val nameAddStream = kafkaDirectStream.map(_.value).filter(record => {
      val tokens = record.split("\t")
      tokens(2).toInt == 0
    }).map(record => {
      val tokens = record.split("\t")
      (tokens(0),tokens(1))
    })

    // likewise, a (name, phone) DStream (type flag 1)
    val namePhoneStream = kafkaDirectStream.map(_.value).filter(record => {
      val tokens = record.split("\t")
      tokens(2).toInt == 1
    }).map(record => {
      val tokens = record.split("\t")
      (tokens(0),tokens(1))
    })
    // join the two streams on name and format the result
    val nameAddPhoneStream = nameAddStream.join(namePhoneStream).map(
      record => {
        s"name: ${record._1}, address: ${record._2._1}, phone: ${record._2._2}"
      }
    )
    nameAddPhoneStream.print()

    ssc.start()
    ssc.awaitTermination()
  }

}
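
Two notes on the code above. First, the join is per-batch: address and phone records only pair up if they land in the same 3-second batch, which the producer's back-to-back sends make likely. Second, with enable.auto.commit set to false, nothing ever commits the consumed offsets; the documented spark-streaming-kafka-0-10 pattern for committing them after each batch, registered before ssc.start(), is roughly:

    import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

    kafkaDirectStream.foreachRDD { rdd =>
      // the direct stream's RDDs carry the Kafka offset ranges they were read from
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      // commit asynchronously once the batch has been processed
      kafkaDirectStream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }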

Client Log Output

[screenshot: client console log]

Spark Jobs UI

[screenshot: Spark Jobs UI]
