Spark + Kafka + Redis: basic usage

A simple walkthrough of Spark Streaming consuming data from Kafka and writing the consumed offsets to Redis.
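The offsets are kept in a Redis hash: the key is "topic-groupId", each field is a partition id, and the value is the next offset to read for that partition. Below is a minimal sketch of that layout, assuming the same Redis instance (192.168.15.134:6379) as in the code further down; the offset values here are purely illustrative:

import redis.clients.jedis.Jedis

object OffsetLayoutDemo {
  def main(args: Array[String]): Unit = {
    // Same Redis instance as in the streaming job below (address taken from that code)
    val jedis = new Jedis("192.168.15.134", 6379)
    // One hash per topic/consumer group: field = partition id, value = next offset to read
    jedis.hset("topic3-streaming1", "0", "42")   // illustrative offset
    jedis.hset("topic3-streaming1", "1", "17")   // illustrative offset
    // hgetAll returns a java.util.Map[String, String] of partition -> offset
    println(jedis.hgetAll("topic3-streaming1"))
    jedis.close()
  }
}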

First, a Kafka producer that generates some test data:

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.util.Random

object Kafkaproducer {
  def main(args: Array[String]): Unit = {

    // Kafka producer configuration
    val props: Properties = new Properties()
    // broker addresses
    props.setProperty("bootstrap.servers", "192.168.15.134:9092,192.168.15.138:9092,192.168.15.139:9092")
    // key and value serializers
    props.setProperty("key.serializer", classOf[StringSerializer].getName)
    props.setProperty("value.serializer", classOf[StringSerializer].getName)

    val producer: KafkaProducer[String, String] = new KafkaProducer[String, String](props)

    for (i <- 0 to 1000) {
      val topic = "topic3"
      // generate a random single-letter word from 'a' to 'f'
      val word: String = String.valueOf((Random.nextInt(6) + 'a').toChar)
      Thread.sleep(500)
      val record: ProducerRecord[String, String] = new ProducerRecord[String, String](topic, word)
      producer.send(record)
    }
    println("write finished")
    producer.close()
  }
}
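To compile and run these examples, the Spark Streaming Kafka integration, the Kafka client, and Jedis need to be on the classpath. A possible build.sbt fragment; the versions are assumptions and should be aligned with your Spark, Kafka, and Scala versions:

// build.sbt fragment; versions are illustrative
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-streaming" % "2.4.8",
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.4.8",
  "org.apache.kafka" % "kafka-clients" % "2.0.0",
  "redis.clients" % "jedis" % "2.9.0"
)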

Next, the Spark Streaming consumer, which records the offsets of each batch in Redis:

import java.util

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, HasOffsetRanges, KafkaUtils, LocationStrategies, OffsetRange}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import redis.clients.jedis.Jedis

import scala.collection.JavaConverters._
import scala.collection.mutable

object streamingAndKafka {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      .setMaster("local[*]")

    val groupId = "streaming1"
    val topics = Array("topic3")
    // Kafka consumer configuration
    val kafkaParams = mutable.HashMap[String, Object](
      "bootstrap.servers" -> "192.168.15.134:9092,192.168.15.138:9092,192.168.15.139:9092",
      // key and value deserializers
      "key.deserializer" -> classOf[StringDeserializer].getName,
      "value.deserializer" -> classOf[StringDeserializer].getName,
      "group.id" -> groupId,
      // start from the earliest offset when no stored offset exists
      "auto.offset.reset" -> "earliest",
      // offsets are committed manually (to Redis), not auto-committed to Kafka
      "enable.auto.commit" -> "false"
    )

    val ssc: StreamingContext = new StreamingContext(conf, Seconds(2))

    // Read the previously stored offsets from Redis: the hash key is "topic-groupId",
    // each field is a partition id and its value is the next offset to consume
    val offsetsMap = mutable.HashMap[TopicPartition, Long]()
    val jedis: Jedis = new Jedis("192.168.15.134", 6379)
    val partAndOffset: util.Map[String, String] = jedis.hgetAll(topics(0) + "-" + groupId)
    for (part <- partAndOffset.asScala) {
      offsetsMap += (new TopicPartition(topics(0), part._1.toInt) -> part._2.toLong)
    }
    jedis.close()

    // Create the direct stream, resuming from the offsets loaded from Redis
    val kafkaDs: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams, offsetsMap))

    kafkaDs.foreachRDD(rdd => {
      // skip empty batches
      if (!rdd.isEmpty()) {
        // get the offset range consumed from each partition in this batch
        val ranges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
        ranges.foreach(println)
        // business logic placeholder: just touch value, offset and partition of each record
        rdd.foreach(t => (t.value(), t.offset(), t.partition()))

        // write the offsets back to Redis:
        // hset(key = "topic-groupId", field = partition, value = untilOffset)
        for (t <- ranges) {
          val jedis: Jedis = new Jedis("192.168.15.134", 6379)
          jedis.hset(t.topic + "-" + groupId, t.partition + "", t.untilOffset + "")
          jedis.close()
        }
      }
    })
    ssc.start()
    ssc.awaitTermination()
  }
}
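Opening a new Jedis connection for every partition in every batch works, but reusing one connection per batch is cheaper. A sketch of an alternative writer using a single connection and the same key scheme; the Redis address is the same assumption as above:

import org.apache.spark.streaming.kafka010.OffsetRange
import redis.clients.jedis.Jedis

object OffsetWriter {
  // Write the untilOffset of every partition in one batch over a single connection
  def saveOffsets(ranges: Array[OffsetRange], groupId: String): Unit = {
    val jedis = new Jedis("192.168.15.134", 6379)
    try {
      ranges.foreach { r =>
        jedis.hset(r.topic + "-" + groupId, r.partition.toString, r.untilOffset.toString)
      }
    } finally {
      jedis.close()
    }
  }
}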
