MySQL存储Kafka偏移量-快速入门Scala篇

所需pom依赖


<dependency>
    <groupId>org.scalikejdbc</groupId>
    <artifactId>scalikejdbc_2.11</artifactId>
    <version>2.5.2</version>
</dependency>


<dependency>
   <groupId>org.scalikejdbc</groupId>
   <artifactId>scalikejdbc-config_2.11</artifactId>
   <version>2.5.2</version>
</dependency>

在MySQL中创建表

-- Stores the last processed Kafka offset per (consumer group, topic, partition).
-- The composite primary key is required: REPLACE INTO only overwrites an existing
-- row when it collides on a PRIMARY KEY or UNIQUE index; without one every batch
-- would append a new row instead of updating the stored offset.
CREATE TABLE `offset` (
  `groupId` varchar(255) NOT NULL,
  `topic` varchar(255) NOT NULL,
  `partition` int(11) NOT NULL,
  `untilOffset` bigint(20) NOT NULL,
  PRIMARY KEY (`groupId`, `topic`, `partition`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

在工程的resources目录下建一个application.conf

# JDBC settings for the "default" connection pool; presumably these are the values
# picked up by the DBs.setup() call in the job below (scalikejdbc-config).
# NOTE(review): host, database and credentials look like sample values; confirm before reuse.
db.default.driver="com.mysql.jdbc.Driver"
db.default.url="jdbc:mysql://hdp1:3306/test?characterEncoding=utf-8"
db.default.user="root"
db.default.password="root"

代码如下:

package com.guantengyun.sparkstreaming

import com.guantengyun.sparkstreaming.util.{OrderUtil}
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.{SparkConf}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, HasOffsetRanges, KafkaUtils, LocationStrategies}
import scalikejdbc.{DB, SQL}
import scalikejdbc.config.DBs

/**
 * Spark Streaming job that consumes the `order_info` Kafka topic and keeps the
 * consumer offsets in the MySQL table `offset` rather than committing them to Kafka.
 *
 * On start-up the offsets stored for the consumer group are read back and used as
 * starting positions. After every non-empty batch the record values are written to
 * HDFS and the batch's end offsets are saved to MySQL.
 */
object SparkStreaming_Kafka_Redis_Order_demo07 {

  /**
   * Loads the offsets previously stored for a consumer group.
   *
   * @param groupId Kafka consumer group whose rows are read from the `offset` table
   * @return the stored `untilOffset` for each topic partition; empty when no row exists
   */
  private def loadOffsets(groupId: String): Map[TopicPartition, Long] = {
    val rows = DB.readOnly { implicit session =>
      // Bind the group id instead of interpolating it into the SQL text, and quote
      // `offset` / `partition` because both are MySQL keywords.
      SQL("select topic, `partition`, untilOffset from `offset` where groupId = ?")
        .bind(groupId)
        .map(rs => (new TopicPartition(rs.string("topic"), rs.int("partition")), rs.long("untilOffset")))
        .list()
        .apply()
    }
    rows.toMap
  }

  /**
   * Entry point: builds the streaming context, creates the Kafka direct stream from
   * the stored offsets (if any) and runs until terminated.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("NetworkWordCount")
    val ssc = new StreamingContext(conf, Seconds(5))
    val groupId = "order_info_offset_consumer"

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hdp1:9092,hdp2:9092,hdp3:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "earliest",
      // Offsets are tracked in MySQL by this job, so Kafka auto-commit is turned off.
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Array("order_info")

    // Initialise the scalikejdbc connection pool before any DB access.
    DBs.setup()

    val fromdbOffset: Map[TopicPartition, Long] = loadOffsets(groupId)

    val stream: InputDStream[ConsumerRecord[String, String]] =
      if (fromdbOffset.isEmpty) {
        // Nothing stored yet: subscribe to the topic and let `auto.offset.reset` decide.
        KafkaUtils.createDirectStream[String, String](
          ssc,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
        )
      } else {
        // Resume from the stored offsets.
        // NOTE(review): Assign pins the consumer to exactly the partitions found in the
        // table; partitions added to the topic later would not be consumed - confirm
        // this is acceptable.
        KafkaUtils.createDirectStream[String, String](
          ssc,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Assign[String, String](fromdbOffset.keys, kafkaParams, fromdbOffset)
        )
      }

    stream.foreachRDD((rdd, time) => {
      if (!rdd.isEmpty()) {
        // Capture the offset ranges from the original Kafka RDD before transforming it.
        val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

        // Values of the Kafka records.
        val values: RDD[String] = rdd.map(_.value())

        // Save to HDFS, one output directory per batch time.
        values.saveAsTextFile(s"hdfs://hdp0311/order_info/output_${time.milliseconds}")

        // TODO: put the business logic here.

        // Persist the end offsets of this batch to MySQL in a single transaction.
        // REPLACE only overwrites an existing row if the table has a PRIMARY KEY or
        // UNIQUE index on (groupId, topic, partition).
        DB.localTx { implicit session =>
          offsetRanges.foreach { range =>
            SQL("replace into `offset` (groupId,topic,`partition`,untilOffset) values(?,?,?,?)")
              .bind(groupId, range.topic, range.partition, range.untilOffset)
              .update()
              .apply()
          }
        }
      }
    })

    ssc.start() // Start the computation
    ssc.awaitTermination() // Wait for the computation to terminate
  }
}

你可能感兴趣的:(笔记,kafka,mysql,spark)