kafka的partitions的offset值写入mysql

主类:

package com.kafka.sources

import java.sql.{Connection, ResultSet, Statement}
import java.util
import java.lang
import java.util.Properties

import com.Test.{MysqlSourceDB, SourceDB}
import com.Utils.{DBConnManager}
import com.kafka.service.KafkaConsumerService
import org.apache.kafka.common.TopicPartition
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka010.{HasOffsetRanges, OffsetRange}

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.collection.JavaConverters._

/**
 * [[OffsetStore]] implementation that keeps Kafka consumer offsets in the
 * `mysql_kafka_offset` table, accessed through JDBC.
 *
 * @param config   Kafka consumer properties handed to [[KafkaConsumerService]]
 * @param sourceDB JDBC settings (driver, url, username, password) for the offset database
 */
class TiDBOffsetStore(config:Properties,sourceDB: MysqlSourceDB) extends OffsetStore {
  import java.sql.PreparedStatement

  private val kafkaService = KafkaConsumerService(config)

  /**
   * Builds the starting offset of every partition of `topics`.
   *
   * Offsets stored for `appName` (rows with `current=1`) take precedence. Partitions
   * that Kafka reports but that have no stored row are treated as new and start at
   * an offset fetched from Kafka.
   *
   * A failure while reading the table is printed and not rethrown; partitions whose
   * rows were not read are then handled like new partitions.
   *
   * @param topics    topics whose partitions are looked up
   * @param appName   application name used as the key in the offset table
   * @param isEarList despite its name, `true` starts NEW partitions at the Kafka END
   *                  offset and `false` at the BEGINNING offset
   * @return starting offset per partition
   */
  override def readOffsets(topics: Set[String], appName: String, isEarList: Boolean): Map[TopicPartition, Long] = {
    // Partitions currently known to Kafka for the requested topics.
    val knownPartitions: List[TopicPartition] = topics.flatMap(topic => kafkaService.getTopicPartition(topic)).toList
    val storedOffsets = new mutable.HashMap[TopicPartition, Long]()

    // An empty topic set cannot match any row, and "IN ()" is not valid SQL, so skip the query.
    if (topics.nonEmpty) {
      // A List keeps one placeholder per topic; mapping the Set itself would collapse the "?"s.
      val topicList = topics.toList
      val conn: Connection = openConnection()
      var stat: PreparedStatement = null
      var rs: ResultSet = null
      try {
        val placeholders = topicList.map(_ => "?").mkString(",")
        stat = conn.prepareStatement(
          s"SELECT topic,partitionNum,untilOffset FROM mysql_kafka_offset WHERE appName = ? AND topic IN ($placeholders) AND current=1")
        stat.setString(1, appName)
        topicList.zipWithIndex.foreach { case (topic, idx) => stat.setString(idx + 2, topic) }
        rs = stat.executeQuery()
        while (rs.next()) {
          // Column labels must match the SELECT list exactly.
          storedOffsets += (new TopicPartition(rs.getString("topic"), rs.getInt("partitionNum")) -> rs.getLong("untilOffset"))
        }
      } catch {
        case e: Exception => e.printStackTrace()
      } finally {
        closeQuietly(rs)
        closeQuietly(stat)
        closeQuietly(conn)
      }
    }

    // Partitions that exist in Kafka but have no stored offset yet.
    val newPartitions = knownPartitions.filterNot(storedOffsets.contains)
    val newPartitionOffsets =
      if (isEarList) getKafkaEndOffset(newPartitions) else getKafkaBeginningOffset(newPartitions)

    // Merge the Kafka-provided offsets into the stored ones.
    newPartitionOffsets.asScala.foreach { case (partition, offset) =>
      storedOffsets += (partition -> offset.longValue())
    }
    storedOffsets.toMap
  }

  /**
   * Stores the offset ranges of `rdd` for `appName`: the `current=1` row of each
   * topic/partition is updated when it exists and inserted otherwise.
   *
   * A failure on one offset range is printed and the remaining ranges are still processed.
   *
   * @param appName application name used as the key in the offset table
   * @param rdd     RDD that must implement `HasOffsetRanges`
   * @throws ClassCastException if `rdd` does not implement `HasOffsetRanges`
   */
  override def saveOffsets(appName: String, rdd: RDD[_]): Unit = {
    // Cast before opening the connection so a failing cast cannot leak it.
    val offsetsRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
    var conn = openConnection()
    try {
      offsetsRanges.foreach { offset =>
        if (conn == null || conn.isClosed) conn = openConnection()

        var countStat: PreparedStatement = null
        var countRs: ResultSet = null
        var writeStat: PreparedStatement = null
        try {
          countStat = conn.prepareStatement(
            "SELECT count(*) from mysql_kafka_offset WHERE appName = ? AND topic = ? AND partitionNum = ? AND current=1")
          countStat.setString(1, appName)
          countStat.setString(2, offset.topic)
          countStat.setInt(3, offset.partition)
          countRs = countStat.executeQuery()
          val rowExists = countRs.next() && countRs.getLong(1) > 0

          if (rowExists) {
            writeStat = conn.prepareStatement(
              "UPDATE mysql_kafka_offset SET fromOffset = ?, untilOffset = ? where appName = ? AND topic = ? AND partitionNum = ? AND current=1")
            writeStat.setLong(1, offset.fromOffset)
            writeStat.setLong(2, offset.untilOffset)
            writeStat.setString(3, appName)
            writeStat.setString(4, offset.topic)
            writeStat.setInt(5, offset.partition)
          } else {
            writeStat = conn.prepareStatement(
              "INSERT INTO mysql_kafka_offset(appName,topic,partitionNum,current,fromOffset,untilOffset) VALUES (?,?,?,1,?,?)")
            writeStat.setString(1, appName)
            writeStat.setString(2, offset.topic)
            writeStat.setInt(3, offset.partition)
            writeStat.setLong(4, offset.fromOffset)
            writeStat.setLong(5, offset.untilOffset)
          }
          writeStat.executeUpdate()
        } catch {
          case e: Exception => e.printStackTrace()
        } finally {
          closeQuietly(countRs)
          closeQuietly(countStat)
          closeQuietly(writeStat)
        }
      }
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      closeQuietly(conn)
    }
  }

  /** Beginning offsets of the given partitions, as reported by Kafka. */
  def getKafkaBeginningOffset(topicPartitions: List[TopicPartition]): util.Map[TopicPartition, lang.Long] =
    kafkaService.getBeginningOffset(topicPartitions)

  /** End offsets of the given partitions, as reported by Kafka. */
  def getKafkaEndOffset(topicPartitions: List[TopicPartition]): util.Map[TopicPartition, lang.Long] =
    kafkaService.getEndOffset(topicPartitions)

  /** Opens a JDBC connection using the settings in `sourceDB`. */
  private def openConnection(): Connection =
    DBConnManager.getConnection(sourceDB.dirver,sourceDB.url,sourceDB.username,sourceDB.password)

  /** Closes `resource` if it is non-null; a failure while closing is printed, not rethrown. */
  private def closeQuietly(resource: AutoCloseable): Unit =
    if (resource != null) {
      try resource.close()
      catch {
        case e: Exception => e.printStackTrace()
      }
    }
}

KafkaConsumerService类:

 package com.kafka.service

import java.util
import java.util.Properties

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition

import scala.collection.mutable.ListBuffer
import java.{lang => jl, util => ju}

import scala.collection.JavaConverters._
/**
 * Wrapper around a lazily created [[KafkaConsumer]] that is used to query
 * partition metadata and offsets.
 *
 * @param createConsumer factory invoked the first time the consumer is needed
 */
class KafkaConsumerService(createConsumer: () => KafkaConsumer[String,String]) extends Serializable {
  // Built on first access. KafkaConsumer is not Serializable, so the field is
  // @transient: a serialized copy of this service rebuilds its consumer from
  // createConsumer instead of failing during serialization.
  @transient lazy val consumer: KafkaConsumer[String, String] = createConsumer()

  /**
   * Lists every partition of `topic`.
   *
   * @param topic topic name
   * @return one [[TopicPartition]] per partition; empty when the client returns
   *         no metadata (null) for the topic
   */
  def getTopicPartition(topic:String) :List[TopicPartition] =
    // partitionsFor returns a java.util.List[PartitionInfo]; the Option guards a null result.
    Option(consumer.partitionsFor(topic))
      .map(_.asScala.map(info => new TopicPartition(info.topic(), info.partition())).toList)
      .getOrElse(List.empty[TopicPartition])

  /** Beginning offset of each given partition, as returned by `consumer.beginningOffsets`. */
  def getBeginningOffset(topicPartitions:List[TopicPartition]): ju.Map[TopicPartition,jl.Long] ={
    consumer.beginningOffsets(topicPartitions.asJava)
  }

  /** End offset of each given partition, as returned by `consumer.endOffsets`. */
  def getEndOffset(topicPartitions: List[TopicPartition]): ju.Map[TopicPartition,jl.Long] ={
    consumer.endOffsets(topicPartitions.asJava)
  }
}
/** Factory for [[KafkaConsumerService]]. */
object KafkaConsumerService{
  /**
   * Creates a service whose consumer factory builds a `KafkaConsumer[String,String]`
   * from `config`. Each invocation of that factory also registers a JVM shutdown
   * hook that closes the consumer it created.
   *
   * @param config Kafka consumer properties
   */
  def apply(config:Properties): KafkaConsumerService =
    new KafkaConsumerService(() => {
      val kafkaConsumer = new KafkaConsumer[String,String](config)
      sys.addShutdownHook(kafkaConsumer.close())
      kafkaConsumer
    })
}

main类:

 //kafka参数配置(需要参考以下内容)
    val kafkaParams = Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> appConf.kafkaBrokers,
      ConsumerConfig.GROUP_ID_CONFIG -> appConf.kafkaGroupID,
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      //ConsumerConfig.AUTO_OFFSET_RESET_CONFIG ->
      // Offsets are saved by offsetStore.saveOffsets and restored through
      // ConsumerStrategies.Assign below, so Kafka's own auto-commit is turned off
      // to keep the external store as the only record of progress.
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
    )

    // The offset store takes java.util.Properties, so copy the same settings over.
    val prop = new Properties()
    kafkaParams.foreach { case (key, value) => prop.put(key, value) }

    val offsetStore = new TiDBOffsetStore(prop,mysqlSourceDB)
    // Starting offset of every partition: stored offsets first, Kafka offsets for new partitions.
    val fromOffset: Map[TopicPartition, Long] = offsetStore.readOffsets(topics,appConf.appName,appConf.isLatest)
    val kafkaStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      ssc,
      // Location strategy: Spark Streaming caches the data it fetches ahead of time.
      // Use PreferBrokers when the executors run on the same hosts as the Kafka brokers.
      // In most cases use PreferConsistent, which spreads partitions evenly across executors.
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Assign[String, String](fromOffset.keys.toList, kafkaParams, fromOffset)
    )
    // Store the offset ranges of every batch.
    // NOTE(review): no processing of the batch is visible before the save in this snippet;
    // confirm that offsets are only saved after the batch's work has succeeded.
    kafkaStream.foreachRDD(kafkaRdd =>{
      offsetStore.saveOffsets(appConf.appName,kafkaRdd)
    })

你可能感兴趣的:(spark)