Main class:
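The OffsetStore trait this class extends is not included in the post; inferred from the two overridden methods, it would look roughly like the sketch below (an assumption, not the original definition):

trait OffsetStore {
  // restore the last saved offset for every partition of the given topics,
  // falling back to Kafka's earliest/latest offsets for partitions without a stored entry
  def readOffsets(topics: Set[String], appName: String, isLatest: Boolean): Map[TopicPartition, Long]
  // persist the offset ranges consumed by the given RDD
  def saveOffsets(appName: String, rdd: RDD[_]): Unit
}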
package com.kafka.sources
import java.sql.{Connection, ResultSet, Statement}
import java.util
import java.lang
import java.util.Properties
import com.Test.{MysqlSourceDB, SourceDB}
import com.Utils.DBConnManager
import com.kafka.service.KafkaConsumerService
import org.apache.kafka.common.TopicPartition
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka010.{HasOffsetRanges, OffsetRange}
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.collection.JavaConverters._
class TiDBOffsetStore(config:Properties,sourceDB: MysqlSourceDB) extends OffsetStore {
private val kafkaService = KafkaConsumerService(config)
override def readOffsets(topics: Set[String], appName: String, isLatest: Boolean): Map[TopicPartition, Long] = {
// fetch every partition of the requested topics from Kafka
val topicPartitionList: List[TopicPartition] = topics.flatMap(topic => kafkaService.getTopicPartition(topic)).toList
val partitionMap = new mutable.HashMap[TopicPartition, Long]()
// read the offsets previously stored in MySQL/TiDB
val conn: Connection = DBConnManager.getConnection(sourceDB.dirver,sourceDB.url,sourceDB.username,sourceDB.password)
var rs: ResultSet=null
var stat: Statement=null
try {
stat = conn.createStatement
val sql = s"SELECT topic,partitionNum,untilOffset FROM mysql_kafka_offset WHERE appName ='$appName' AND topic in (${topics.mkString("'", "','", "'")}) AND current=1"
rs = stat.executeQuery(sql)
while (rs.next()) {
partitionMap += (new TopicPartition(rs.getString("topic"), rs.getInt("partitionNum")) -> rs.getLong("untilOffset"))
}
} catch {
case e: Exception => e.printStackTrace()
} finally {
if (rs != null) rs.close()
if (stat != null) stat.close()
if (conn != null) conn.close()
}
// collect partitions that have no stored offset yet (e.g. newly added partitions)
val newPartitionList = new ListBuffer[TopicPartition]
topicPartitionList.foreach(partition => {
if (!partitionMap.contains(partition)) newPartitionList += partition
})
// for those partitions start from the latest or earliest Kafka offset, depending on the flag
val newPartitionMap = if (isLatest) getKafkaEndOffset(newPartitionList.toList) else getKafkaBeginningOffset(newPartitionList.toList)
// merge the Kafka offsets fetched for the new partitions into the stored ones
newPartitionMap.asScala.foreach(e => partitionMap +=(e._1 -> e._2))
partitionMap.toMap
}
override def saveOffsets(appName: String, rdd: RDD[_]): Unit = {
var conn=DBConnManager.getConnection(sourceDB.dirver,sourceDB.url,sourceDB.username,sourceDB.password)
val offsetsRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
try {
offsetsRanges.foreach(offset => {
if(conn == null || conn.isClosed) conn=DBConnManager.getConnection(sourceDB.dirver,sourceDB.url,sourceDB.username,sourceDB.password)
var countRs:ResultSet = null
var countStat:Statement =null
var stat:Statement =null
try {
val countsql = s"SELECT count(*) from mysql_kafka_offset WHERE appName ='$appName' AND topic = '${offset.topic}' AND partitionNum=${offset.partition} AND current=1"
countStat=conn.createStatement()
countRs=countStat.executeQuery(countsql)
var count =0L
while (countRs.next()){
count =countRs.getLong(1)
}
stat=conn.createStatement()
if (count >0){
val updateSQLBuilder=s"UPDATE mysql_kafka_offset SET fromOffset=${offset.fromOffset},untilOffset=${offset.untilOffset} where appName='$appName' AND topic='${offset.topic}' AND partitionNum=${offset.partition} AND current=1"
stat.executeUpdate(updateSQLBuilder)
}else{
val insertSql=s"INSERT INTO mysql_kafka_offset(appName,topic,partitionNum,current,fromOffset,untilOffset)VALUES('$appName','${offset.topic}','${offset.partition}',1,'${offset.fromOffset}','${offset.untilOffset}')"
stat.executeUpdate(insertSql)
}
} catch {
case e:Exception =>e.printStackTrace()
} finally {
if (countRs != null) countRs.close()
if (countStat != null) countStat.close()
if (stat != null) stat.close()
}
})
} catch {
case e: Exception =>e.printStackTrace()
} finally {
conn.close()
}
}
def getKafkaBeginningOffset(topicPartitions: List[TopicPartition]): util.Map[TopicPartition, lang.Long] =
kafkaService.getBeginningOffset(topicPartitions)
def getKafkaEndOffset(topicPartitions: List[TopicPartition]): util.Map[TopicPartition, lang.Long] =
kafkaService.getEndOffset(topicPartitions)
}
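Two details of the class above are assumed rather than shown. The mysql_kafka_offset table needs at least the columns appName, topic, partitionNum, current, fromOffset and untilOffset, with current=1 marking the active row, since those are the columns the SQL statements read and write. DBConnManager is also not included; assuming it is a thin wrapper over java.sql.DriverManager, a minimal sketch could be:

package com.Utils

import java.sql.{Connection, DriverManager}

object DBConnManager {
  // load the JDBC driver class and hand out a plain connection;
  // a production implementation would more likely use a connection pool
  def getConnection(driver: String, url: String, username: String, password: String): Connection = {
    Class.forName(driver)
    DriverManager.getConnection(url, username, password)
  }
}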
KafkaConsumerService class:
package com.kafka.service
import java.util
import java.util.Properties
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import scala.collection.mutable.ListBuffer
import java.{lang => jl, util => ju}
import scala.collection.JavaConverters._
class KafkaConsumerService(createConsumer: () => KafkaConsumer[String,String]) extends Serializable {
lazy val consumer=createConsumer()
def getTopicPartition(topic:String) :List[TopicPartition] ={
val list = new ListBuffer[TopicPartition]
// consumer.partitionsFor(topic) returns a util.List[PartitionInfo] describing every partition of the topic
// collect all partitions of the topic as TopicPartition objects
consumer.partitionsFor(topic).asScala.foreach(partitionInfo => {
list += new TopicPartition(partitionInfo.topic(), partitionInfo.partition())
})
list.toList
}
def getBeginningOffset(topicPartitions:List[TopicPartition]): ju.Map[TopicPartition,jl.Long] ={
consumer.beginningOffsets(topicPartitions.asJava)
}
def getEndOffset(topicPartitions: List[TopicPartition]): ju.Map[TopicPartition,jl.Long] ={
consumer.endOffsets(topicPartitions.asJava)
}
}
object KafkaConsumerService{
def apply(config:Properties): KafkaConsumerService = {
val createConsumerFunc = () =>{
val consumer = new KafkaConsumer[String,String](config)
sys.addShutdownHook{
consumer.close()
}
consumer
}
new KafkaConsumerService(createConsumerFunc)
}
}
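For reference, KafkaConsumerService can also be used on its own to look up partitions and offsets for a topic; a minimal sketch (the broker address, group id and topic name are placeholders):

import java.util.Properties
import org.apache.kafka.common.serialization.StringDeserializer
import com.kafka.service.KafkaConsumerService

val props = new Properties()
props.put("bootstrap.servers", "localhost:9092")   // placeholder broker list
props.put("group.id", "offset-inspector")          // placeholder group id
props.put("key.deserializer", classOf[StringDeserializer].getName)
props.put("value.deserializer", classOf[StringDeserializer].getName)

val service = KafkaConsumerService(props)
val partitions = service.getTopicPartition("my_topic")        // placeholder topic
val earliestOffsets = service.getBeginningOffset(partitions)  // java.util.Map[TopicPartition, java.lang.Long]
val latestOffsets = service.getEndOffset(partitions)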
Main (driver) code:
// Kafka consumer parameters
val kafkaParams = Map[String, Object](
ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> appConf.kafkaBrokers,
ConsumerConfig.GROUP_ID_CONFIG -> appConf.kafkaGroupID,
ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
//ConsumerConfig.AUTO_OFFSET_RESET_CONFIG ->
// offsets are stored in MySQL/TiDB by the OffsetStore, so Kafka auto-commit is disabled
ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
)
val prop = new Properties()
for (para <-kafkaParams){
prop.put(para._1,para._2)
}
val offsetStore = new TiDBOffsetStore(prop,mysqlSourceDB)
val fromOffset: Map[TopicPartition, Long] = offsetStore.readOffsets(topics,appConf.appName,appConf.isLatest)
val kafkaStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
ssc,
// location strategy: Spark Streaming pre-fetches and caches records on the executors;
// use PreferBrokers only when the executors run on the same hosts as the Kafka brokers,
// in most cases PreferConsistent is used to distribute partitions evenly across executors
LocationStrategies.PreferConsistent,
ConsumerStrategies.Assign[String, String](fromOffset.keys.toList, kafkaParams, fromOffset)
)
kafkaStream.foreachRDD(kafkaRdd =>{
offsetStore.saveOffsets(appConf.appName,kafkaRdd)
})
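The loop above persists offsets as soon as a batch arrives; for at-least-once semantics the offsets are usually saved only after the batch has been processed, and the streaming context still has to be started. A sketch of that ordering (the processing step is a placeholder):

kafkaStream.foreachRDD { kafkaRdd =>
  // process the batch first (placeholder for the real business logic)
  kafkaRdd.foreachPartition(_.foreach(record => ()))
  // persist the consumed offset ranges only after processing succeeded
  offsetStore.saveOffsets(appConf.appName, kafkaRdd)
}
ssc.start()
ssc.awaitTermination()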