Required pom dependencies:
<dependency>
    <groupId>org.scalikejdbc</groupId>
    <artifactId>scalikejdbc_2.11</artifactId>
    <version>2.5.2</version>
</dependency>
<dependency>
    <groupId>org.scalikejdbc</groupId>
    <artifactId>scalikejdbc-config_2.11</artifactId>
    <version>2.5.2</version>
</dependency>
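These are only the ScalikeJDBC artifacts. The job below additionally assumes that spark-streaming_2.11 and spark-streaming-kafka-0-10_2.11 (matching your Spark version), as well as the MySQL JDBC driver (mysql-connector-java), are already declared in the pom; those dependencies are not repeated here.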
Create the offset table in MySQL. The composite primary key matters: the REPLACE INTO used below to upsert offsets only replaces an existing row when it collides on a primary or unique key; without one it would keep inserting new rows.
CREATE TABLE `offset` (
  `groupId` varchar(255) NOT NULL,
  `topic` varchar(255) NOT NULL,
  `partition` int(11) NOT NULL,
  `untilOffset` bigint(20) DEFAULT NULL,
  PRIMARY KEY (`groupId`, `topic`, `partition`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
Create an application.conf under the project's resources directory:
db.default.driver="com.mysql.jdbc.Driver"
db.default.url="jdbc:mysql://hdp1:3306/test?characterEncoding=utf-8"
db.default.user="root"
db.default.password="root"
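Before wiring the database into the streaming job, it can help to confirm that DBs.setup() really picks up the db.default.* settings above. Below is a minimal standalone sketch; the object name OffsetStoreSmokeTest is made up for illustration and is not part of the project:
import scalikejdbc.{DB, SQL}
import scalikejdbc.config.DBs

// hypothetical helper, only to verify the application.conf / MySQL wiring
object OffsetStoreSmokeTest {
  def main(args: Array[String]): Unit = {
    DBs.setup() // reads db.default.* from application.conf
    val rows = DB.readOnly { implicit session =>
      SQL("select count(*) from `offset`").map(_.long(1)).single().apply()
    }
    println(s"offset table currently holds ${rows.getOrElse(0L)} row(s)")
    DBs.closeAll()
  }
}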
The code is as follows:
package com.guantengyun.sparkstreaming

import com.guantengyun.sparkstreaming.util.OrderUtil
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, HasOffsetRanges, KafkaUtils, LocationStrategies}
import scalikejdbc.{DB, SQL}
import scalikejdbc.config.DBs

object SparkStreaming_Kafka_Redis_Order_demo07 {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("NetworkWordCount")
    val ssc = new StreamingContext(conf, Seconds(5))

    val groupId = "order_info_offset_consumer"
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hdp1:9092,hdp2:9092,hdp3:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "earliest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topics = Array("order_info")

    // load the connection settings from application.conf
    DBs.setup()

    // read the offsets previously saved for this consumer group from MySQL
    val fromdbOffset: Map[TopicPartition, Long] = DB.readOnly { implicit session =>
      SQL("select topic, `partition`, untilOffset from `offset` where groupId = ?")
        .bind(groupId)
        .map(rs => new TopicPartition(rs.string("topic"), rs.int("partition")) -> rs.long("untilOffset"))
        .list().apply()
    }.toMap

    val stream: InputDStream[ConsumerRecord[String, String]] = if (fromdbOffset.isEmpty) {
      // no saved offsets: start according to auto.offset.reset
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )
    } else {
      // resume from the offsets stored in MySQL
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Assign[String, String](fromdbOffset.keys, kafkaParams, fromdbOffset)
      )
    }

    stream.foreachRDD((rdd, time) => {
      if (!rdd.isEmpty()) {
        // capture the offset ranges of this batch
        val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
        // the Kafka record values
        val values: RDD[String] = rdd.map(_.value())
        // save to HDFS, one directory per batch, named by the batch time
        values.saveAsTextFile(s"hdfs://hdp0311/order_info/output_${time.milliseconds}")
        // TODO: business logic goes here
        // ...
        // write the offsets of this batch back to MySQL
        DB.localTx { implicit session =>
          for (or <- offsetRanges) {
            SQL("replace into `offset` (groupId, topic, `partition`, untilOffset) values (?, ?, ?, ?)")
              .bind(groupId, or.topic, or.partition, or.untilOffset).update().apply()
          }
        }
      }
    })

    ssc.start()            // Start the computation
    ssc.awaitTermination() // Wait for the computation to terminate
  }
}
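On the first start the offset table is empty, so the Subscribe branch is taken and consumption begins according to auto.offset.reset; on every later start the Assign branch resumes from the offsets last written to MySQL. Note that the batch output is saved to HDFS before the offsets are stored, and the two steps are not covered by a single transaction, so a failure in between replays that batch on restart; in other words this pattern gives at-least-once, not exactly-once, semantics.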