Update each user's running total of steps in real time;
take a new sample every 5s, recording the user, the time of the sample, the location, and the newly added steps;
for simplicity, only two dimensions are stored in Redis: the user and the continuously updated step total.
First, simulated data is generated and fed into a Kafka topic in real time; Spark Streaming then reads the data from Kafka, performs the analysis, and finally writes the results to Redis.
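Each simulated record is a small JSON object with four fields. As a purely illustrative sketch (the case class below is not part of the project; only the field names are taken from the code that follows), one record has this shape:

// Hypothetical holder type, for illustration only; field names match the JSON
// produced by KafkaEventProducer further down.
case class WalkEvent(
  user: String,        // user name
  count_time: Long,    // epoch milliseconds when the sample was taken
  walk_place: String,  // where the user was walking
  new_walkNum: Int     // steps added since the previous sample
)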
kafka: kafka_2.10-0.10.2.1
spark: spark-2.2.0-bin-hadoop2.7
redis: redis-3.0.0
IDEA: IntelliJ IDEA 2018.3.2 x64
<project xmlns="http://maven.apache.org/POM/4.0.0">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.cn</groupId>
    <artifactId>sparkSysLearn</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.8</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-compiler</artifactId>
            <version>2.11.8</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-reflect</artifactId>
            <version>2.11.8</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>2.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
            <version>2.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka_2.10</artifactId>
            <version>1.6.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>2.2.1</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.gavaghan</groupId>
            <artifactId>geodesy</artifactId>
            <version>1.1.3</version>
        </dependency>
        <dependency>
            <groupId>com.github.scopt</groupId>
            <artifactId>scopt_2.11</artifactId>
            <version>3.7.0</version>
        </dependency>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.2.4</version>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.codehaus.jettison</groupId>
            <artifactId>jettison</artifactId>
            <version>1.1</version>
        </dependency>
        <dependency>
            <groupId>net.sf.json-lib</groupId>
            <artifactId>json-lib</artifactId>
            <version>2.4</version>
            <classifier>jdk15</classifier>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-pool2</artifactId>
            <version>2.4.2</version>
        </dependency>
    </dependencies>
    <build>
        <finalName>telecomeAnalysis-1.0.0</finalName>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.0</version>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <version>2.12.4</version>
                </plugin>
            </plugins>
        </pluginManagement>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <appendAssemblyId>false</appendAssemblyId>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${project.build.directory}/lib</outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
package com.cn.util

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.codehaus.jettison.json.JSONObject
import scala.util.Random

/**
 * A producer that pushes simulated data to the Kafka cluster.
 * Simulated scenario:
 * track each user's running total of steps; a new sample is produced every 5s, carrying
 * the user, the time of the sample, the location, and the newly added steps.
 */
object KafkaEventProducer {
  // users
  private val users = Array(
    "zhangSan", "liSi",
    "wangWu", "xiaoQiang",
    "zhangFei", "liuBei",
    "guanYu", "maChao",
    "caoCao", "guanYu"
  )
  private var pointer = -1

  // get the next user (round-robin over the array)
  def getUser(): String = {
    pointer = (pointer + 1) % users.length
    users(pointer)
  }

  // random number of newly added steps
  val random = new Random()
  def getNewStepNum(): Int = {
    random.nextInt(users.length)
  }

  // time of the sample
  def getTime(): Long = {
    System.currentTimeMillis()
  }

  // walking location
  val walkPlace = Array(
    "操场南门", "操场东门", "操场北门", "操场西门", "操场东南门", "操场西北门", "操场西南门", "操场东南北门"
  )
  def getWalkPlace(): String = {
    walkPlace(random.nextInt(walkPlace.length))
  }

  def main(args: Array[String]): Unit = {
    val topic = "topic_walkCount"
    val brokers = "master:6667,slaves1:6667,slaves2:6667"

    // producer configuration
    val props = new Properties()
    props.setProperty("bootstrap.servers", brokers)
    props.setProperty("metadata.broker.list", brokers)
    props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    // create the producer
    val producer = new KafkaProducer[String, String](props)

    // send one record every 5 seconds
    while (true) {
      val event = new JSONObject()
      event.put("user", getUser())
        .put("count_time", getTime())
        .put("walk_place", getWalkPlace())
        .put("new_walkNum", getNewStepNum())
      println(event.toString())
      // send the record
      producer.send(new ProducerRecord[String, String](topic, event.toString))
      Thread.sleep(5000)
    }
  }
}
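Before adding Spark Streaming, it can help to confirm that records are actually reaching the topic. The sketch below is not part of the original project; it reuses the broker list and topic from KafkaEventProducer and assumes a throwaway consumer group id.

package com.cn.util

import java.util.{Collections, Properties}
import org.apache.kafka.clients.consumer.KafkaConsumer
import scala.collection.JavaConverters._

// Minimal verification consumer -- a sketch, not part of the original project.
object TopicCheckConsumer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.setProperty("bootstrap.servers", "master:6667,slaves1:6667,slaves2:6667")
    props.setProperty("group.id", "topic_check") // hypothetical group id
    props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.setProperty("auto.offset.reset", "earliest")

    val consumer = new KafkaConsumer[String, String](props)
    consumer.subscribe(Collections.singletonList("topic_walkCount"))
    while (true) {
      // poll(long) is the 0.10.x API; newer clients use poll(Duration)
      val records = consumer.poll(1000L)
      records.asScala.foreach(r => println(r.value()))
    }
  }
}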
package com.cn.util

import redis.clients.jedis.JedisPool

object RedisUtils {
  private val host = "master"
  private val port = 6379
  //private val poolConfig = new GenericObjectPoolConfig()
  lazy val pool = new JedisPool(host, port)

  // shutdown hook that destroys the pool on JVM exit
  lazy val hooks = new Thread() {
    override def run(): Unit = {
      println("Execute hook thread: " + this)
      pool.destroy()
    }
  }
}
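Note that the hooks thread only takes effect once it is registered with the JVM, which the snippet above does not show. A minimal usage sketch, assuming the hook is registered in the driver and the same Redis password as in the streaming job below:

// Usage sketch (not in the original project): register the hook so pool.destroy()
// actually runs on JVM exit, then borrow and return a connection.
object RedisUtilsDemo {
  def main(args: Array[String]): Unit = {
    // registration is an assumption; the original code only defines `hooks`
    Runtime.getRuntime.addShutdownHook(RedisUtils.hooks)
    val jedis = RedisUtils.pool.getResource
    try {
      jedis.auth("123456")  // password used later in the streaming job
      println(jedis.ping()) // prints "PONG" if the connection works
    } finally {
      jedis.close()         // returns the connection to the pool in Jedis 2.9
    }
  }
}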
package com.cn.sparkStreaming

import com.cn.util.RedisUtils
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010._
import org.codehaus.jettison.json.JSONObject

/**
 * Tracks each user's running total of steps. A new sample arrives every 5s, carrying
 * the user, the time of the sample, the location, and the newly added steps;
 * each user together with the continuously updated step total is saved to Redis.
 */
object kafka2sparkStreaming2redis {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("kafka2sparkStreaming2redis")
      .setMaster("local[1]")
      //.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    // batch interval of 2s
    val ssc = new StreamingContext(conf, Seconds(2))
    // control the log output level
    ssc.sparkContext.setLogLevel("WARN") // WARN, INFO, DEBUG
    ssc.checkpoint("checkpoint")

    val topic = "topic_walkCount"
    val groupId = "t03"
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "master:6667,slaves1:6667,slaves2:6667",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "earliest", // on first start, consume from the earliest offset
      "enable.auto.commit" -> (false: java.lang.Boolean) // disable auto-commit; offsets are committed manually
    )
    val topics = Array(topic)
    val stream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent, // distribute partitions evenly across executors
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
    )

    val dbIndex = 3
    stream.foreachRDD(rdd => {
      // offsets consumed for each partition of this batch
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      rdd.foreachPartition(partitions => {
        partitions.foreach(records => {
          val record = new JSONObject(records.value())
          val user = record.getString("user")
          val countTime = record.getLong("count_time")
          val walkPlace = record.getString("walk_place")
          val newWalkNum = record.getInt("new_walkNum")
          // borrow a connection from the pool
          val jedis = RedisUtils.pool.getResource
          // Redis password
          jedis.auth("123456")
          // select the database
          jedis.select(dbIndex)
          // accumulate the user's step total in a Redis hash
          val count = jedis.hincrBy("user_walknum", user, newWalkNum)
          println(count)
          RedisUtils.pool.returnResource(jedis)
        })
      })
      // commit the offsets manually
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
Console output from a run of KafkaEventProducer:
log4j:WARN No appenders could be found for logger (org.apache.kafka.clients.producer.ProducerConfig).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
{"user":"zhangSan","count_time":1582689943955,"walk_place":"操场北门","new_walkNum":5}
{"user":"liSi","count_time":1582689956236,"walk_place":"操场东门","new_walkNum":0}
{"user":"wangWu","count_time":1582689961236,"walk_place":"操场东南北门","new_walkNum":2}
{"user":"xiaoQiang","count_time":1582689966239,"walk_place":"操场东门","new_walkNum":6}
{"user":"zhangFei","count_time":1582689971240,"walk_place":"操场东门","new_walkNum":8}
{"user":"liuBei","count_time":1582689976240,"walk_place":"操场西南门","new_walkNum":5}
{"user":"guanYu","count_time":1582689981240,"walk_place":"操场东南门","new_walkNum":9}
{"user":"maChao","count_time":1582689986240,"walk_place":"操场北门","new_walkNum":6}
{"user":"caoCao","count_time":1582689991245,"walk_place":"操场东南北门","new_walkNum":2}
{"user":"guanYu","count_time":1582689996245,"walk_place":"操场西门","new_walkNum":0}
{"user":"zhangSan","count_time":1582690001246,"walk_place":"操场东门","new_walkNum":3}
{"user":"liSi","count_time":1582690006247,"walk_place":"操场北门","new_walkNum":2}