1. Build a Scala project
For a step-by-step walkthrough of creating the project, see: https://blog.csdn.net/weixin_44122028/article/details/103881508
2. Configure the dependencies as follows (build.sbt)
name := "SparkStreamingReadKafka"
scalaVersion := "2.10.5"
organization := "com.dd"
// add dependencies; the Spark artifacts are "provided" because the cluster supplies them at runtime
libraryDependencies ++= Seq(
  // Spark core, SQL, and Streaming (all 1.6.2, built for Scala 2.10)
  "org.apache.spark" %% "spark-core" % "1.6.2" % "provided",
  "org.apache.spark" %% "spark-sql" % "1.6.2" % "provided",
  "org.apache.spark" %% "spark-streaming" % "1.6.2" % "provided",
  "org.apache.kafka" %% "kafka" % "0.10.0.0",
  "org.apache.spark" %% "spark-streaming-kafka" % "1.6.2",
  "redis.clients" % "jedis" % "2.8.2",
  "com.alibaba" % "fastjson" % "1.2.40",
  "commons-codec" % "commons-codec" % "1.12"
)
// resolve jar conflicts when building the assembly (fat) jar
assemblyMergeStrategy in assembly := {
  case PathList("org", "apache", xs @ _*) => MergeStrategy.first
  case PathList(ps @ _*) if ps.last endsWith "axiom.xml" => MergeStrategy.filterDistinctLines
  case PathList(ps @ _*) if ps.last endsWith "Log$Logger.class" => MergeStrategy.first
  case PathList(ps @ _*) if ps.last endsWith "ILoggerFactory.class" => MergeStrategy.first
  case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
}
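The assemblyMergeStrategy setting above comes from the sbt-assembly plugin, which has to be enabled separately. A minimal project/plugins.sbt sketch (the 0.14.3 version number is an assumption; use whatever release matches your sbt version):

// project/plugins.sbt -- provides the `assembly` task and the MergeStrategy used above
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")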
3. Redis code
import redis.clients.jedis.Jedis

class RedisUtil2 {
  private var jedis: Jedis = null

  // auxiliary constructor: connect to the given Redis host and port
  def this(host: String, port: Int) {
    this() // call the primary constructor
    jedis = new Jedis(host, port)
  }

  // add a value to a HyperLogLog (PFADD); returns 1 if the cardinality estimate changed
  def hyperSet(key: String, value: String): Long = jedis.pfadd(key, value)

  // approximate cardinality of a HyperLogLog (PFCOUNT)
  def hyperCount(key: String): Long = jedis.pfcount(key)

  def set(key: String, value: String): String = jedis.set(key, value)

  def get(key: String): String = jedis.get(key)
}
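Note that a single Jedis connection is not thread-safe, so if several tasks in one JVM share RedisUtil2, commands can interleave. A minimal pooled variant as a sketch (the RedisPoolUtil name is hypothetical; jedis 2.8.2 ships JedisPool):

import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}

// hypothetical pooled alternative to RedisUtil2
class RedisPoolUtil(host: String, port: Int) {
  private val pool = new JedisPool(new JedisPoolConfig(), host, port)

  // borrow a connection, run the operation, then return the connection to the pool
  def withJedis[T](f: Jedis => T): T = {
    val jedis = pool.getResource
    try f(jedis) finally jedis.close() // close() returns a pooled connection to the pool
  }
}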
object RedisCacheManager2 {
  // one shared connection per JVM, created when the object is first referenced
  val redisUtil2: RedisUtil2 = new RedisUtil2("127.0.0.1", 6300)
}
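hyperSet/hyperCount wrap Redis's HyperLogLog commands (PFADD/PFCOUNT), which estimate a distinct count in constant memory. A quick usage sketch (the key and values here are made up):

object RedisUtilDemo {
  def main(args: Array[String]): Unit = {
    val redis = RedisCacheManager2.redisUtil2
    redis.hyperSet("uv:demo", "user-1")  // PFADD returns 1 when the estimate changes
    redis.hyperSet("uv:demo", "user-2")
    redis.hyperSet("uv:demo", "user-1")  // duplicate; the estimate is unchanged
    println(redis.hyperCount("uv:demo")) // approximate distinct count: 2
  }
}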
4. Main function: read data from Kafka and write it to Redis
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils
import kafka.serializer.StringDecoder
import cn.dd.utils.RedisCacheManager2

object SparkStreamingReadKafka {
  def main(args: Array[String]): Unit = {
    //Logger.getRootLogger.setLevel(Level.WARN)
    val Array(zkQuorum, groupID, topic, numThreads) =
      Array[String]("sc-slave1:2181", "TestConsumerID", "bigdata_screen_topic", "1")
    val sparkConf = new SparkConf().setAppName("SparkReadKafka")
    val ssc = new StreamingContext(sparkConf, Seconds(10))
    // map each topic to the number of consumer threads assigned to it
    val topicMap = topic.split(",").map((_, numThreads.toInt)).toMap
    val kafkaParams = Map[String, String](
      "bootstrap.servers" -> "sc-slave7:6667",
      "group.id" -> groupID,
      // the receiver-based createStream consumes via the ZooKeeper quorum
      "zookeeper.connect" -> zkQuorum,
      "enable.auto.commit" -> "true",
      "auto.commit.interval.ms" -> "1000")
    val lines = KafkaUtils.createStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, topicMap, StorageLevel.MEMORY_AND_DISK_SER).map(_._2)
    // business-specific code is omitted here; just do a simple SET per record.
    // lines is a DStream, so the Redis write must run inside foreachRDD on the
    // executors; the RedisCacheManager2 singleton is initialized once per executor JVM
    lines.foreachRDD { rdd =>
      rdd.foreachPartition { iter =>
        iter.foreach(line => RedisCacheManager2.redisUtil2.set(line, "value"))
      }
    }
    // start the streaming computation
    ssc.start()
    // block until the job is terminated (normally it never finishes on its own)
    ssc.awaitTermination()
  }
}
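Since fastjson is on the dependency list, each Kafka message is presumably a JSON string. A sketch of the omitted business step, parsing a message and feeding one field into the HyperLogLog helpers (the "userId" field and the "uv:bigdata_screen" key are hypothetical):

import com.alibaba.fastjson.JSON
import cn.dd.utils.RedisCacheManager2

object MessageHandler {
  // parse one message and add its user id to a HyperLogLog for unique-user counting;
  // call this from inside foreachPartition in place of the simple set() above
  def recordUser(line: String): Unit = {
    val obj = JSON.parseObject(line)     // fastjson: String -> JSONObject
    val userId = obj.getString("userId") // "userId" is an assumed field name
    if (userId != null) {
      RedisCacheManager2.redisUtil2.hyperSet("uv:bigdata_screen", userId)
    }
  }
}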