Spark: Reading Data from Kafka and Storing It in Redis

1. Build a Scala project
For the detailed setup steps, see: https://blog.csdn.net/weixin_44122028/article/details/103881508

2. Dependency configuration (build.sbt)

name := "SparkStreamingReadKafka"

scalaVersion := "2.10.5"

organization := "com.dd"

// add the dependencies
libraryDependencies ++= Seq(
  // Spark core, SQL and Streaming are provided by the cluster at runtime
  "org.apache.spark" %% "spark-core" % "1.6.2" % "provided",
  "org.apache.spark" %% "spark-sql" % "1.6.2" % "provided",
  "org.apache.spark" %% "spark-streaming" % "1.6.2" % "provided",
  "org.apache.kafka" %% "kafka" % "0.10.0.0",
  "org.apache.spark" %% "spark-streaming-kafka" % "1.6.2",
  "redis.clients" % "jedis" % "2.8.2",
  "com.alibaba" % "fastjson" % "1.2.40",
  "commons-codec" % "commons-codec" % "1.12"
)

// resolve jar conflicts when building the assembly (fat) jar
assemblyMergeStrategy in assembly := {
  case PathList("org", "apache", xs @ _*) => MergeStrategy.first
  case PathList(ps @ _*) if ps.last endsWith "axiom.xml" => MergeStrategy.filterDistinctLines
  case PathList(ps @ _*) if ps.last endsWith "Log$Logger.class" => MergeStrategy.first
  case PathList(ps @ _*) if ps.last endsWith "ILoggerFactory.class" => MergeStrategy.first
  case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
}
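
The merge strategy above relies on the sbt-assembly plugin, which the post does not show being added. A minimal project/plugins.sbt might look like the following (the 0.14.5 version is an assumption; use whatever matches your sbt release):

// project/plugins.sbt -- plugin version is an assumption
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5")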

3. Redis utility code

import redis.clients.jedis.Jedis

class RedisUtil2 {

  private var jedis: Jedis = null

  def this(host: String, port: Int) {
    this() // call the primary constructor
    jedis = new Jedis(host, port)
  }

  // add an element to a HyperLogLog structure (PFADD)
  def hyperSet(key: String, value: String): Long = {
    jedis.pfadd(key, value)
  }

  // approximate distinct count of a HyperLogLog structure (PFCOUNT)
  def hyperCount(key: String): Long = {
    jedis.pfcount(key)
  }

  def set(key: String, value: String): String = {
    jedis.set(key, value)
  }

  def get(key: String): String = {
    jedis.get(key)
  }
}
object RedisCacheManager2 {
  // one shared connection per JVM; the address points at the author's local Redis instance
  val redisUtil2: RedisUtil2 = new RedisUtil2("127.0.0.1", 6300)
}
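
A quick way to sanity-check the utilities above is a small standalone run; the key and value names below are placeholders, not part of the original code:

// hypothetical smoke test for RedisUtil2 / RedisCacheManager2
object RedisUtilDemo {
  def main(args: Array[String]): Unit = {
    val redis = RedisCacheManager2.redisUtil2
    redis.set("demo_key", "demo_value")      // plain string SET
    println(redis.get("demo_key"))           // -> demo_value
    redis.hyperSet("uv_demo", "user_001")    // PFADD into a HyperLogLog
    redis.hyperSet("uv_demo", "user_002")
    println(redis.hyperCount("uv_demo"))     // approximate distinct count -> 2
  }
}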

4. Main function: read from Kafka and write to Redis

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils
import kafka.serializer.StringDecoder
import cn.dd.utils.RedisCacheManager2

object SparkStreamingReadKafka {
  def main(args: Array[String]): Unit = {
    //Logger.getRootLogger.setLevel(Level.WARN)

    val Array(zkQuorum, groupID, topic, numThreads) = Array[String]("sc-slave1:2181", "TestConsumerID", "bigdata_screen_topic", "1")

    val sparkConf = new SparkConf().setAppName("SparkReadKafka")

    val ssc = new StreamingContext(sparkConf, Seconds(10))

    // map each topic to the number of consumer threads assigned to it
    val topicMap = topic.split(",").map((_, numThreads.toInt)).toMap

    val kafkaParams = Map[String, String](
      "bootstrap.servers" -> "sc-slave7:6667",
      "group.id" -> groupID,
      "zookeeper.connect" -> zkQuorum,
      "enable.auto.commit" -> "true",
      "auto.commit.interval.ms" -> "1000")

    val lines = KafkaUtils.createStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicMap, StorageLevel.MEMORY_AND_DISK_SER).map(_._2)

    // business-specific processing is omitted here; just do a simple set per record
    lines.foreachRDD { rdd =>
      rdd.foreachPartition { records =>
        // the RedisCacheManager2 object is initialized separately in each executor JVM
        records.foreach(record => RedisCacheManager2.redisUtil2.set(record, "value"))
      }
    }
    
    // start the streaming computation
    ssc.start()
    // wait for it to finish (a streaming job normally never terminates on its own)
    ssc.awaitTermination()
  }
}
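
Going through a single shared Jedis connection is fine for a local demo, but on a real cluster each executor should open its own connection, typically one per partition. A minimal sketch of that pattern follows; the helper name, host and port are assumptions for illustration only:

import org.apache.spark.streaming.dstream.DStream
import redis.clients.jedis.Jedis

object RedisSinkSketch {
  // hypothetical helper: write every record of the stream to Redis as a plain SET
  def saveToRedis(lines: DStream[String], host: String, port: Int): Unit = {
    lines.foreachRDD { rdd =>
      rdd.foreachPartition { records =>
        // one connection per partition, opened on the executor that processes it
        val jedis = new Jedis(host, port)
        try {
          records.foreach(record => jedis.set(record, "value"))
        } finally {
          jedis.close()
        }
      }
    }
  }
}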
