Dependencies
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.4.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.9.2</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-auth</artifactId>
    <version>2.9.2</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.4.3</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
    <version>2.4.3</version>
</dependency>
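Note that no separate kafka-clients dependency is declared: the Kafka client classes used below (ConsumerConfig, KafkaProducer, the String serializers) come in transitively through spark-streaming-kafka-0-10_2.11.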
Code
package com.baizhi

import com.baizhi.demo08.KafkaSink
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, State, StateSpec, StreamingContext}

object WordCountKafka {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount").setMaster("local[6]")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.sparkContext.setLogLevel("FATAL")
    // mapWithState keeps running totals, so a checkpoint directory is required
    ssc.checkpoint("file:///D:/checkp")

    val kafkaParams = Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "spark:9092",
      ConsumerConfig.GROUP_ID_CONFIG -> "g1",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])

    // broadcast one KafkaSink so every executor reuses a single producer for topic02
    val kafkaSinkBroadcast = ssc.sparkContext.broadcast(new KafkaSink("topic02", "spark:9092"))

    val messages = KafkaUtils.createDirectStream[String, String](ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](List("topic01"), kafkaParams))

    messages.map(record => record.value)
      .flatMap(line => line.split(" "))
      .map(word => (word, 1))
      .mapWithState(StateSpec.function((k: String, v: Option[Int], state: State[Int]) => {
        // add this batch's count to the running total stored in state
        var total: Int = 0
        if (state.exists()) {
          total = state.getOption().getOrElse(0)
        }
        total += v.getOrElse(0)
        state.update(total)
        (k, total)
      }))
      // write each updated (word, count) pair back to Kafka through the broadcast sink
      .foreachRDD(rdd => {
        rdd.foreachPartition(vs => kafkaSinkBroadcast.value.save(vs))
      })

    ssc.start()
    ssc.awaitTermination()
  }
}
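The KafkaSink used above is a small serializable helper that is broadcast to the executors: it lazily creates one idempotent KafkaProducer per JVM and writes each (word, count) pair to the target topic.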
package com.baizhi.demo08

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

class KafkaSink(topic: String, servers: String) extends Serializable {

  def createKafkaConnection(): KafkaProducer[String, String] = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, servers)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
    // idempotent producer so retried sends do not produce duplicate records
    props.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true")
    props.put(ProducerConfig.RETRIES_CONFIG, "2")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "100")
    props.put(ProducerConfig.LINGER_MS_CONFIG, "1000")
    new KafkaProducer[String, String](props)
  }

  // one producer per JVM, created lazily on the executor after deserialization
  lazy val kafkaProducer: KafkaProducer[String, String] = createKafkaConnection()

  // close the producer when the JVM shuts down
  Runtime.getRuntime.addShutdownHook(new Thread() {
    override def run(): Unit = {
      kafkaProducer.close()
    }
  })

  def save(vs: Iterator[(String, Int)]): Unit = {
    try {
      vs.foreach(tuple => {
        val record = new ProducerRecord[String, String](topic, tuple._1, tuple._2.toString)
        kafkaProducer.send(record)
      })
    } catch {
      case e: Exception => println("Send an alert email: writing to Kafka failed")
    }
  }
}
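For a quick check outside Spark, the sink can also be exercised on its own. The sketch below is not part of the original code; it assumes a broker reachable at spark:9092 and an existing topic02, and the object name KafkaSinkSmokeTest is made up for illustration.

package com.baizhi.demo08

object KafkaSinkSmokeTest {
  def main(args: Array[String]): Unit = {
    val sink = new KafkaSink("topic02", "spark:9092")
    // send two sample counts, then flush so the records leave before the JVM exits
    sink.save(Iterator(("hello", 1), ("world", 2)))
    sink.kafkaProducer.flush()
  }
}

Reading topic02 with a console consumer should then show the two records keyed by "hello" and "world".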