用 Spark RDD(注意:不是 Spark Streaming)向 Kafka 写数据时,只能通过 Kafka 原生的 Producer API 来实现。
导maven包:
这一步不能直接复制粘贴:先确认你机器上的 Kafka 版本,再引入与之对应版本的依赖包。
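<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.10</artifactId>
    <version>0.9.0.0</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>0.9.0.0</version>
</dependency>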
导包:
WriteToKafka的包
import java.util.Properties
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.log4j.{Level, Logger}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
KafkaSink的包
import java.util.concurrent.Future
import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerRecord, RecordMetadata }
代码:
复制粘贴即可,记得修改为你的kafka地址。
/** Demo job: sends every element of a small in-memory RDD to the Kafka topic "test". */
object WriteToKafka {
  /**
   * Creates a local SparkContext, broadcasts a KafkaSink built from a fixed
   * producer configuration, and sends each RDD record to Kafka.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Sets the log display level; delete this line if it causes an error.
    Logger.getRootLogger.setLevel(Level.WARN)
    val conf = new SparkConf().setMaster("local").setAppName("app")
    val sc: SparkContext = new SparkContext(conf)
    // Stop the context even when the job fails, so it is not leaked.
    try {
      val rdd: RDD[String] = sc.parallelize(Array("1", "2", "3", "4"))
      // Broadcast the KafkaSink
      val kafkaProducer: Broadcast[KafkaSink[String, String]] = {
        val kafkaProducerConfig = {
          val p = new Properties()
          // Change this to your own Kafka broker address.
          p.setProperty("bootstrap.servers", "192.168.163.120:9092")
          p.setProperty("key.serializer", classOf[StringSerializer].getName)
          p.setProperty("value.serializer", classOf[StringSerializer].getName)
          p
        }
        sc.broadcast(KafkaSink[String, String](kafkaProducerConfig))
      }
      rdd.foreach { record =>
        kafkaProducer.value.send("test", record)
      }
    } finally {
      sc.stop()
    }
  }
}
/**
 * Serializable wrapper around a KafkaProducer.
 *
 * Only the factory function is stored as constructor state; the producer
 * itself is created on first use.
 *
 * @param createProducer factory invoked once, on first access to `producer`
 * @tparam K record key type
 * @tparam V record value type
 */
class KafkaSink[K, V](createProducer: () => KafkaProducer[K, V]) extends Serializable {
  /* This is the key idea that allows us to work around running into
     NotSerializableExceptions: the producer is created lazily, and it is
     marked @transient so that an already-initialised producer is never
     written out when this sink is serialized. */
  @transient lazy val producer: KafkaProducer[K, V] = createProducer()

  /**
   * Sends a keyed record to `topic`.
   *
   * @return the Future returned by the underlying producer's `send`
   */
  def send(topic: String, key: K, value: V): Future[RecordMetadata] =
    producer.send(new ProducerRecord[K, V](topic, key, value))

  /**
   * Sends a record without a key to `topic`.
   *
   * @return the Future returned by the underlying producer's `send`
   */
  def send(topic: String, value: V): Future[RecordMetadata] =
    producer.send(new ProducerRecord[K, V](topic, value))
}
/** Factory methods that build a KafkaSink from producer configuration. */
object KafkaSink {
  // Explicit converters (.asJava / .asScala) instead of the deprecated
  // implicit scala.collection.JavaConversions.
  import scala.collection.JavaConverters._

  /**
   * Builds a KafkaSink whose producer is created from `config` on first use.
   *
   * @param config Kafka producer settings, keyed by property name
   */
  def apply[K, V](config: Map[String, Object]): KafkaSink[K, V] = {
    val createProducerFunc = () => {
      val producer = new KafkaProducer[K, V](config.asJava)
      sys.addShutdownHook {
        // Ensure that, on executor JVM shutdown, the Kafka producer sends
        // any buffered messages to Kafka before shutting down.
        producer.close()
      }
      producer
    }
    new KafkaSink[K, V](createProducerFunc)
  }

  /**
   * Builds a KafkaSink from a java.util.Properties instance by copying its
   * entries into an immutable Scala map.
   *
   * @param config Kafka producer settings
   */
  def apply[K, V](config: java.util.Properties): KafkaSink[K, V] = {
    val scalaConfig: Map[String, Object] = config.asScala.toMap
    apply[K, V](scalaConfig)
  }
}
参考其他博主的,不过忘了在哪里找到的了,想起了会补上来的
希望能帮到有需要的朋友。