// 将 Spark DataFrame 发送到 Kafka

import java.util.Properties
import java.util.concurrent.atomic.AtomicReference

import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroSerializer}
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecordBuilder
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{Callback, RecordMetadata}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.config.SslConfigs
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.Row

 

/**
 * Publishes the rows of a Spark DataFrame to a Kafka topic, one producer per partition,
 * with String keys and Avro values serialized through the Confluent `KafkaAvroSerializer`.
 */
object KafkaProducerDF {

  /**
   * Default producer configuration: SSL transport, String key serializer, Confluent Avro value
   * serializer and the topic-name subject strategy.
   *
   * NOTE(review): the broker address, store paths, passwords and registry URL are hard-coded
   * literals that look like placeholders. Prefer passing real settings through the
   * `producerConfig` argument of [[writeToKafka]] instead of keeping secrets in source.
   */
  val kafkaParams: Map[String, Object] = Map(
    CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
    CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> "SSL",
    SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG -> "/path/to/truststore.jks",
    SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG -> "truststore-password",
    SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG -> "/path/to/keystore.jks",
    SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG -> "keystore-password",
    SslConfigs.SSL_KEY_PASSWORD_CONFIG -> "key-password",
    "key.serializer" -> "org.apache.kafka.common.serialization.StringSerializer",
    "value.serializer" -> classOf[KafkaAvroSerializer],
    AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> "https://schema-registry-url:8081",
    AbstractKafkaAvroSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY -> "io.confluent.kafka.serializers.subject.TopicNameStrategy"
  )

  /**
   * Sends every row of `df` to `topic`.
   *
   * The record key is read from the row's `key` column and the record value is the row
   * converted by `AvroUtils.toAvro` against the parsed `schema`.
   *
   * @param df             DataFrame to publish; it must contain a String column named `key`
   * @param topic          destination Kafka topic
   * @param schema         Avro schema as a JSON string; parsed once per partition
   * @param producerConfig Kafka producer settings; defaults to [[kafkaParams]]. The map is
   *                       captured by the partition closure, so its values must be serializable.
   * @throws Exception the first asynchronous send failure reported by the producer, or any
   *                   error raised while parsing the schema or converting a row; either one
   *                   fails the Spark task instead of silently dropping records
   */
  def writeToKafka(
      df: DataFrame,
      topic: String,
      schema: String,
      producerConfig: Map[String, Object] = kafkaParams
  ): Unit = {
    // The explicit parameter type selects the Scala overload of foreachPartition; an untyped
    // lambda is ambiguous with the Java ForeachPartitionFunction overload on Scala 2.12.
    df.foreachPartition { (partition: Iterator[Row]) =>
      // Parse first so that an invalid schema fails before any connection is opened.
      val avroSchema = new Schema.Parser().parse(schema)

      // Copy entry by entry: this needs no Java collection converters and avoids the
      // Properties.putAll overload ambiguity seen with Scala 2.12 on JDK 9+.
      val props = new Properties()
      producerConfig.foreach { case (name, setting) => props.put(name, setting) }

      val producer = new KafkaProducer[String, AnyRef](props)

      // send() is asynchronous; remember the first failure so it can be rethrown below.
      val firstFailure = new AtomicReference[Exception]()
      val onSend = new Callback {
        override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit =
          Option(exception).foreach(e => firstFailure.compareAndSet(null, e))
      }

      try {
        partition.foreach { row =>
          val key = row.getAs[String]("key")
          val value = AvroUtils.toAvro(row, avroSchema)
          producer.send(new ProducerRecord[String, AnyRef](topic, key, value), onSend)
        }
        // Block until every buffered record has been acknowledged or has failed.
        producer.flush()
      } finally {
        // Always release the producer's network resources, even when a row fails to convert.
        producer.close()
      }

      Option(firstFailure.get()).foreach(e => throw e)
    }
  }
}

 

/** Helpers for turning Spark rows into Avro records. */
object AvroUtils {

  /**
   * Builds an Avro record for `schema` from the values of `row`.
   *
   * When the row carries a Spark schema that contains a column for every Avro field, values are
   * matched by column name, so extra columns (for example a separate key column) and a different
   * column order are tolerated. Otherwise the i-th Avro field is taken from the i-th column, and
   * fields beyond the end of the row are left unset for the builder to default.
   *
   * Values are passed to the builder as-is; NOTE(review): Spark-specific value types (nested
   * rows, timestamps, decimals, ...) presumably need converting to Avro-compatible
   * representations before serialization — confirm against the schemas actually used.
   *
   * @param row    source row
   * @param schema target Avro record schema
   * @return the record produced by `GenericRecordBuilder.build()`
   */
  def toAvro(row: Row, schema: Schema): AnyRef = {
    val builder = new GenericRecordBuilder(schema)
    val fields = schema.getFields

    // Row.schema may be null for rows created without a schema, hence the Option wrapper.
    val columnNames: Array[String] =
      Option(row.schema).map(_.fieldNames).getOrElse(Array.empty[String])
    val matchByName =
      (0 until fields.size).forall(i => columnNames.contains(fields.get(i).name()))

    for (i <- 0 until fields.size) {
      val fieldName = fields.get(i).name()
      val column = if (matchByName) columnNames.indexOf(fieldName) else i
      // Guard the positional fallback against rows shorter than the schema.
      if (column < row.length) {
        builder.set(fieldName, row.get(column))
      }
    }

    builder.build()
  }
}

 

你可能感兴趣的:(kafka)