Spark Streaming: consuming Kafka with Kerberos authentication

1. Code

package security.view.model

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.{Seconds, StreamingContext}

object RealTimeAgentServerIncident {
  def main(args: Array[String]): Unit = {

    // Load the Spark configuration and create the SparkSession
    val spark = SparkSession.builder().appName("Kafka2Spark2Kudu-kerberos").config(new SparkConf()).getOrCreate()

    val ssc = new StreamingContext(spark.sparkContext, Seconds(6)) // Micro-batch interval: one batch every 6 seconds

    val kafkaParams = Map[String, Object]("bootstrap.servers" -> "master:9092"
      , "auto.offset.reset" -> "latest"
      , "security.protocol" -> "SASL_PLAINTEXT"
      , "sasl.kerberos.service.name" -> "kafka"
      , "sasl.mechanism" -> "GSSAPI"
      , "key.deserializer" -> classOf[StringDeserializer]
      , "value.deserializer" -> classOf[StringDeserializer]
      , "group.id" -> "testgrou111p"
    )
    val topics = Array("agent.server.log")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )
    stream.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      rdd.foreach(println)
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }


    ssc.start()
    ssc.awaitTermination()
  }


}
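The foreachRDD body above only prints each raw ConsumerRecord. As a rough sketch of where real processing would go (assuming the message value is a plain text log line; the filtering shown here is purely illustrative), the loop could be written as:

stream.foreachRDD { rdd =>
  // Read the offset ranges from the original RDD before transforming it
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

  // Work on the message values only; keys are ignored here
  rdd.map(record => record.value)
    .filter(line => line != null && line.nonEmpty)
    .foreach(line => println(line))

  // Commit the offsets back to Kafka once the batch has been handled
  stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}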

Note: the following three settings must be added to the Kafka parameters when creating the stream:

  , "security.protocol" -> "SASL_PLAINTEXT"
      , "sasl.kerberos.service.name" -> "kafka"
      , "sasl.mechanism" -> "GSSAPI"

 

2. Submitting the job

The following two options must be added to spark-submit:

--conf "spark.executor.extraJavaOptions=Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf1" 
--driver-java-options "Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf1" 

bin/spark-submit --master yarn \
--deploy-mode cluster \
--class security.view.model.AgentView \
--principal [email protected] \
--keytab /data/share/keytabpath/kafka.keytab \
--conf "spark.executor.extraJavaOptions=-Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf" \
--driver-java-options "-Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf" \
/data/modelx/spark/1281472784429481984.jar AgentView

3. The kafka_client_jaas.conf file

KafkaClient {
  com.sun.security.auth.module.Krb5LoginModule required
  useTicketCache=false
  useKeyTab=true
  keyTab="/data/share/keytabpath/kafka.keytab"
  principal="[email protected]"
  serviceName="kafka"
  storeKey=true
  renewTicket=true;
};
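Before going through YARN, it can help to confirm that the JAAS file and keytab actually authenticate by running a plain Kafka consumer on one node. The hypothetical KerberosKafkaCheck object below is only a connectivity-check sketch (it reuses the broker, topic, and JAAS path from this article and assumes a Kafka client version that still provides poll(long)); it is not part of the streaming job.

import java.util.{Collections, Properties}
import org.apache.kafka.clients.consumer.KafkaConsumer
import scala.collection.JavaConverters._

object KerberosKafkaCheck {
  def main(args: Array[String]): Unit = {
    // Point the JVM at the JAAS file, equivalent to the -D option passed to spark-submit
    System.setProperty("java.security.auth.login.config", "/etc/kafka/kafka_client_jaas.conf")

    val props = new Properties()
    props.put("bootstrap.servers", "master:9092")
    props.put("group.id", "kerberos-check")
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("security.protocol", "SASL_PLAINTEXT")
    props.put("sasl.kerberos.service.name", "kafka")
    props.put("sasl.mechanism", "GSSAPI")

    val consumer = new KafkaConsumer[String, String](props)
    consumer.subscribe(Collections.singletonList("agent.server.log"))

    // Poll a few times; an authentication problem surfaces here instead of inside a YARN container log
    for (_ <- 1 to 5) {
      val records = consumer.poll(1000L)
      records.asScala.foreach(r => println(s"${r.offset()}: ${r.value()}"))
    }
    consumer.close()
  }
}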

 
