Using Spark to Read from a Kerberos-Secured HBase Cluster

1. First, obtain the matching krb5.conf and keytab files for the cluster.
2. Code example:

package com.zhbr.hbase.test

import java.io.IOException
import com.google.protobuf.ServiceException
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.security.UserGroupInformation
import org.apache.spark.sql.SparkSession

/**
  * Created by drguo on 2018/7/18.
  */
object SparkExecHBase {

  def main(args: Array[String]): Unit = {
    // Load the krb5.conf file (must be set before any Hadoop security class is initialized)
    System.setProperty("java.security.krb5.conf", "/home/sgbigdata/keytab/krb5.conf")

    // Create the SparkSession
    val sparkSession = SparkSession.builder().appName(this.getClass.getSimpleName.filter(!_.equals('$'))).getOrCreate()

    // Get the SparkContext, plus the implicits needed for toDF below
    val sparkContext = sparkSession.sparkContext
    import sparkSession.implicits._

    // Set the log level
    sparkContext.setLogLevel("WARN")

    // HBase table to read
    val tableName = "DWD_AMR_GS_METER-E-CURVE_201902"

    // Build the HBase configuration
    val hbaseConf = HBaseConfiguration.create()
    // Set the ZooKeeper quorum (cluster addresses)
    hbaseConf.set("hbase.zookeeper.quorum", "10.213.111.XXX,10.213.111.XXX,10.213.111.XXX")
    // Set the ZooKeeper client port
    hbaseConf.set("hbase.zookeeper.property.clientPort", "2181")
    // Set the table to read
    hbaseConf.set(TableInputFormat.INPUT_TABLE, tableName)
    // Enable Kerberos authentication
    hbaseConf.set("hadoop.security.authentication", "Kerberos")

    // Log in to Kerberos using the keytab
    UserGroupInformation.setConfiguration(hbaseConf)
    try {
      UserGroupInformation.loginUserFromKeytab("YJ00004", "/home/sgbigdata/keytab/YJ00004.keytab")
      // Fail fast if the cluster is unreachable or the login was rejected
      HBaseAdmin.checkHBaseAvailable(hbaseConf)
    } catch {
      case e: IOException =>
        e.printStackTrace()
      case e: ServiceException =>
        e.printStackTrace()
    }
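    // Note: HBaseAdmin.checkHBaseAvailable (used above) is deprecated in HBase 2.x;
    // on newer clusters, probe connectivity via
    // ConnectionFactory.createConnection(hbaseConf).getAdmin instead.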

    // Read the table as an RDD of (rowkey, Result) pairs
    val hbaseRdd = sparkContext.newAPIHadoopRDD(hbaseConf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    // Map each Result to a tuple of doubles and convert to a DataFrame
    val dataDF = hbaseRdd.map(result => (
      Bytes.toDouble(result._2.getValue(Bytes.toBytes("DATA"), Bytes.toBytes("meterID-1"))),
      Bytes.toDouble(result._2.getValue(Bytes.toBytes("DATA"), Bytes.toBytes("meterID-2")))
    )).toDF("meterID-1", "meterID-2")
    
    // Register a temporary view and query it
    dataDF.createTempView("DLZZ")
    sparkSession.sql("select * from DLZZ").show()

    // Stop the SparkSession
    sparkSession.stop()
  }
}
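The RDD above scans the entire table. TableInputFormat also honors scan hints set on the Configuration, so the read can be narrowed before newAPIHadoopRDD is called. A minimal sketch, with illustrative column and row-key values (set these on hbaseConf before creating the RDD):

    // Restrict the scan performed by TableInputFormat; all values below are illustrative.
    // Space-delimited "family:qualifier" pairs limit the columns returned.
    hbaseConf.set(TableInputFormat.SCAN_COLUMNS, "DATA:meterID-1 DATA:meterID-2")
    // Inclusive start / exclusive stop row keys (hypothetical bounds)
    hbaseConf.set(TableInputFormat.SCAN_ROW_START, "0001")
    hbaseConf.set(TableInputFormat.SCAN_ROW_STOP, "9999")
    // Number of rows fetched per scanner RPC
    hbaseConf.set(TableInputFormat.SCAN_CACHEDROWS, "500")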

If the following error appears, it is usually because the krb5.conf file referenced in System.setProperty("java.security.krb5.conf", "/home/sgbigdata/keytab/krb5.conf") cannot be found (for example, the path is wrong), or because the kdc / admin_server addresses configured inside it are incorrect.

Exception in thread "main" java.lang.IllegalArgumentException: Can't get Kerberos realm 
at org.apache.hadoop.security.HadoopKerberosName.setConfiguration(HadoopKerberosName.java:65) 
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:319) 
at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:374) 
at drguo.test.SparkExecHBase$.main(SparkExecHBase.scala:32) 
at drguo.test.SparkExecHBase.main(SparkExecHBase.scala) 
Caused by: java.lang.reflect.InvocationTargetException 
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
at java.lang.reflect.Method.invoke(Method.java:498) 
at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:84) 
at org.apache.hadoop.security.HadoopKerberosName.setConfiguration(HadoopKerberosName.java:63) 
... 4 more 
Caused by: KrbException: Cannot locate default realm 
at sun.security.krb5.Config.getDefaultRealm(Config.java:1029) 
... 10 more
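A quick way to confirm that the realm can be resolved: KerberosUtil.getDefaultRealm is the exact call that fails in the stack trace above, so invoking it from a tiny standalone driver isolates krb5.conf problems from the rest of the job. A minimal sketch, reusing the path from the example:

import org.apache.hadoop.security.authentication.util.KerberosUtil

// Standalone check: succeeds only if krb5.conf is readable and defines a default realm.
object RealmCheck {
  def main(args: Array[String]): Unit = {
    System.setProperty("java.security.krb5.conf", "/home/sgbigdata/keytab/krb5.conf")
    try {
      println("Default Kerberos realm: " + KerberosUtil.getDefaultRealm)
    } catch {
      case e: Exception =>
        // Usually a missing/unreadable krb5.conf or a bad default_realm entry
        println("Cannot resolve default realm: " + e.getMessage)
    }
  }
}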
