Spark 读取 Hive 数据及相关问题解决

  1. 示例代码
    1. SparkHiveAPP 主类

      注意:
      需要将 core-site.xml、hdfs-site.xml、yarn-site.xml、mapred-site.xml 和 hive-site.xml 放到 src/main/resources 目录下,程序运行时需要通过这些配置文件连接集群环境。

      import org.apache.log4j.{Level, Logger}
      import org.apache.spark.SparkConf
      import org.apache.spark.sql.SparkSession
      
      /**
        * Minimal example that queries a Hive table through Spark SQL.
        *
        * The Hadoop/Hive client configuration files (core-site.xml, hdfs-site.xml,
        * yarn-site.xml, mapred-site.xml and hive-site.xml) must be available on the
        * classpath when this program runs.
        */
      object SparkHiveAPP {

        /**
          * Builds a Hive-enabled [[SparkSession]], prints the rows of `test.test1`
          * and stops the session afterwards, whether or not the query succeeded.
          *
          * @param args command-line arguments (unused)
          */
        def main(args: Array[String]): Unit = {

          // Lower the verbosity of all "org.*" loggers before Spark starts up.
          Logger.getLogger("org").setLevel(Level.WARN)

          // If HADOOP_USER_NAME is not set, the job fails with
          // org.apache.hadoop.security.AccessControlException: Permission denied
          System.setProperty("HADOOP_USER_NAME", "root")

          val conf = new SparkConf()
            .setIfMissing("spark.master", "local[2]") // only used when no master was supplied
            .set("spark.sql.warehouse.dir", "/user/hive/warehouse")
            .setAppName("Spark_Hive_APP")

          val spark: SparkSession = SparkSession.builder().config(conf)
            .enableHiveSupport()
            .getOrCreate()

          try {
            spark.sparkContext.setLogLevel("WARN")

            spark.sql("SELECT * FROM test.test1").show()
          } finally {
            // Always shut the session down, even if the query throws.
            spark.stop()
          }
        }
      }
      
    2. pom.xml 文件
      
      
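      <?xml version="1.0" encoding="UTF-8"?>
      <project xmlns="http://maven.apache.org/POM/4.0.0"
               xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
               xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
          <modelVersion>4.0.0</modelVersion>
          <groupId>com.cloudera</groupId>
          <artifactId>RemoteSubmitSparkToYarn</artifactId>
          <version>1.0-SNAPSHOT</version>

          <packaging>jar</packaging>
          <name>RemoteSubmitSparkToYarn</name>

          <repositories>
              <repository>
                  <id>cloudera</id>
                  <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
                  <name>Cloudera Repositories</name>
                  <releases>
                      <enabled>true</enabled>
                  </releases>
                  <snapshots>
                      <enabled>false</enabled>
                  </snapshots>
              </repository>
          </repositories>

          <properties>
              <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
              <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
              <!-- 注意: java.version 与 hbase.version 这两个属性名在原文中已丢失,此处为推测,请自行核对 -->
              <java.version>1.8</java.version>
              <scala.version>2.11.12</scala.version>
              <hbase.version>1.3.0</hbase.version>
              <hive.version>1.2.0</hive.version>
              <kafka.version>0.10.0.1</kafka.version>
              <spark.version>2.2.0</spark.version>
              <provided.scope>compile</provided.scope>
              <kafka.scope>compile</kafka.scope>
          </properties>

          <dependencies>

              <!-- 原文此处有一段被注释掉的依赖(应为整合 HBase 的相关 jar),内容在转载时丢失 -->

              <dependency>
                  <groupId>org.scala-lang</groupId>
                  <artifactId>scala-library</artifactId>
                  <version>${scala.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.scala-lang</groupId>
                  <artifactId>scala-compiler</artifactId>
                  <version>${scala.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.scala-lang</groupId>
                  <artifactId>scala-reflect</artifactId>
                  <version>${scala.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-core_2.11</artifactId>
                  <version>${spark.version}</version>
                  <exclusions>
                      <exclusion>
                          <groupId>org.glassfish.jersey.bundles.repackaged</groupId>
                          <artifactId>jersey-guava</artifactId>
                      </exclusion>
                  </exclusions>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-streaming_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-sql_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-hive_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.hive</groupId>
                  <artifactId>hive-exec</artifactId>
                  <version>${hive.version}</version>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-yarn_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-sql-kafka-0-10_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.kafka</groupId>
                  <artifactId>kafka_2.11</artifactId>
                  <version>${kafka.version}</version>
                  <scope>${kafka.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.kafka</groupId>
                  <artifactId>kafka-clients</artifactId>
                  <version>0.10.0.1</version>
                  <scope>${kafka.scope}</scope>
              </dependency>
          </dependencies>

          <build>
              <pluginManagement>
                  <plugins>
                      <plugin>
                          <groupId>org.apache.maven.plugins</groupId>
                          <artifactId>maven-compiler-plugin</artifactId>
                          <version>3.8.0</version>
                          <configuration>
                              <source>1.8</source>
                              <target>1.8</target>
                          </configuration>
                      </plugin>
                      <plugin>
                          <groupId>org.apache.maven.plugins</groupId>
                          <artifactId>maven-resources-plugin</artifactId>
                          <version>3.0.2</version>
                          <configuration>
                              <encoding>UTF-8</encoding>
                          </configuration>
                      </plugin>
                      <plugin>
                          <groupId>net.alchim31.maven</groupId>
                          <artifactId>scala-maven-plugin</artifactId>
                          <version>3.2.2</version>
                          <executions>
                              <execution>
                                  <goals>
                                      <goal>compile</goal>
                                      <goal>testCompile</goal>
                                  </goals>
                              </execution>
                          </executions>
                      </plugin>
                      <plugin>
                          <groupId>org.apache.maven.plugins</groupId>
                          <artifactId>maven-resources-plugin</artifactId>
                          <version>3.0.2</version>
                          <configuration>
                              <encoding>UTF-8</encoding>
                          </configuration>
                      </plugin>
                  </plugins>
              </pluginManagement>
              <plugins>
                  <plugin>
                      <groupId>net.alchim31.maven</groupId>
                      <artifactId>scala-maven-plugin</artifactId>
                      <executions>
                          <execution>
                              <id>scala-compile-first</id>
                              <phase>process-resources</phase>
                              <goals>
                                  <goal>add-source</goal>
                                  <goal>compile</goal>
                              </goals>
                          </execution>
                          <execution>
                              <id>scala-test-compile</id>
                              <phase>process-test-resources</phase>
                              <goals>
                                  <goal>testCompile</goal>
                              </goals>
                          </execution>
                      </executions>
                  </plugin>

                  <plugin>
                      <groupId>org.apache.maven.plugins</groupId>
                      <artifactId>maven-compiler-plugin</artifactId>
                      <executions>
                          <execution>
                              <phase>compile</phase>
                              <goals>
                                  <goal>compile</goal>
                              </goals>
                          </execution>
                      </executions>
                  </plugin>

                  <plugin>
                      <groupId>org.apache.maven.plugins</groupId>
                      <artifactId>maven-shade-plugin</artifactId>
                      <version>2.4.3</version>
                      <executions>
                          <execution>
                              <phase>package</phase>
                              <goals>
                                  <goal>shade</goal>
                              </goals>
                              <configuration>
                                  <filters>
                                      <filter>
                                          <artifact>*:*</artifact>
                                          <excludes>
                                              <exclude>META-INF/*.SF</exclude>
                                              <exclude>META-INF/*.DSA</exclude>
                                              <exclude>META-INF/*.RSA</exclude>
                                          </excludes>
                                      </filter>
                                  </filters>
                              </configuration>
                          </execution>
                      </executions>
                  </plugin>
              </plugins>
              <resources>
                  <resource>
                      <directory>${basedir}/src/main/resources</directory>
                      <excludes>
                          <exclude>env/*/*</exclude>
                      </excludes>
                      <includes>
                          <include>**/*</include>
                      </includes>
                  </resource>
                  <resource>
                      <directory>${basedir}/src/main/resources/env/${profile.active}</directory>
                      <includes>
                          <include>**/*.properties</include>
                          <include>**/*.xml</include>
                      </includes>
                  </resource>
              </resources>
          </build>
          <profiles>
              <profile>
                  <id>dev</id>
                  <properties>
                      <profile.active>dev</profile.active>
                  </properties>
                  <activation>
                      <activeByDefault>true</activeByDefault>
                  </activation>
              </profile>
              <profile>
                  <id>test</id>
                  <properties>
                      <profile.active>test</profile.active>
                  </properties>
              </profile>
              <profile>
                  <id>prod</id>
                  <properties>
                      <profile.active>prod</profile.active>
                  </properties>
              </profile>
          </profiles>
      </project>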
      
      
    3. 运行结果
      Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
      18/06/27 10:30:40 INFO metastore: Trying to connect to metastore with URI thrift://cdh01:9083
      18/06/27 10:30:41 WARN ShellBasedUnixGroupsMapping: got exception trying to get groups for user root: GetLocalGroupsForUser error (1332): ?????????????????
      
      
      
      18/06/27 10:30:41 WARN UserGroupInformation: No groups available for user root
      18/06/27 10:30:41 INFO metastore: Connected to metastore.
      18/06/27 10:30:42 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
      18/06/27 10:30:42 WARN UserGroupInformation: No groups available for user root
      18/06/27 10:30:42 WARN UserGroupInformation: No groups available for user root
      18/06/27 10:30:42 WARN UserGroupInformation: No groups available for user root
      18/06/27 10:30:42 WARN UserGroupInformation: No groups available for user root
      +---+--------+------------+
      | id|    name|       hobby|
      +---+--------+------------+
      |  1|zhangsan|[唱歌, 跳舞, 游泳]|
      |  2|    lisi|   [打游戏, 篮球]|
      |  3|  wangwu|    [唱歌, 游泳]|
      +---+--------+------------+
      
      
      Process finished with exit code 0
      
  1. 遇到的问题
    1. 本地找不到 winutils 二进制文件

      问题日志:

      18/06/27 10:35:18 ERROR Shell: Failed to locate the winutils binary in the hadoop binary path
      java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
          at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:378)
          at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:393)
          at org.apache.hadoop.util.Shell.getGroupsForUserCommand(Shell.java:163)
          at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getUnixGroups(ShellBasedUnixGroupsMapping.java:84)
          at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getGroups(ShellBasedUnixGroupsMapping.java:52)
          at org.apache.hadoop.security.Groups$GroupCacheLoader.fetchGroupList(Groups.java:231)
          at org.apache.hadoop.security.Groups$GroupCacheLoader.load(Groups.java:211)
          at org.apache.hadoop.security.Groups$GroupCacheLoader.load(Groups.java:199)
          at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3524)
          at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2317)
          at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2280)
          at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2195)
          at com.google.common.cache.LocalCache.get(LocalCache.java:3934)
          at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:3938)
          at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4821)
          at org.apache.hadoop.security.Groups.getGroups(Groups.java:173)
          at org.apache.hadoop.security.UserGroupInformation.getGroupNames(UserGroupInformation.java:1552)
          at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:436)
          at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:236)
          at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
          at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
          at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
          at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
          at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
          at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
          at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1234)
          at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:174)
          at org.apache.hadoop.hive.ql.metadata.Hive.<init>(Hive.java:166)
          at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
          at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:191)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
          at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
          at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
          at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
          at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:362)
          at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:266)
          at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
          at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
          at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:193)
          at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:105)
          at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:93)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:35)
          at org.apache.spark.sql.internal.BaseSessionStateBuilder.build(BaseSessionStateBuilder.scala:289)
          at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1050)
          at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
          at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
          at scala.Option.getOrElse(Option.scala:121)
          at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:129)
          at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:126)
          at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
          at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
          at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
          at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
          at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
          at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
          at scala.collection.mutable.HashMap.foreach(HashMap.scala:130)
          at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:938)
          at com.cloudera.SparkHiveAPP$.main(SparkHiveAPP.scala:24)
          at com.cloudera.SparkHiveAPP.main(SparkHiveAPP.scala)
      

      解决办法:

      1. 下载 winutils 文件。下载地址: https://github.com/steveloughran/winutils

      2. 设置环境变量 HADOOP_HOME 。
        在本地机器中配置: HADOOP_HOME=D:\winutils-master\hadoop-2.6.0

        或在 IDEA 的运行配置(Run/Debug Configurations)中通过环境变量设置 HADOOP_HOME


        (截图:在 IDEA 运行配置的环境变量中设置 HADOOP_HOME)
  1. 不能访问 metastore, 无法实例化 SessionHiveMetaStoreClient

    原因: 在上面的 pom.xml 中引入整合 HBase 的相关 jar 后,访问 Hive 时会报以下异常,与未整合 HBase 时的报错不一样,但根本原因同样是本地缺少 winutils,解决办法同上。

    问题日志:

    log4j:WARN No appenders could be found for logger (org.apache.hadoop.util.Shell).
    log4j:WARN Please initialize the log4j system properly.
    log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
    Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
    18/06/27 10:47:01 INFO metastore: Trying to connect to metastore with URI thrift://cdh01:9083
    18/06/27 10:47:01 WARN Hive: Failed to access metastore. This class should not accessed in runtime.
    org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
        at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1236)
        at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:174)
        at org.apache.hadoop.hive.ql.metadata.Hive.<init>(Hive.java:166)
        at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
        at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:191)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
        at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
        at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:362)
        at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:266)
        at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
        at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
        at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:194)
        at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
        at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
        at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
        at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:193)
        at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:105)
        at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:93)
        at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)
        at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54)
        at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52)
        at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:35)
        at org.apache.spark.sql.internal.BaseSessionStateBuilder.build(BaseSessionStateBuilder.scala:289)
        at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1050)
        at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
        at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
        at scala.Option.getOrElse(Option.scala:121)
        at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:129)
        at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:126)
        at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
        at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
        at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
        at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
        at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
        at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
        at scala.collection.mutable.HashMap.foreach(HashMap.scala:130)
        at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:938)
        at com.cloudera.SparkHiveAPP$.main(SparkHiveAPP.scala:24)
        at com.cloudera.SparkHiveAPP.main(SparkHiveAPP.scala)
    Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
        at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
        at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
        at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
        at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
        at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
        at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
        at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1234)
        ... 41 more
    Caused by: java.lang.reflect.InvocationTargetException
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
        at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
        ... 47 more
    Caused by: java.lang.NullPointerException
        at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
        at org.apache.hadoop.util.Shell.runCommand(Shell.java:482)
        at org.apache.hadoop.util.Shell.run(Shell.java:455)
        at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:702)
        at org.apache.hadoop.util.Shell.execCommand(Shell.java:791)
        at org.apache.hadoop.util.Shell.execCommand(Shell.java:774)
        at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getUnixGroups(ShellBasedUnixGroupsMapping.java:84)
        at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getGroups(ShellBasedUnixGroupsMapping.java:52)
        at org.apache.hadoop.security.Groups.getGroups(Groups.java:139)
        at org.apache.hadoop.security.UserGroupInformation.getGroupNames(UserGroupInformation.java:1474)
        at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:436)
        at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:236)
        at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
        ... 52 more
    18/06/27 10:47:01 INFO metastore: Trying to connect to metastore with URI thrift://cdh01:9083
    Exception in thread "main" java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveSessionStateBuilder':
        at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1053)
        at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
        at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
        at scala.Option.getOrElse(Option.scala:121)
        at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:129)
        at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:126)
        at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
        at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
        at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
        at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
        at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
        at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
        at scala.collection.mutable.HashMap.foreach(HashMap.scala:130)
        at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:938)
        at com.cloudera.SparkHiveAPP$.main(SparkHiveAPP.scala:24)
        at com.cloudera.SparkHiveAPP.main(SparkHiveAPP.scala)
    Caused by: org.apache.spark.sql.AnalysisException: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
        at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:106)
        at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:193)
        at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:105)
        at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:93)
        at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)
        at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54)
        at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52)
        at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:35)
        at org.apache.spark.sql.internal.BaseSessionStateBuilder.build(BaseSessionStateBuilder.scala:289)
        at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1050)
        ... 15 more
    Caused by: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
        at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
        at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:191)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
        at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
        at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:362)
        at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:266)
        at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
        at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
        at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:194)
        at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
        at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
        at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
        ... 24 more
    Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
        at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
        at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
        at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
        at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
        at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
        at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
        at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
        ... 38 more
    Caused by: java.lang.reflect.InvocationTargetException
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
        at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
        ... 44 more
    Caused by: java.lang.NullPointerException
        at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
        at org.apache.hadoop.util.Shell.runCommand(Shell.java:482)
        at org.apache.hadoop.util.Shell.run(Shell.java:455)
        at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:702)
        at org.apache.hadoop.util.Shell.execCommand(Shell.java:791)
        at org.apache.hadoop.util.Shell.execCommand(Shell.java:774)
        at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getUnixGroups(ShellBasedUnixGroupsMapping.java:84)
        at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getGroups(ShellBasedUnixGroupsMapping.java:52)
        at org.apache.hadoop.security.Groups.getGroups(Groups.java:139)
        at org.apache.hadoop.security.UserGroupInformation.getGroupNames(UserGroupInformation.java:1474)
        at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:436)
        at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:236)
        at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
        ... 49 more
    
    Process finished with exit code 1
    
    

你可能感兴趣的:(Spark 读取 Hive 数据及相关问题解决)