一.环境描述
通过 spark-submit 向 YARN 提交作业时报错。业务代码比较简单:通过接口调用获取数据,再用 Spark SQL 将数据写入 Hive。期间尝试过替换各种 Hadoop 版本,最终解决了该问题。
1.hadoop环境
2.提交命令及项目 pom.xml
spark-submit \
--name GridCorrelationMain \
--master yarn \
--deploy-mode cluster \
--executor-cores 2 \
--executor-memory 4G \
--num-executors 5 \
--driver-memory 2G \
--class cn.zd.maincode.wangge.GridCorrelationMain \
/home/boeadm/zwj/iot/cp-etl-spark-data/target/cp_zhengda_spark_utils-1.0-SNAPSHOT.jar \
eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2OTI0MzU5NjgsImlhdCI6MTY5MjM0OTU2Mywic3ViIjo1MjB9.rCmnhF2EhdzH62T7lP3nmxQSxh17PotscxEcZkjL5hk
org.apache.commons
commons-configuration2
2.9.0
org.apache.spark
spark-core_2.11
2.3.3
hadoop-client
org.apache.hadoop
slf4j-log4j12
org.slf4j
org.apache.spark
spark-sql_2.11
2.3.3
org.apache.hadoop
hadoop-common
${hadoop.version}
commons-codec
commons-codec
commons-httpclient
commons-httpclient
org.apache.hadoop
hadoop-client
${hadoop.version}
hadoop-common
org.apache.hadoop
org.apache.hadoop
hadoop-hdfs
${hadoop.version}
org.apache.spark
spark-hive_2.11
2.3.2
hive-exec
org.spark-project.hive
hive-metastore
org.spark-project.hive
org.apache.hadoop
hadoop-mapreduce-client-core
${hadoop.version}
org.apache.hive
hive-jdbc
org.eclipse.jetty.aggregate
jetty-all
org.apache.hive
hive-shims
hbase-mapreduce
org.apache.hbase
hbase-server
org.apache.hbase
log4j-slf4j-impl
org.apache.logging.log4j
slf4j-log4j12
org.slf4j
2.1.1
org.apache.httpcomponents
httpclient
4.5.13
commons-codec
commons-codec
commons-codec
commons-codec
1.15
com.typesafe
config
1.3.1
com.alibaba
fastjson
1.2.62
com.alibaba
fastjson
${fastjson.version}
org.json
json
20160810
com.github.qlone
retrofit-crawler
1.0.0
com.oracle.database.jdbc
ojdbc8
12.2.0.1
mysql
mysql-connector-java
5.1.40
javax.mail
javax.mail-api
1.5.6
org.apache.commons
commons-email
1.4
3.项目集群提交报错
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupRelation(SessionCatalog.scala:696)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveRelations$$lookupTableFromCatalog(Analyzer.scala:730)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.resolveRelation(Analyzer.scala:685)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:715)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:708)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:89)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:708)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:654)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:127)
at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:121)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:106)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:78)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:651)
at cn.zd.maincode.wangge.GridCorrelationMain$.createDataFrameAndTempView(GridCorrelationMain.scala:264)
at cn.zd.maincode.wangge.GridCorrelationMain$.horecaGridInfo(GridCorrelationMain.scala:148)
at cn.zd.maincode.wangge.GridCorrelationMain$.main(GridCorrelationMain.scala:110)
at cn.zd.maincode.wangge.GridCorrelationMain.main(GridCorrelationMain.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:673)
Caused by: java.lang.ExceptionInInitializerError
at org.apache.hadoop.hive.conf.HiveConf.<clinit>(HiveConf.java:105)
at org.apache.spark.sql.hive.client.HiveClientImpl.newState(HiveClientImpl.scala:153)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:118)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:292)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:395)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:284)
at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:68)
at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:67)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:99)
... 72 more
Caused by: java.lang.IllegalArgumentException: Unrecognized Hadoop major version number: 3.0.0-cdh6.3.2
at org.apache.hadoop.hive.shims.ShimLoader.getMajorVersion(ShimLoader.java:169)
at org.apache.hadoop.hive.shims.ShimLoader.loadShims(ShimLoader.java:134)
at org.apache.hadoop.hive.shims.ShimLoader.getHadoopShims(ShimLoader.java:95)
at org.apache.hadoop.hive.conf.HiveConf$ConfVars.<clinit>(HiveConf.java:354)
... 88 more
End of LogType:stderr
4.最终解决方式
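不要将 Spark、Hadoop、Hive 相关依赖(spark-core、spark-sql、spark-hive、hadoop-common、hadoop-client、hadoop-hdfs、hadoop-mapreduce-client-core 等)打进 jar 包,运行时使用集群自带的版本,避免包内自带的 Hive 类无法识别集群的 Hadoop 版本号(3.0.0-cdh6.3.2)。调整后保留的依赖如下: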
org.apache.hive
hive-jdbc
org.eclipse.jetty.aggregate
jetty-all
org.apache.hive
hive-shims
hbase-mapreduce
org.apache.hbase
hbase-server
org.apache.hbase
log4j-slf4j-impl
org.apache.logging.log4j
slf4j-log4j12
org.slf4j
2.1.1
org.apache.httpcomponents
httpclient
4.5.13
commons-codec
commons-codec
commons-codec
commons-codec
1.15
com.typesafe
config
1.3.1
com.alibaba
fastjson
1.2.62
com.alibaba
fastjson
${fastjson.version}
org.json
json
20160810
com.github.qlone
retrofit-crawler
1.0.0
com.oracle.database.jdbc
ojdbc8
12.2.0.1
mysql
mysql-connector-java
5.1.40
javax.mail
javax.mail-api
1.5.6
org.apache.commons
commons-email
1.4