/**
 * Entry point: prints rows of the Hive table `user_click`.
 *
 * Builds (or reuses) a SparkSession named "TopNApp" on master `local[2]` with Hive support
 * enabled, looks up the table `user_click` and shows up to 10 of its rows.
 *
 * @param args command-line arguments; not used
 */
def main(args: Array[String]): Unit = {
  val spark: SparkSession = SparkSession
    .builder()
    .appName("TopNApp")
    .master("local[2]")
    .enableHiveSupport()
    .getOrCreate()
  try {
    val userClickDF = spark.table("user_click")
    userClickDF.show(10)
  } finally {
    // Release the session even when the table lookup or the show call fails.
    spark.stop()
  }
}
Exception in thread "main" java.lang.IllegalArgumentException: Unable to instantiate SparkSession with Hive support because Hive classes are not found.
at org.apache.spark.sql.SparkSession$Builder.enableHiveSupport(SparkSession.scala:869)
at homework0522.OverwriteTopN$.main(OverwriteTopN.scala:12)
at homework0522.OverwriteTopN.main(OverwriteTopN.scala)
"SparkSession.scala"
/**
 * Enables Hive support, including connectivity to a persistent Hive metastore, support for
 * Hive serdes, and Hive user-defined functions.
 *
 * @return the [[Builder]] obtained after setting the catalog implementation to "hive"
 * @throws IllegalArgumentException if the Hive classes cannot be loaded
 * @since 2.0.0
 */
def enableHiveSupport(): Builder = synchronized {
  // The failure under investigation starts here: this check evaluates to false
  // because the Hive classes cannot be found, so the else branch throws.
  if (hiveClassesArePresent) {
    config(CATALOG_IMPLEMENTATION.key, "hive")
  } else {
    throw new IllegalArgumentException(
      "Unable to instantiate SparkSession with Hive support because " +
        "Hive classes are not found.")
  }
}
/**
 * Checks whether the classes required for Hive support can be loaded by name.
 *
 * @return true if Hive classes can be loaded, otherwise false.
 */
private[spark] def hiveClassesArePresent: Boolean = {
  try {
    // Both classes below are looked up by name. In the failure investigated here,
    // the first lookup (HiveSessionStateBuilder) is already the one that fails.
    Utils.classForName(HIVE_SESSION_STATE_BUILDER_CLASS_NAME)
    Utils.classForName("org.apache.hadoop.hive.conf.HiveConf")
    true
  } catch {
    // A missing class is reported as "not present" instead of propagating the error.
    case _: ClassNotFoundException | _: NoClassDefFoundError => false
  }
}
"发现找不到HiveSessionStateBuilder"
// Fully qualified name of the Hive session-state builder class; it is loaded by name
// rather than referenced directly.
private val HIVE_SESSION_STATE_BUILDER_CLASS_NAME =
"org.apache.spark.sql.hive.HiveSessionStateBuilder"
将$HIVE_HOME/lib下的spark-hive_2.11-2.4.2.jar
与spark-hive-thriftserver_2.11-2.4.2.jar
添加到project中
Exception in thread "main" java.lang.NoSuchFieldError: METASTORE_CLIENT_SOCKET_LIFETIME
at org.apache.spark.sql.hive.HiveUtils$.formatTimeVarsForHiveClient(HiveUtils.scala:194)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:285)
at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
"HiveUtils.scala"
/**
* Change time configurations needed to create a [[HiveClient]] into unified [[Long]] format.
*/
private[hive] def formatTimeVarsForHiveClient(hadoopConf: Configuration): Map[String, String] = {
// Hive 0.14.0 introduces timeout operations in HiveConf, and changes default values of a bunch
// of time `ConfVar`s by adding time suffixes (`s`, `ms`, and `d` etc.). This breaks backwards-
// compatibility when users are trying to connecting to a Hive metastore of lower version,
// because these options are expected to be integral values in lower versions of Hive.
//
// Here we enumerate all time `ConfVar`s and convert their values to numeric strings according
// to their output time units.
Seq(
ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY -> TimeUnit.SECONDS,
ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT -> TimeUnit.SECONDS,
"在这里读不到值"
ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME -> TimeUnit.SECONDS,
...
).map { case (confVar, unit) =>
confVar.varname -> HiveConf.getTimeVar(hadoopConf, confVar, unit).toString
}.toMap
}
"HiveConf.java"
public static enum ConfVars {
SCRIPTWRAPPER("hive.exec.script.wrapper", (Object)null, ""),
PLAN("hive.exec.plan", "", ""),
...
}
发现ConfVars中定义的变量并没有METASTORE_CLIENT_SOCKET_LIFETIME,而HiveConf.java来自于hive-exec-1.1.0-cdh5.7.0.jar,即证明hive 1.1.0中并没有加入该参数。
将hive依赖换为1.2.1
<properties>
  ...
  <hive.version>1.2.1</hive.version>
</properties>
...
<dependency>
  <groupId>org.apache.hive</groupId>
  <artifactId>hive-exec</artifactId>
  <version>${hive.version}</version>
</dependency>
Exception in thread "main" org.apache.spark.sql.AnalysisException: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
Caused by: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
Caused by: java.lang.reflect.InvocationTargetException
Caused by: MetaException(message:Could not connect to meta store using any of the URIs provided. Most recent failure: org.apache.thrift.transport.TTransportException: java.net.ConnectException: Connection refused: connect
Caused by: java.net.ConnectException: Connection refused: connect
这是因为远端的Hive metastore服务没有启动,导致连接被拒绝(Connection refused)。需要先在远端启动metastore服务:
$HIVE_HOME/bin/hive --service metastore &