Goal: query table data stored in Hive through Spark SQL. In summary, three things are required:
1. Start Hive's metastore service.
2. Add the spark-hive_2.12 dependency (after the "Hive classes are not found" error appears).
3. Create the SparkSession with .enableHiveSupport().
Place the hive-site.xml file in the resources directory of the IDEA project, so that Spark can find the Hive metastore configuration.
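For reference, the one setting Spark needs from hive-site.xml in this setup is the metastore URI. This is a minimal sketch; the host and port are assumptions for a typical single-node deployment (9083 is the default metastore port):

<configuration>
  <property>
    <!-- illustrative values; adjust to your cluster -->
    <name>hive.metastore.uris</name>
    <value>thrift://hadoop001:9083</value>
  </property>
</configuration>

The driver program: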
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object SparkSqlEvents {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("sparkSqlEvent")
    val session = SparkSessionSingleton.getInstance(conf)
    // Resolve DataNodes by hostname rather than (possibly unreachable) internal IPs
    session.sparkContext.hadoopConfiguration.set("dfs.client.use.datanode.hostname", "true")
    import session.implicits._

    session.sql(
      """
        |SELECT `time`, id, topic, catagray, url
        |FROM test.sougodata
        |""".stripMargin).show(20)
  }
}
object SparkSessionSingleton {

  @transient private var instance: SparkSession = _

  def getInstance(sparkConf: SparkConf): SparkSession = {
    if (instance == null) {
      instance = SparkSession
        .builder
        .config(sparkConf)
        .enableHiveSupport()
        .getOrCreate()
    }
    instance
  }
}
Running the program the first time throws:
Exception in thread "main" java.lang.IllegalArgumentException: Unable to instantiate SparkSession with Hive support because Hive classes are not found.
at org.apache.spark.sql.SparkSession$Builder.enableHiveSupport(SparkSession.scala:845)
at com.rachel.templates.utils.SparkSessionSingleton$.getInstance(SparkSessionSingleton.scala:28)
at com.rachel.templates.sql.SparkSqlEvents$.main(SparkSqlEvents.scala:11)
at com.rachel.templates.sql.SparkSqlEvents.main(SparkSqlEvents.scala)
Locate the failing code:
org.apache.spark.sql.SparkSession.Builder#enableHiveSupport
i.e. the following code block:
def enableHiveSupport(): Builder = synchronized {
  if (hiveClassesArePresent) {
    config(CATALOG_IMPLEMENTATION.key, "hive")
  } else {
    throw new IllegalArgumentException(
      "Unable to instantiate SparkSession with Hive support because " +
        "Hive classes are not found.")
  }
}
Tracing one step further:
/**
 * @return true if Hive classes can be loaded, otherwise false.
 */
private[spark] def hiveClassesArePresent: Boolean = {
  try {
    Utils.classForName(HIVE_SESSION_STATE_BUILDER_CLASS_NAME)
    Utils.classForName("org.apache.hadoop.hive.conf.HiveConf")
    true
  } catch {
    case _: ClassNotFoundException | _: NoClassDefFoundError => false
  }
}
HIVE_SESSION_STATE_BUILDER_CLASS_NAME is org.apache.spark.sql.hive.HiveSessionStateBuilder. A class search in IDEA (Ctrl+Shift+T) confirms that org.apache.spark.sql.hive.HiveSessionStateBuilder is indeed not on the classpath.
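You can reproduce Spark's check yourself without touching its internals. The sketch below uses plain Class.forName in place of Spark's Utils.classForName; the object name HiveClassCheck is just an illustrative choice:

// Classpath probe mirroring Spark's hiveClassesArePresent logic
object HiveClassCheck extends App {
  private def present(className: String): Boolean =
    try { Class.forName(className); true }
    catch { case _: ClassNotFoundException | _: NoClassDefFoundError => false }

  println(s"HiveSessionStateBuilder present: ${present("org.apache.spark.sql.hive.HiveSessionStateBuilder")}")
  println(s"HiveConf present:                ${present("org.apache.hadoop.hive.conf.HiveConf")}")
}

If both lines print false before adding the dependency and true afterwards, the classpath fix worked.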
Find the Maven dependency that provides this class and add it to pom.xml:

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.12</artifactId>
    <version>2.4.5</version>
    <scope>provided</scope>
</dependency>

Note that with provided scope the jar is excluded from the runtime classpath of the packaged application; when running directly from IDEA you may need to tick "Include dependencies with 'Provided' scope" in the run configuration (or temporarily use compile scope).
Running the program again produces a different error:
20/04/21 11:20:06 WARN Hive: Failed to access metastore. This class should not accessed in runtime.
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1236)
Cause: the Hive metastore service has not been started. Start it on the Hive host:
[hadoop@hadoop001 bin]$ ./hive --service metastore &
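Optionally, confirm the metastore is up before re-running; it listens on port 9083 by default (this command is a generic sanity check, not part of the original run):

# the metastore process should show up as LISTEN on port 9083
netstat -nlp | grep 9083

Re-running the program, the query now succeeds: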
20/04/21 11:24:26 INFO DAGScheduler: Job 0 finished: show at SparkSqlEvents.scala:20, took 2.275769 s
20/04/21 11:24:26 INFO CodeGenerator: Code generated in 19.6427 ms
+--------+-----------------+----------------+--------+--------------------+
| time| id| topic|catagray| url|
+--------+-----------------+----------------+--------+--------------------+
|00:00:00| 2982199073774412| [360安全卫士]| 8 3|download.it.com.c...|
|00:00:00|07594220010824798| [哄抢救灾物资]| 1 1|news.21cn.com/soc...|