Spark SQL: creating Hive external tables (CREATE EXTERNAL TABLE) and createGlobalTempView

Conclusions first:

Spark SQL rejects CREATE EXTERNAL TABLE in its native (USING) syntax outright, and in the Hive syntax it rejects it when no LOCATION clause is given; only non-external tables can be created that way. A table written with write.option("path", "/some/path").saveAsTable is effectively an external table.
To use external data, you can simply load it into a Dataset and register it with createOrReplaceTempView.
If the view is registered with createGlobalTempView, queries must qualify it with the database name, global_temp.tableName; otherwise Spark looks it up in the default database and fails with: Table or view 'tableName' not found in database 'default'. (See the sketch below.)
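
As a quick illustration of the last point, a minimal sketch (the input file and view names are placeholders, not from the original code):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("GlobalTempViewDemo")
  .master("local[*]")
  .enableHiveSupport() // needed for the Hive statements later in the post
  .getOrCreate()

val df = spark.read.json("people.json") // hypothetical input

// Session-scoped view: no qualifier needed, gone when the session ends.
df.createOrReplaceTempView("people")
spark.sql("SELECT * FROM people").show()

// Global temp view: registered in the reserved global_temp database,
// shared across sessions of the same application.
df.createGlobalTempView("people_g")
spark.sql("SELECT * FROM global_temp.people_g").show()

// Unqualified access fails with the error quoted above:
// spark.sql("SELECT * FROM people_g").show()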

Running the following SQL:

// Hive-style CREATE TABLE syntax (ROW FORMAT DELIMITED, no USING clause)
sql("create external table if not exists src(key INT ,value STRING ) row format delimited  fields terminated by '\t'")

The error:

Exception in thread "main" org.apache.spark.sql.catalyst.parser.ParseException: 
Operation not allowed: CREATE EXTERNAL TABLE must be accompanied by LOCATION(line 1, pos 0)

== SQL ==
create external table if not exists src(key INT ,value STRING ) row format delimited  fields terminated by '	'
^^^

	at org.apache.spark.sql.catalyst.parser.ParserUtils$.operationNotAllowed(ParserUtils.scala:41)
	at org.apache.spark.sql.execution.SparkSqlAstBuilder$$anonfun$visitCreateHiveTable$1.apply(SparkSqlParser.scala:1153)
	at org.apache.spark.sql.execution.SparkSqlAstBuilder$$anonfun$visitCreateHiveTable$1.apply(SparkSqlParser.scala:1113)
	at org.apache.spark.sql.catalyst.parser.ParserUtils$.withOrigin(ParserUtils.scala:108)
	at org.apache.spark.sql.execution.SparkSqlAstBuilder.visitCreateHiveTable(SparkSqlParser.scala:1113)
	at org.apache.spark.sql.execution.SparkSqlAstBuilder.visitCreateHiveTable(SparkSqlParser.scala:55)
	at org.apache.spark.sql.catalyst.parser.SqlBaseParser$CreateHiveTableContext.accept(SqlBaseParser.java:1206)
	at org.antlr.v4.runtime.tree.AbstractParseTreeVisitor.visit(AbstractParseTreeVisitor.java:18)
	at org.apache.spark.sql.catalyst.parser.AstBuilder$$anonfun$visitSingleStatement$1.apply(AstBuilder.scala:72)
	at org.apache.spark.sql.catalyst.parser.AstBuilder$$anonfun$visitSingleStatement$1.apply(AstBuilder.scala:72)
	at org.apache.spark.sql.catalyst.parser.ParserUtils$.withOrigin(ParserUtils.scala:108)
	at org.apache.spark.sql.catalyst.parser.AstBuilder.visitSingleStatement(AstBuilder.scala:71)
	at org.apache.spark.sql.catalyst.parser.AbstractSqlParser$$anonfun$parsePlan$1.apply(ParseDriver.scala:70)
	at org.apache.spark.sql.catalyst.parser.AbstractSqlParser$$anonfun$parsePlan$1.apply(ParseDriver.scala:69)
	at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:98)
	at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
	at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:69)
	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
	at com.example.sql.hive.SparkHiveExample$.main(SparkHiveExample.scala:36)
	at com.example.sql.hive.SparkHiveExample.main(SparkHiveExample.scala)

Looking at the source:
SparkSession.scala

  def sql(sqlText: String): DataFrame = {
    Dataset.ofRows(self, sessionState.sqlParser.parsePlan(sqlText))
  }
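
Since sql() hands the text straight to the parser, the failure happens at parse time, before analysis or execution ever starts. A small sketch to confirm, reusing the spark session from above (Spark 2.x, as in the stack trace):

import org.apache.spark.sql.catalyst.parser.ParseException

try {
  spark.sql("create external table if not exists src(key INT ,value STRING ) " +
    "row format delimited  fields terminated by '\t'")
} catch {
  case e: ParseException =>
    // Operation not allowed: CREATE EXTERNAL TABLE must be accompanied by LOCATION
    println(e.getMessage)
}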

sql() parses sqlText into a logical plan: sqlText -> LogicalPlan.
SparkSqlAstBuilder converts the ParseTree that ANTLR builds from the SQL into a LogicalPlan / Expression / TableIdentifier.
CreateTableHeaderContext holds the parsed table header, i.e. the CREATE [TEMPORARY] [EXTERNAL] TABLE ... [IF NOT EXISTS] part of the statement:

	public static class CreateTableHeaderContext extends ParserRuleContext {
		public TerminalNode CREATE() { return getToken(SqlBaseParser.CREATE, 0); }
		public TerminalNode TABLE() { return getToken(SqlBaseParser.TABLE, 0); }
		public TableIdentifierContext tableIdentifier() {
			return getRuleContext(TableIdentifierContext.class,0);
		}
		public TerminalNode TEMPORARY() { return getToken(SqlBaseParser.TEMPORARY, 0); }
		public TerminalNode EXTERNAL() { return getToken(SqlBaseParser.EXTERNAL, 0); }
		public TerminalNode IF() { return getToken(SqlBaseParser.IF, 0); }
		public TerminalNode NOT() { return getToken(SqlBaseParser.NOT, 0); }
		public TerminalNode EXISTS() { return getToken(SqlBaseParser.EXISTS, 0); }
		public CreateTableHeaderContext(ParserRuleContext parent, int invokingState) {
			super(parent, invokingState);
		}
		// ... remaining generated members omitted
	}

SparkSqlAstBuilder contains the following check for the native USING syntax, which rejects EXTERNAL outright. (The Hive syntax path, visitCreateHiveTable, the method actually shown in the stack trace above, instead rejects EXTERNAL only when no LOCATION clause is given.)

// SparkSqlAstBuilder
  override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) {
    val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader)
    if (external) {
      operationNotAllowed("CREATE EXTERNAL TABLE ... USING", ctx)
    }

    checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx)
    checkDuplicateClauses(ctx.OPTIONS, "OPTIONS", ctx)
    checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx)
    checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
    checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx)
    checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx)
    ...
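
Given that check, the error message itself suggests the workaround: with the Hive syntax, EXTERNAL is accepted as long as a LOCATION is supplied. A sketch (Hive-enabled session as above; /some/path is a placeholder):

spark.sql(
  """create external table if not exists src(key INT, value STRING)
    |row format delimited fields terminated by '\t'
    |location '/some/path'""".stripMargin)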

This issue has been raised before: SPARK-2825: Allow creating external tables in metastore.

A view registered with createTempView in Spark plays much the same role as an external table in Hive: dropping it never touches the underlying data.

One final note: createOrReplaceTempView only creates a temporary view, which disappears when the application ends; use saveAsTable to persist a table into the Hive metastore.
So how do you persist it as an external table, so that the data is not deleted when the table is dropped?

df.write.option("path", "/some/path").saveAsTable("t") — with a path, dropping the table does not delete the data: an external table.
df.write.saveAsTable("t") — without a path, dropping the table deletes the data as well: a managed table.
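
Putting the two together, a minimal sketch (table names and path are placeholders; spark is the Hive-enabled session from above):

import spark.implicits._

val df = Seq((1, "a"), (2, "b")).toDF("key", "value")

// External-style: metadata in the metastore, data at the given path.
df.write.option("path", "/some/path").saveAsTable("t_ext")

// Managed: data lives under the warehouse directory.
df.write.saveAsTable("t_managed")

spark.sql("drop table t_ext")     // files under /some/path survive
spark.sql("drop table t_managed") // warehouse data is deleted too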
