ANTLR Usage

ANTLR in Spark

Spark generates its SQL parser from the ANTLR grammar files under catalyst/src/main/antlr4 using the antlr4-maven-plugin, configured in the module's pom.xml:

      <plugin>
        <groupId>org.antlr</groupId>
        <artifactId>antlr4-maven-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>antlr4</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <visitor>true</visitor>
          <sourceDirectory>../catalyst/src/main/antlr4</sourceDirectory>
          <treatWarningsAsErrors>true</treatWarningsAsErrors>
        </configuration>
      </plugin>
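
With this plugin in place, the Maven build runs ANTLR over the grammar files in that directory (SqlBaseLexer.g4 / SqlBaseParser.g4 in Spark 3.3) and generates the lexer, the parser, and the visitor base class that Spark's AstBuilder extends. A minimal way to exercise the generated parser, sketched against Spark's internal Catalyst API (which may change between versions):

import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// Parse SQL text into an unresolved logical plan via the ANTLR-generated
// SqlBaseParser and the AstBuilder visitor.
val plan = CatalystSqlParser.parsePlan(
  "select * from t2 where b = dayofmonth('2009-07-03')")
println(plan.treeString)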
How Spark parses CURRENT_DATE / CURRENT_TIMESTAMP / CURRENT_USER

The currentLike alternative in the grammar is turned into an expression by this visitor method in AstBuilder:
  override def visitCurrentLike(ctx: CurrentLikeContext): Expression = withOrigin(ctx) {
    if (conf.enforceReservedKeywords) {
      ctx.name.getType match {
        case SqlBaseParser.CURRENT_DATE =>
          CurrentDate()
        case SqlBaseParser.CURRENT_TIMESTAMP =>
          CurrentTimestamp()
        case SqlBaseParser.CURRENT_USER =>
          CurrentUser()
      }
    } else {
      // If the parser is not in ansi mode, we should return `UnresolvedAttribute`, in case there
      // are columns named `CURRENT_DATE` or `CURRENT_TIMESTAMP`.
      UnresolvedAttribute.quoted(ctx.name.getText)
    }
  }
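
Which branch is taken depends on spark.sql.ansi.enforceReservedKeywords (in Spark 3.3 the conf.enforceReservedKeywords accessor only returns true when spark.sql.ansi.enabled is also true). A rough spark-shell sketch of the two behaviors:

// Sketch only: toggling the branch taken in visitCurrentLike (Spark 3.3 config names).
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.conf.set("spark.sql.ansi.enforceReservedKeywords", "true")
spark.sql("select current_date").explain(true)   // parsed as the CurrentDate() expression

spark.conf.set("spark.sql.ansi.enforceReservedKeywords", "false")
spark.sql("select current_date").explain(true)   // parsed as 'current_date (UnresolvedAttribute)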
case class CurrentTimestamp() extends CurrentTimestampLike {
  override def prettyName: String = "current_timestamp"
}

abstract class CurrentTimestampLike() extends LeafExpression with CodegenFallback {
  override def foldable: Boolean = true
  override def nullable: Boolean = false
  override def dataType: DataType = TimestampType
  override def eval(input: InternalRow): Any = currentTimestamp()
  final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE)
}
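
Because the expression is foldable, the optimizer can evaluate it once up front: Spark's ComputeCurrentTime rule replaces every CurrentTimestamp() in a plan with a single literal, so all tasks see the same value. A simplified sketch of that kind of substitution (the helper name is hypothetical; the real rule lives in the Catalyst optimizer):

import org.apache.spark.sql.catalyst.expressions.{CurrentTimestamp, Literal}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.types.TimestampType

// Simplified stand-in for ComputeCurrentTime: read the wall clock once and
// substitute the same literal for every CurrentTimestamp() in the plan.
def replaceCurrentTimestamp(plan: LogicalPlan): LogicalPlan = {
  val nowMicros = DateTimeUtils.currentTimestamp()   // microseconds since the epoch
  plan.transformAllExpressions {
    case CurrentTimestamp() => Literal(nowMicros, TimestampType)
  }
}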
Example: modifying the G4 grammar file

The spark-shell sessions below compare a filter on the constant expression dayofmonth('2009-07-03') with filters that use scalar subqueries, and show how each is represented in the parsed, analyzed, optimized, and physical plans.
scala> sql("select * from t2 where b = dayofmonth('2009-07-03')").show
+---+---+---+
|  a|  b|  c|
+---+---+---+
| a3|  3| c3|
+---+---+---+

scala> sql("select * from t2 where b = dayofmonth('2009-07-03')").explain(true)
== Parsed Logical Plan ==
'Project [*]
+- 'Filter ('b = 'dayofmonth(2009-07-03))
   +- 'UnresolvedRelation [t2], [], false

== Analyzed Logical Plan ==
a: string, b: string, c: string
Project [a#93, b#94, c#95]
+- Filter (cast(b#94 as int) = dayofmonth(cast(2009-07-03 as date)))
   +- SubqueryAlias spark_catalog.default.t2
      +- Relation default.t2[a#93,b#94,c#95] parquet

== Optimized Logical Plan ==
Filter (isnotnull(b#94) AND (cast(b#94 as int) = 3))
+- Relation default.t2[a#93,b#94,c#95] parquet

== Physical Plan ==
*(1) ColumnarToRow
+- FileScan parquet default.t2[a#93,b#94,c#95] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/Users/{user}/Dev/project/tencent/spark-dewu-emr-3.3.2/spark-..., PartitionFilters: [isnotnull(b#94), (cast(b#94 as int) = 3)], PushedFilters: [], ReadSchema: struct<a:string>

sql("select * from t2 where b = (select max(b) from t2)").show

scala> sql("select * from t2 where b = (select max(b) from t2)").show
+---+---+---+
|  a|  b|  c|
+---+---+---+
| a3| b3| c3|
+---+---+---+

sql("select * from t2 where b = (select max(b) from t2)").explain(true)

scala> sql("select * from t2 where b = (select max(b) from t2)").explain(true)
== Parsed Logical Plan ==
'Project [*]
+- 'Filter ('b = scalar-subquery#144 [])
   :  +- 'Project [unresolvedalias('max('b), None)]
   :     +- 'UnresolvedRelation [t2], [], false
   +- 'UnresolvedRelation [t2], [], false

== Analyzed Logical Plan ==
a: string, b: string, c: string
Project [a#93, b#94, c#95]
+- Filter (b#94 = scalar-subquery#144 [])
   :  +- Aggregate [max(b#148) AS max(b)#146]
   :     +- SubqueryAlias spark_catalog.default.t2
   :        +- Relation default.t2[a#147,b#148,c#149] parquet
   +- SubqueryAlias spark_catalog.default.t2
      +- Relation default.t2[a#93,b#94,c#95] parquet

== Optimized Logical Plan ==
Filter (isnotnull(b#94) AND (b#94 = scalar-subquery#144 []))
:  +- Aggregate [max(b#148) AS max(b)#146]
:     +- Project [b#148]
:        +- Relation default.t2[a#147,b#148,c#149] parquet
+- Relation default.t2[a#93,b#94,c#95] parquet

== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- FileScan parquet default.t2[a#93,b#94,c#95] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(4 paths)[file:/Users/xiangxshen/Dev/project/tencent/spark-dewu-emr-3.3.2/spark-..., PartitionFilters: [isnotnull(b#94), (b#94 = Subquery subquery#144, [id=#221])], PushedFilters: [], ReadSchema: struct<a:string>
      +- Subquery subquery#144, [id=#221]
         +- AdaptiveSparkPlan isFinalPlan=false
            +- SortAggregate(key=[], functions=[max(b#148)], output=[max(b)#146])
               +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=219]
                  +- SortAggregate(key=[], functions=[partial_max(b#148)], output=[max#154])
                     +- Project [b#148]
                        +- FileScan parquet default.t2[b#148,c#149] Batched: true, DataFilters: [], Format: Parquet, Location: CatalogFileIndex(1 paths)[file:/Users/xiangxshen/Dev/project/tencent/spark-dewu-emr-3.3.2/spark-w..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<>



sql("select * from t1 where b = (select dayofmonth('2009-07-03') from t1)").show

t1 was created and populated as follows:

sql(
  """
    |CREATE TABLE t1(a string, b string, c string)
    |USING PARQUET
    |PARTITIONED BY (b,c)""".stripMargin)

val insert = sql(
  """
    |INSERT INTO t1 VALUES
    |('a3', '3', 'c3')""".stripMargin)
scala> sql("select * from t1 where b = (select dayofmonth('2009-07-03') from t1)").show
+---+---+---+
|  a|  b|  c|
+---+---+---+
| a3|  3| c3|
+---+---+---+

sql("select * from t1 where b = (select dayofmonth('2009-07-03') from t1)").explain(true)

scala> sql("select * from t1 where b = (select dayofmonth('2009-07-03') from t1)").explain(true)
== Parsed Logical Plan ==
'Project [*]
+- 'Filter ('b = scalar-subquery#213 [])
   :  +- 'Project [unresolvedalias('dayofmonth(2009-07-03), None)]
   :     +- 'UnresolvedRelation [t1], [], false
   +- 'UnresolvedRelation [t1], [], false

== Analyzed Logical Plan ==
a: string, b: string, c: string
Project [a#202, b#203, c#204]
+- Filter (cast(b#203 as int) = scalar-subquery#213 [])
   :  +- Project [dayofmonth(cast(2009-07-03 as date)) AS dayofmonth(2009-07-03)#214]
   :     +- SubqueryAlias spark_catalog.default.t1
   :        +- Relation default.t1[a#215,b#216,c#217] parquet
   +- SubqueryAlias spark_catalog.default.t1
      +- Relation default.t1[a#202,b#203,c#204] parquet

== Optimized Logical Plan ==
Filter (isnotnull(b#203) AND (cast(b#203 as int) = scalar-subquery#213 []))
:  +- Project [3 AS dayofmonth(2009-07-03)#214]
:     +- Relation default.t1[a#215,b#216,c#217] parquet
+- Relation default.t1[a#202,b#203,c#204] parquet

== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- FileScan parquet default.t1[a#202,b#203,c#204] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/Users/xiangxshen/Dev/project/tencent/spark-dewu-emr-3.3.2/spark-..., PartitionFilters: [isnotnull(b#203), (cast(b#203 as int) = Subquery subquery#213, [id=#332])], PushedFilters: [], ReadSchema: struct<a:string>
      +- Subquery subquery#213, [id=#332]
         +- AdaptiveSparkPlan isFinalPlan=false
            +- Project [3 AS dayofmonth(2009-07-03)#214]
               +- FileScan parquet default.t1[b#216,c#217] Batched: true, DataFilters: [], Format: Parquet, Location: CatalogFileIndex(1 paths)[file:/Users/xiangxshen/Dev/project/tencent/spark-dewu-emr-3.3.2/spark-w..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<>
