Spark 之 expression

## 二元表达式示例:DateDiff

/**
 * Binary expression that subtracts `startDate` from `endDate` and yields the difference as an
 * integer day count (positive when `endDate` is the later date, as the examples below show).
 *
 * Both children are declared as `DateType` inputs and are read as `Int` values at evaluation.
 *
 * @param endDate expression producing the date being subtracted from
 * @param startDate expression producing the date that is subtracted
 */
@ExpressionDescription(
  usage = "_FUNC_(endDate, startDate) - Returns the number of days from `startDate` to `endDate`.",
  examples = """
    Examples:
      > SELECT _FUNC_('2009-07-31', '2009-07-30');
       1

      > SELECT _FUNC_('2009-07-30', '2009-07-31');
       -1
  """,
  group = "datetime_funcs",
  since = "1.5.0")
case class DateDiff(endDate: Expression, startDate: Expression)
  extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {

  // The end date is the left operand, so the result is `left - right`.
  override def left: Expression = endDate
  override def right: Expression = startDate

  override def inputTypes: Seq[AbstractDataType] = DateType :: DateType :: Nil
  override def dataType: DataType = IntegerType

  /** Interpreted path: casts both operands to `Int` and returns `end - start`. */
  override def nullSafeEval(end: Any, start: Any): Any = {
    val endDays = end.asInstanceOf[Int]
    val startDays = start.asInstanceOf[Int]
    endDays - startDays
  }

  /** Codegen path: emits the same subtraction over the two generated operand terms. */
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
    defineCodeGen(ctx, ev, (endTerm, startTerm) => s"$endTerm - $startTerm")

  override protected def withNewChildrenInternal(
      newLeft: Expression, newRight: Expression): DateDiff = {
    copy(endDate = newLeft, startDate = newRight)
  }
}
允许有默认参数的表达式
/**
 * Ternary expression that converts a timestamp without time zone from one time zone to another
 * by delegating to `DateTimeUtils.convertTimestampNtzToAnotherTz`.
 *
 * The auxiliary two-argument constructor lets the source time zone be omitted, in which case
 * `CurrentTimeZone()` is used in its place.
 *
 * @param sourceTz expression producing the source time zone (declared as `StringType` input)
 * @param targetTz expression producing the target time zone (declared as `StringType` input)
 * @param sourceTs expression producing the timestamp to convert (`TimestampNTZType` input)
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_([sourceTz, ]targetTz, sourceTs) - Converts the timestamp without time zone `sourceTs` from the `sourceTz` time zone to `targetTz`. ",
  arguments = """
    Arguments:
      * sourceTz - the time zone for the input timestamp.
                   If it is missed, the current session time zone is used as the source time zone.
      * targetTz - the time zone to which the input timestamp should be converted
      * sourceTs - a timestamp without time zone
  """,
  examples = """
    Examples:
      > SELECT _FUNC_('Europe/Brussels', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00');
       2021-12-05 15:00:00
      > SELECT _FUNC_('Europe/Brussels', timestamp_ntz'2021-12-05 15:00:00');
       2021-12-06 00:00:00
  """,
  group = "datetime_funcs",
  since = "3.4.0")
// scalastyle:on line.size.limit
case class ConvertTimezone(
    sourceTz: Expression,
    targetTz: Expression,
    sourceTs: Expression)
  extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant {

  /** Two-argument form: the source time zone defaults to `CurrentTimeZone()`. */
  def this(targetTz: Expression, sourceTs: Expression) =
    this(CurrentTimeZone(), targetTz, sourceTs)

  override def first: Expression = sourceTz
  override def second: Expression = targetTz
  override def third: Expression = sourceTs

  override def inputTypes: Seq[AbstractDataType] =
    StringType :: StringType :: TimestampNTZType :: Nil
  override def dataType: DataType = TimestampNTZType

  /**
   * Interpreted path: unwraps the two zone strings and the `Long` timestamp value, then hands
   * them to `DateTimeUtils.convertTimestampNtzToAnotherTz`.
   */
  override def nullSafeEval(srcTz: Any, tgtTz: Any, micros: Any): Any = {
    val sourceZone = srcTz.asInstanceOf[UTF8String].toString
    val targetZone = tgtTz.asInstanceOf[UTF8String].toString
    val timestampValue = micros.asInstanceOf[Long]
    DateTimeUtils.convertTimestampNtzToAnotherTz(sourceZone, targetZone, timestampValue)
  }

  /** Codegen path: emits a call to the same `DateTimeUtils` method on the generated terms. */
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    // The Scala object's class name ends in "$"; stripping it gives the name used in the call.
    val utilsClass = DateTimeUtils.getClass.getName.stripSuffix("$")
    defineCodeGen(ctx, ev, (srcTz, tgtTz, micros) => {
      val callArgs = Seq(s"$srcTz.toString()", s"$tgtTz.toString()", micros).mkString(", ")
      s"$utilsClass.convertTimestampNtzToAnotherTz($callArgs)"
    })
  }

  override def prettyName: String = "convert_timezone"

  override protected def withNewChildrenInternal(
      newFirst: Expression,
      newSecond: Expression,
      newThird: Expression): ConvertTimezone =
    copy(sourceTz = newFirst, targetTz = newSecond, sourceTs = newThird)
}
时间函数
  /**
   * Computes the span between two timestamps, expressed in the requested unit.
   *
   * The unit name is upper-cased with `Locale.ROOT` before it is looked up in
   * `timestampDiffMap`, so the lookup is case-insensitive. Both timestamps are converted with
   * `getLocalDateTime` using `zoneId`, and the function registered for the unit is applied to
   * the converted start and end values.
   *
   * @param unit Name of the unit in which the difference is reported.
   * @param startTs The timestamp that is subtracted from `endTs`.
   * @param endTs The timestamp from which `startTs` is subtracted.
   * @param zoneId The time zone ID used when converting both timestamps.
   * @return The difference between the two timestamps in the given unit.
   * @throws IllegalStateException if `timestampDiffMap` has no entry for the unit.
   */
  def timestampDiff(unit: String, startTs: Long, endTs: Long, zoneId: ZoneId): Long = {
    val normalizedUnit = unit.toUpperCase(Locale.ROOT)
    // Reject unknown units up front; the message reports the unit exactly as the caller gave it.
    if (!timestampDiffMap.contains(normalizedUnit)) {
      throw new IllegalStateException(s"Got the unexpected unit '$unit'.")
    }
    val diffForUnit = timestampDiffMap(normalizedUnit)
    val localStart = getLocalDateTime(startTs, zoneId)
    val localEnd = getLocalDateTime(endTs, zoneId)
    diffForUnit(localStart, localEnd)
  }
TimestampDiff

旧版写法:unit 参数为 Expression(三元表达式)

/**
 * Ternary expression computing the difference between two timestamps in a chosen unit, where
 * the unit is itself a child expression whose string value is read at evaluation time.
 *
 * Both the interpreted and the generated code paths delegate to `DateTimeUtils.timestampDiff`.
 *
 * @param unit expression producing the unit name (declared as `StringType` input)
 * @param startTimestamp expression producing the timestamp that is subtracted
 * @param endTimestamp expression producing the timestamp that is subtracted from
 * @param timeZoneId optional time zone id; filled in through `withTimeZone`
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(unit, startTimestamp, endTimestamp) - Gets the difference between the timestamps `endTimestamp` and `startTimestamp` in the specified units by truncating the fraction part.",
  arguments = """
    Arguments:
      * unit - this indicates the units of the difference between the given timestamps.
        Supported string values of `unit` are (case insensitive):
          - "YEAR"
          - "QUARTER" - 3 months
          - "MONTH"
          - "WEEK" - 7 days
          - "DAY"
          - "HOUR"
          - "MINUTE"
          - "SECOND"
          - "MILLISECOND"
          - "MICROSECOND"
      * startTimestamp - A timestamp which the expression subtracts from `endTimestamp`.
      * endTimestamp - A timestamp from which the expression subtracts `startTimestamp`.
  """,
  examples = """
    Examples:
      > SELECT _FUNC_('HOUR', timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00');
       8
      > SELECT _FUNC_('MONTH', timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00');
       1
      > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50');
       -10
      > SELECT _FUNC_(YEAR, timestamp'2000-01-01 01:02:03.123456', timestamp'2010-01-01 01:02:03.123456');
       10
  """,
  group = "datetime_funcs",
  since = "3.3.0")
// scalastyle:on line.size.limit
case class TimestampDiff(
    unit: Expression,
    startTimestamp: Expression,
    endTimestamp: Expression,
    timeZoneId: Option[String] = None)
  extends TernaryExpression
  with ImplicitCastInputTypes
  with NullIntolerant
  with TimeZoneAwareExpression {

  /** Three-argument form: builds the expression with no time zone id set. */
  def this(unit: Expression, quantity: Expression, timestamp: Expression) =
    this(unit, quantity, timestamp, None)

  override def first: Expression = unit
  override def second: Expression = startTimestamp
  override def third: Expression = endTimestamp

  override def inputTypes: Seq[AbstractDataType] =
    StringType :: TimestampType :: TimestampType :: Nil
  override def dataType: DataType = LongType

  /** Returns a copy of this expression carrying the given time zone id. */
  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
    copy(timeZoneId = Option(timeZoneId))

  // Resolved on first use from the data type of `endTimestamp`; marked transient so it is
  // not serialized with the expression.
  @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType)

  /**
   * Interpreted path: unwraps the unit string and both `Long` timestamp values, then calls
   * `DateTimeUtils.timestampDiff` with the resolved zone id.
   */
  override def nullSafeEval(u: Any, startMicros: Any, endMicros: Any): Any = {
    val unitName = u.asInstanceOf[UTF8String].toString
    val startValue = startMicros.asInstanceOf[Long]
    val endValue = endMicros.asInstanceOf[Long]
    DateTimeUtils.timestampDiff(unitName, startValue, endValue, zoneIdInEval)
  }

  /**
   * Codegen path: registers the zone id as a reference object on the context and emits a call
   * to `DateTimeUtils.timestampDiff` over the generated child terms.
   */
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    // The Scala object's class name ends in "$"; stripping it gives the name used in the call.
    val utilsClass = DateTimeUtils.getClass.getName.stripSuffix("$")
    val zoneRef = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
    defineCodeGen(ctx, ev, (unitTerm, startTerm, endTerm) =>
      s"$utilsClass.timestampDiff($unitTerm.toString(), $startTerm, $endTerm, $zoneRef)")
  }

  override def prettyName: String = "timestampdiff"

  override protected def withNewChildrenInternal(
      newFirst: Expression,
      newSecond: Expression,
      newThird: Expression): TimestampDiff =
    copy(unit = newFirst, startTimestamp = newSecond, endTimestamp = newThird)
}

新版写法:unit 参数以 String 形式传入(二元表达式)

/**
 * Binary expression computing the difference between two timestamps in a chosen unit, where
 * the unit is a plain `String` constructor argument rather than a child expression.
 *
 * Only the two timestamps are children. The unit is passed straight to
 * `DateTimeUtils.timestampDiff` during evaluation, embedded as a string literal in generated
 * code, and prepended to the child SQL when rendering `sql`.
 *
 * @param unit name of the unit in which the difference is reported
 * @param startTimestamp expression producing the timestamp that is subtracted
 * @param endTimestamp expression producing the timestamp that is subtracted from
 * @param timeZoneId optional time zone id; filled in through `withTimeZone`
 */
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = "_FUNC_(unit, startTimestamp, endTimestamp) - Gets the difference between the timestamps `endTimestamp` and `startTimestamp` in the specified units by truncating the fraction part.",
  arguments = """
    Arguments:
      * unit - this indicates the units of the difference between the given timestamps.
        Supported string values of `unit` are (case insensitive):
          - "YEAR"
          - "QUARTER" - 3 months
          - "MONTH"
          - "WEEK" - 7 days
          - "DAY"
          - "HOUR"
          - "MINUTE"
          - "SECOND"
          - "MILLISECOND"
          - "MICROSECOND"
      * startTimestamp - A timestamp which the expression subtracts from `endTimestamp`.
      * endTimestamp - A timestamp from which the expression subtracts `startTimestamp`.
  """,
  examples = """
    Examples:
      > SELECT _FUNC_(HOUR, timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00');
       8
      > SELECT _FUNC_(MONTH, timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00');
       1
      > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50');
       -10
      > SELECT _FUNC_(YEAR, timestamp'2000-01-01 01:02:03.123456', timestamp'2010-01-01 01:02:03.123456');
       10
  """,
  group = "datetime_funcs",
  since = "3.3.0")
// scalastyle:on line.size.limit
case class TimestampDiff(
    unit: String,
    startTimestamp: Expression,
    endTimestamp: Expression,
    timeZoneId: Option[String] = None)
  extends BinaryExpression
  with ImplicitCastInputTypes
  with NullIntolerant
  with TimeZoneAwareExpression {

  /** Three-argument form: builds the expression with no time zone id set. */
  def this(unit: String, quantity: Expression, timestamp: Expression) =
    this(unit, quantity, timestamp, None)

  override def left: Expression = startTimestamp
  override def right: Expression = endTimestamp

  override def inputTypes: Seq[AbstractDataType] = TimestampType :: TimestampType :: Nil
  override def dataType: DataType = LongType

  /** Returns a copy of this expression carrying the given time zone id. */
  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
    copy(timeZoneId = Option(timeZoneId))

  // Resolved on first use from the data type of `endTimestamp`; marked transient so it is
  // not serialized with the expression.
  @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType)

  /**
   * Interpreted path: unwraps both `Long` timestamp values and calls
   * `DateTimeUtils.timestampDiff` with the constructor-supplied unit and resolved zone id.
   */
  override def nullSafeEval(startMicros: Any, endMicros: Any): Any = {
    val startValue = startMicros.asInstanceOf[Long]
    val endValue = endMicros.asInstanceOf[Long]
    DateTimeUtils.timestampDiff(unit, startValue, endValue, zoneIdInEval)
  }

  /**
   * Codegen path: registers the zone id as a reference object on the context and emits a call
   * to `DateTimeUtils.timestampDiff`, with the unit written into the code as a quoted literal.
   */
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    // The Scala object's class name ends in "$"; stripping it gives the name used in the call.
    val utilsClass = DateTimeUtils.getClass.getName.stripSuffix("$")
    val zoneRef = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
    defineCodeGen(ctx, ev, (startTerm, endTerm) =>
      s"""$utilsClass.timestampDiff("$unit", $startTerm, $endTerm, $zoneRef)""")
  }

  override def prettyName: String = "timestampdiff"

  /** Renders `prettyName(unit, <left sql>, <right sql>)`; the unit is not a child expression. */
  override def sql: String = {
    val renderedArgs = unit +: children.map(_.sql)
    renderedArgs.mkString(s"$prettyName(", ", ", ")")
  }

  override protected def withNewChildrenInternal(
      newLeft: Expression,
      newRight: Expression): TimestampDiff =
    copy(startTimestamp = newLeft, endTimestamp = newRight)
}

你可能感兴趣的:(spark,大数据,分布式)