Flink三:EventTime与WaterMark生成记录

import java.text.SimpleDateFormat

import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.streaming.api.scala.function.WindowFunction
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

/**
 * A single input record of the demo stream.
 *
 * @param key       value the stream is keyed by
 * @param eventTime event time as text; the sample data uses `yyyy-MM-dd HH:mm:ss`
 */
case class Raw(
  key: String,
  eventTime: String
)

/**
 * Demo job: feeds a handful of in-memory [[Raw]] records through an event-time
 * pipeline (timestamp/watermark assignment, keyBy, 3-second tumbling windows)
 * and prints one summary line per fired window.
 *
 * The entry point is an explicit `main` instead of `extends App`: `App` is built on
 * `DelayedInit`, so the object's fields are not initialized until `main` runs,
 * which makes initialization order fragile for a job entry point.
 */
object WaterMarkCs {

  /**
   * Builds and runs the streaming job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Windows are driven by the timestamps carried in the records, not by wall-clock time.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    env
      .fromElements(
        Raw("000001", "2019-12-28 21:41:32"),
        Raw("000001", "2019-12-28 21:41:33"),
        Raw("000001", "2019-12-28 21:41:34"),
        Raw("000001", "2019-12-28 21:41:36"),
        Raw("000001", "2019-12-28 21:41:37"),
        Raw("000001", "2019-12-28 21:41:39"),
        Raw("000001", "2019-12-28 21:41:31")
        // Uncomment to experiment with additional out-of-order records:
        /*, Raw("000001", "2019-12-28 21:41:10")
        ,Raw("000001", "2019-12-28 21:41:29")
        ,Raw("000001", "2019-12-28 21:41:25")*/
      )
      .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessGentor())
      .keyBy(_.key)
      .window(TumblingEventTimeWindows.of(Time.seconds(3)))
      .apply(new WindowFunctionDeal)
      .print()

    env.execute("Flink 测试WaterMark变化_Starting.....")
  }
}

/**
 * Periodic watermark assigner that tolerates a fixed amount of out-of-orderness.
 *
 * The watermark is `currentMaxTimestamp - maxOutOfOrderness`. The maximum timestamp
 * is tracked per assigner instance, not per key, so records of any key advance it.
 */
class BoundedOutOfOrdernessGentor extends AssignerWithPeriodicWatermarks[Raw] {
  // Largest event timestamp (epoch millis) extracted so far by this instance.
  var currentMaxTimestamp = 0L
  // Maximum tolerated out-of-orderness: 10 seconds, expressed in milliseconds.
  val maxOutOfOrderness = 10000L
  // Formatter used only to render timestamps in the debug output below.
  val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")
  // Parser for Raw.eventTime, built once instead of once per element.
  private val eventTimeParser = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")

  /**
   * @return the current watermark: the largest timestamp seen so far minus the
   *         allowed out-of-orderness
   */
  override def getCurrentWatermark: Watermark = {
    new Watermark(currentMaxTimestamp - maxOutOfOrderness)
  }

  /**
   * Parses the record's event time, updates the running maximum and logs the
   * resulting state to stdout.
   *
   * @param elm                      record whose `eventTime` is parsed
   * @param previousElementTimestamp timestamp previously attached to the record (unused)
   * @return the record's event time in epoch milliseconds
   * @throws java.text.ParseException if `eventTime` does not match `yyyy-MM-dd HH:mm:ss`
   */
  override def extractTimestamp(elm: Raw, previousElementTimestamp: Long): Long = {
    val timestamp = eventTimeParser.parse(elm.eventTime).getTime
    currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp)
    println(s"INPUT::::key: ${elm.key}, eventTime: ${elm.eventTime}, currentMaxTimestamp: ${format.format(currentMaxTimestamp)}," +
      s"watermark: ${format.format(getCurrentWatermark.getTimestamp)}")
    timestamp
  }
}

/**
 * Window function that summarizes one fired window as a single text line:
 * the key, the number of records, the smallest and largest `eventTime` strings,
 * and the window's start and end formatted as `yyyy-MM-dd HH:mm:ss.SSS`.
 */
class WindowFunctionDeal extends WindowFunction[Raw, String, String, TimeWindow] {

  /**
   * Emits one summary line for the window.
   *
   * @param key    key shared by all records in the window
   * @param window window being evaluated; supplies the start and end timestamps
   * @param input  records assigned to the window
   * @param out    collector receiving the summary line
   */
  override def apply(key: String, window: TimeWindow, input: Iterable[Raw], out: Collector[String]): Unit = {
    val boundsFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")
    // Order by the eventTime text so the first and last entries are the extremes.
    val ordered = input.toVector.sortBy(_.eventTime)
    val earliest = ordered.head.eventTime
    val latest = ordered.last.eventTime
    val windowStart = boundsFormat.format(window.getStart)
    val windowEnd = boundsFormat.format(window.getEnd)

    val summary =
      s"${key}, win_size:${ordered.size}, " +
        s"head_eventTime:${earliest}, last_eventTime:${latest}, " +
        s"win_start:${windowStart}, win_end:${windowEnd}"
    out.collect(summary)
  }
}

输出结果请查看下图:

Flink三:EventTime与WaterMark生成记录_第1张图片

你可能感兴趣的:(Flink)