flink+watermark+eventtime案例

package com.thoughtworks.window

import java.util.Properties

import com.thoughtworks.source.SensorReading
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

/**
  * WaterMarkWindow: event-time sliding-window example. Reads sensor readings from
  * Kafka, assigns timestamps and watermarks, and prints, per sensor id and window,
  * a reading that carries the minimum timestamp and minimum temperature seen.
  *
  * Test data (one record per line, fields separated by whitespace)
  * ID       timestamp  temperature
  * sensor_1 1547718100 35.8
  * sensor_1 1547718101 34.8
  * sensor_1 1547718102 33.8
  * sensor_1 1547718103 32.8
  * sensor_1 1547718104 31.8
  * sensor_1 1547718105 30.8
  * sensor_1 1547718106 29.8
  * sensor_1 1547718107 28.8
  * sensor_1 1547718108 27.8
  * sensor_1 1547718109 26.8
  * sensor_1 1547718110 25.8
  * sensor_1 1547718111 20.8
  * sensor_1 1547718112 19.8
  * sensor_1 1547718113 18.8
  * sensor_1 1547718114 17.8
  * sensor_1 1547718115 16.8
  * sensor_1 1547718116 15.8
  *
  * sensor_1 1547718100 23.1
  * sensor_1 1547718101 24.1
  * sensor_1 1547718102 25.1
  * sensor_1 1547718103 26.1
  * sensor_1 1547718104 27.1
  * sensor_1 1547718105 28.1
  * sensor_1 1547718106 29.1
  * sensor_1 1547718107 30.2
  * sensor_1 1547718108 31.2
  * sensor_1 1547718109 19.2
  * sensor_1 1547718110 18.2
  * sensor_1 1547718111 17.2
  * sensor_1 1547718112 16.2
  * sensor_1 1547718113 15.2
  * sensor_1 1547718114 14.2
  * sensor_1 1547718115 13.2
  * sensor_1 1547718116 12.2
  * sensor_1 1547718117 11.2
  * sensor_1 1547718118 8.2
  * sensor_1 1547718119 9.2
  */
object WaterMarkWindow {

  /**
    * Parses one whitespace-separated record of the form `id timestamp temperature`.
    *
    * Leading/trailing whitespace and runs of whitespace between fields are tolerated.
    * A record with fewer than three fields, or with a non-numeric timestamp or
    * temperature, still throws (as the original inline parsing did).
    *
    * @param line raw record text read from Kafka
    * @return the reading built from the first three fields
    */
  private def parseReading(line: String): SensorReading = {
    val fields: Array[String] = line.trim.split("\\s+")
    SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
  }

  /**
    * Builds and runs the streaming job: Kafka source -> parse -> timestamps/watermarks
    * -> keyed sliding event-time window -> reduce -> print.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // 1. Execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Drive the job by event time.
    //   EventTime      - the time at which the event occurred
    //   ProcessingTime - the time at which the record is processed
    //   IngestionTime  - the time at which the record entered the system
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    // Optionally override the default watermark emission interval:
    //    env.getConfig.setAutoWatermarkInterval(5000)

    // 2. Source: Kafka
    val properties = new Properties()
    // Kafka broker addresses and ports
    properties.setProperty("bootstrap.servers", "node02:9092,node03:9092,node04:9092")
    // Consumer group
    properties.setProperty("group.id", "flink")
    // Key / value deserializers.
    // NOTE(review): records are decoded by the schema passed to the consumer below, so
    // these two settings are presumably ignored by the Flink connector - confirm.
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    // Offset reset policy: start from the latest records when no valid offset is available
    properties.setProperty("auto.offset.reset", "latest")

    // FlinkKafkaConsumer011 arguments:
    //   1st - topic
    //   2nd - (de)serialization schema converting Kafka byte messages to Flink objects
    //   3rd - Kafka properties
    val stream: DataStream[String] =
      env.addSource(new FlinkKafkaConsumer011[String]("flink", new SimpleStringSchema(), properties))

    // 3. Transformation
    val waterMarkStream: DataStream[SensorReading] = stream
      .map(line => parseReading(line))
      // For strictly ascending data, timestamps could be assigned with:
      //  .assignAscendingTimestamps(_.timeStamp*1000)

      // Out-of-order data: assign timestamps and watermarks, tolerating 1000 ms of disorder
      .assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor[SensorReading](Time.milliseconds(1000)) {
          // The record's timestamp is scaled by 1000 before being handed to Flink
          // (presumably epoch seconds -> milliseconds, judging by the test data).
          override def extractTimestamp(data: SensorReading): Long =
            data.timeStamp * 1000
        })

    val result = waterMarkStream
      .keyBy(_.id)
      // Sliding event-time window: size 5 s, slide 2 s
      .window(SlidingEventTimeWindows.of(Time.seconds(5), Time.seconds(2)))
      // Allowed lateness.
      // NOTE(review): this is 2 milliseconds; the accompanying write-up talks about a
      // 2 s delay - confirm which one is intended.
      .allowedLateness(Time.milliseconds(2))
      // Keep the first reading's id together with the minimum timestamp and temperature
      .reduce((a, b) =>
        SensorReading(a.id, math.min(a.timeStamp, b.timeStamp), math.min(a.temperature, b.temperature)))

    // 4. Sink
    result.print("stream").setParallelism(1)

    // 5. Execute
    env.execute("API Test")
  }
}

抛出一个问题：窗口长度为 5 s，滑动步长为 2 s，延迟设置为 2 s，为何观察到的窗口长度却是 9 秒？（注：上面代码中 watermark 的乱序延迟是 1 秒，allowedLateness 是 2 毫秒，而不是 2 秒。）
按时间倒序输入时，观察到的长度为 5 s？
按时间正序输入时，观察到的长度为 9 s？
感觉功能实现出来了，但是对 watermark 的理解还不够。
想要源码的同学留邮箱即可

你可能感兴趣的:(flink,watermark,eventtime)