1. 代码
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.learn.function.{WordCountFlatMapFunction, WordCountProcessFunction}
import org.learn.source.SourceForTest
/**
  * Entry point of the keyed-state word-count job.
  *
  * The pipeline reads lines from [[SourceForTest]], splits them into
  * `(word, 1)` pairs, keys the stream by word and hands every pair to
  * [[WordCountProcessFunction]].
  */
object StateWordCount {

  /**
    * Assembles the pipeline and submits it for execution.
    *
    * @param args command-line arguments; parsed with `ParameterTool` and
    *             registered as the global job parameters
    */
  def main(args: Array[String]): Unit = {
    val jobParams: ParameterTool = ParameterTool.fromArgs(args)
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.setGlobalJobParameters(jobParams)

    // The source operator is pinned to a parallelism of 1.
    val lines = env.addSource(new SourceForTest).setParallelism(1)
    val wordPairs = lines.flatMap(new WordCountFlatMapFunction)
    val byWord = wordPairs.keyBy(pair => pair._1)
    byWord.process(new WordCountProcessFunction())

    env.execute()
  }
}
1.1 Source
import java.io.{BufferedReader, FileReader}
import java.util.concurrent.TimeUnit
import org.apache.commons.lang3.StringUtils
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
/**
  * Test source that emits the non-blank lines of `F:\test.txt`, one line
  * every 10 seconds.
  *
  * Once the end of the file is reached `readLine()` returns `null`, which
  * `StringUtils.isNotBlank` rejects, so the loop simply keeps idling (one
  * iteration per 10 seconds) until [[cancel]] is called.
  */
class SourceForTest extends RichSourceFunction[String] {

  // cancel() flips this flag while run() is looping on it; @volatile makes the
  // write visible to the looping thread (Flink calls cancel() from a different
  // thread than the one executing run()).
  @volatile private var isRunning: Boolean = true

  /**
    * Reads the file line by line and forwards every non-blank line downstream.
    *
    * @param sourceContext context used to emit records
    */
  override def run(sourceContext: SourceFunction.SourceContext[String]): Unit = {
    val bufferedReader: BufferedReader = new BufferedReader(new FileReader("F:\\test.txt"))
    try {
      while (isRunning) {
        val line: String = bufferedReader.readLine()
        if (StringUtils.isNotBlank(line)) {
          sourceContext.collect(line)
        }
        TimeUnit.SECONDS.sleep(10)
      }
    } finally {
      // Release the file handle on normal exit, cancellation or failure.
      bufferedReader.close()
    }
  }

  /** Asks the loop in [[run]] to stop after its current iteration. */
  override def cancel(): Unit = {
    isRunning = false
  }
}
1.2 Map
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.util.Collector
/**
  * Splits a comma-separated line into words and emits a `(word, 1)` pair
  * for each of them.
  */
class WordCountFlatMapFunction extends RichFlatMapFunction[String, (String, Int)] {

  /**
    * @param value the incoming line; split on ","
    * @param out   collector that receives one `(word, 1)` tuple per split item
    */
  override def flatMap(value: String, out: Collector[(String, Int)]): Unit =
    value.split(",").foreach(word => out.collect((word, 1)))
}
1.3 Process
import org.apache.flink.api.common.state.{MapState, MapStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.util.Collector
/**
  * Keeps a running count per word in keyed map state and, the first time a
  * word is seen (or seen again after its state was cleared), registers a
  * processing-time timer whose timestamp lies 30 seconds in the PAST.
  *
  * The past timestamp is deliberate: the job exists to observe how a
  * processing-time timer behaves when it is registered with a historical time.
  */
class WordCountProcessFunction extends KeyedProcessFunction[String, (String, Int), (String, Int)] {

  // word -> (word, running count)
  private var mapState: MapState[String, (String, Int)] = _
  // Every timer timestamp registered so far (timestamp -> timestamp); only read for logging.
  private var timerState: MapState[Long, Long] = _

  /** Obtains both map states from the runtime context. */
  override def open(parameters: Configuration): Unit = {
    mapState = getRuntimeContext.getMapState(
      new MapStateDescriptor[String, (String, Int)]("valueStateDesc", classOf[String], classOf[(String, Int)]))
    timerState = getRuntimeContext.getMapState(
      new MapStateDescriptor[Long, Long]("timerStateDesc", classOf[Long], classOf[Long]))
  }

  /**
    * Adds the incoming count to the stored count of the word. When no entry
    * exists yet, a timer is registered first and the count starts from 0.
    *
    * @param value `(word, count)` pair to accumulate
    * @param ctx   gives access to the timer service
    * @param out   unused; this function emits nothing
    */
  override def processElement(value: (String, Int), ctx: KeyedProcessFunction[String, (String, Int), (String, Int)]#Context, out: Collector[(String, Int)]): Unit = {
    val word = value._1
    val previous: (String, Int) = Option(mapState.get(word)).getOrElse {
      // TTL time: intentionally a historical time (now minus 30 seconds).
      val ttlTime: Long = System.currentTimeMillis() - 30 * 1000
      ctx.timerService().registerProcessingTimeTimer(ttlTime)
      timerState.put(ttlTime, ttlTime)
      (word, 0)
    }
    mapState.put(word, (previous._1, previous._2 + value._2))
  }

  /**
    * Prints the firing timestamp, the current wall-clock time and every entry
    * of `timerState`, then clears `mapState`. `timerState` itself is not cleared.
    *
    * @param timestamp the timestamp the firing timer was registered with
    */
  override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[String, (String, Int), (String, Int)]#OnTimerContext, out: Collector[(String, Int)]): Unit = {
    val now = System.currentTimeMillis()
    System.out.println("clear..." + " timestamp: " + timestamp + " currentTime: " + now + " timerState: ")

    val registered = timerState.keys().iterator()
    while (registered.hasNext) {
      val ts = registered.next()
      System.out.println("key: " + ts + " value: " + timerState.get(ts))
    }

    mapState.clear()
  }
}
设置TTL时间为历史时间,看看定时器如何触发?
2. 结果
clear... timestamp: 1597194982850 currentTime: 1597195012866 timerState:
key: 1597194982850 value: 1597194982850
clear... timestamp: 1597194992895 currentTime: 1597195022911 timerState:
key: 1597194992895 value: 1597194992895
key: 1597194982850 value: 1597194982850
clear... timestamp: 1597195002910 currentTime: 1597195032925 timerState:
key: 1597195002910 value: 1597195002910
key: 1597194992895 value: 1597194992895
key: 1597194982850 value: 1597194982850
从结果可见:
- 给 TimerService 设置 TTL 时间为历史时间,定时器也会触发
- 调用的 `onTimer(timestamp, ctx, out)` 函数中,参数 timestamp 的值是设置的历史时间,而不是当前时间,当前时间已经大于了 timestamp。
3. 分析
通过 TimerService 注册处理时间定时器时,底层会调用 registerTimer 注册 Timer,看看源码:
- 进入 `org.apache.flink.streaming.runtime.tasks.SystemProcessingTimeService.java`
public ScheduledFuture> registerTimer(long timestamp, ProcessingTimeCallback target) { // delay the firing of the timer by 1 ms to align the semantics with watermark. A watermark // T says we won't see elements in the future with a timestamp smaller or equal to T. // With processing time, we therefore need to delay firing the timer by one ms. long delay = Math.max(timestamp - getCurrentProcessingTime(), 0) + 1; // we directly try to register the timer and only react to the status on exception // that way we save unnecessary volatile accesses for each timer try { return timerService.schedule( new TriggerTask(status, task, checkpointLock, target, timestamp), delay, TimeUnit.MILLISECONDS); } catch (RejectedExecutionException e) { final int status = this.status.get(); if (status == STATUS_QUIESCED) { return new NeverCompleteFuture(delay); } else if (status == STATUS_SHUTDOWN) { throw new IllegalStateException("Timer service is shut down"); } else { // something else happened, so propagate the exception throw e; } } }
- 利用 `timestamp - getCurrentProcessingTime()` 计算设置的 TTL 时间和当前时间的差值,然后取这个差值和 0 这两者中的较大值,然后 +1 作为定时器调度的延迟时间(历史时间的差值为负数,因此延迟为 0 + 1 = 1 毫秒),正是这一步导致:即使设置的 TTL 时间是历史时间,也会作为当前时间来触发调度。
- 新建 TriggerTask 任务,利用调度器定时调度该任务,触发 `onTimer` 操作。

看看 TriggerTask :
- 进入 `org.apache.flink.streaming.runtime.tasks.SystemProcessingTimeService` 的内部类 `TriggerTask`
/**
 * Runnable that invokes a ProcessingTimeCallback with the timestamp it was created with.
 */
private static final class TriggerTask implements Runnable {

    private final AtomicInteger serviceStatus;
    private final Object lock;
    private final ProcessingTimeCallback target;
    // The timestamp passed in at construction time; forwarded unchanged to the callback in run().
    private final long timestamp;
    private final AsyncExceptionHandler exceptionHandler;

    private TriggerTask(
            final AtomicInteger serviceStatus,
            final AsyncExceptionHandler exceptionHandler,
            final Object lock,
            final ProcessingTimeCallback target,
            final long timestamp) {

        this.serviceStatus = Preconditions.checkNotNull(serviceStatus);
        this.exceptionHandler = Preconditions.checkNotNull(exceptionHandler);
        this.lock = Preconditions.checkNotNull(lock);
        this.target = Preconditions.checkNotNull(target);
        this.timestamp = timestamp;
    }

    @Override
    public void run() {
        // The callback runs while holding the lock supplied at construction time.
        synchronized (lock) {
            try {
                // Only fire while the service status is STATUS_ALIVE.
                if (serviceStatus.get() == STATUS_ALIVE) {
                    target.onProcessingTime(timestamp);
                }
            } catch (Throwable t) {
                // Any failure is wrapped in a TimerException and handed to the exception handler.
                TimerException asyncException = new TimerException(t);
                exceptionHandler.handleAsyncException("Caught exception while processing timer.", asyncException);
            }
        }
    }
}
- TriggerTask 实现 Runnable 接口
- TriggerTask.timestamp 赋值为设置的 TTL 时间,正是这一步导致:调用 `onTimer(timestamp, ctx, out)` 函数时参数 timestamp 的值是设置的历史时间
- run() 方法中调用 `onProcessingTime(timestamp)` 方法,该方法内部触发 `onTimer(timestamp, ctx, out)` 方法。
4. 结论
- 给 TimerService 设置 TTL 时间为历史时间,定时器也会触发。
- 调用的 `onTimer(timestamp, ctx, out)` 函数中,参数 timestamp 的值是设置的历史时间,而不是当前时间,当前时间已经大于了 timestamp,即无论何时触发,timestamp 永远是用户设置的 TTL 时间。