目录
1. 触发器(Trigger)
2. 移除器(Evictor)
3. 允许延迟(Allowed Lateness)
4. 将迟到的数据放入侧输出流
stream.keyBy(...)
.window(...)
.trigger(new MyTrigger())
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import
org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import
org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.triggers.Trigger;
import org.apache.flink.streaming.api.windowing.triggers.TriggerResult;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
public class TriggerExample {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env
.addSource(new ClickSource())
.assignTimestampsAndWatermarks(
WatermarkStrategy.forMonotonousTimestamps()
.withTimestampAssigner(new
SerializableTimestampAssigner() {
@Override
public long extractTimestamp(Event event, long l) {
return event.timestamp;
}
})
)
.keyBy(r -> r.url)
.window(TumblingEventTimeWindows.of(Time.seconds(10)))
.trigger(new MyTrigger())
.process(new WindowResult())
.print();
env.execute();
}
public static class WindowResult extends ProcessWindowFunction {
@Override
public void process(String s, Context context, Iterable iterable,
Collector collector) throws Exception {
collector.collect(
new UrlViewCount(
s,
// 获取迭代器中的元素个数
iterable.spliterator().getExactSizeIfKnown(),
context.window().getStart(),
context.window().getEnd()
)
);
}
}
public static class MyTrigger extends Trigger {
@Override
public TriggerResult onElement(Event event, long l, TimeWindow timeWindow,
TriggerContext triggerContext) throws Exception {
ValueState isFirstEvent =
triggerContext.getPartitionedState(
new ValueStateDescriptor("first-event",
Types.BOOLEAN)
);
if (isFirstEvent.value() == null) {
for (long i = timeWindow.getStart(); i < timeWindow.getEnd(); i =
i + 1000L) {
triggerContext.registerEventTimeTimer(i);
}
isFirstEvent.update(true);
}
return TriggerResult.CONTINUE;
}
@Override
public TriggerResult onEventTime(long l, TimeWindow timeWindow,
TriggerContext triggerContext) throws Exception {
return TriggerResult.FIRE;
}
@Override
public TriggerResult onProcessingTime(long l, TimeWindow timeWindow,
TriggerContext triggerContext) throws Exception {
return TriggerResult.CONTINUE;
}
@Override
public void clear(TimeWindow timeWindow, TriggerContext triggerContext)
throws Exception {
ValueState isFirstEvent =
triggerContext.getPartitionedState(
new ValueStateDescriptor("first-event",
Types.BOOLEAN)
);
isFirstEvent.clear();
}
}
}
UrlViewCount{url='./prod?id=1', count=1, windowStart=2021-07-01 14:44:10.0,windowEnd=2021-07-01 14:44:20.0}172 173UrlViewCount{url='./prod?id=1', count=1, windowStart=2021-07-01 14:44:10.0,windowEnd=2021-07-01 14:44:20.0}UrlViewCount{url='./prod?id=1', count=1, windowStart=2021-07-01 14:44:10.0,windowEnd=2021-07-01 14:44:20.0}UrlViewCount{url='./prod?id=1', count=1, windowStart=2021-07-01 14:44:10.0,windowEnd=2021-07-01 14:44:20.0}
stream.keyBy(...)
.window(...)
.evictor(new MyEvictor())
Evictor 接口定义了两个方法:
stream.keyBy(...)
.window(TumblingEventTimeWindows.of(Time.hours(1)))
.allowedLateness(Time.minutes(1))
比如上面的代码中,我们定义了 1 小时的滚动窗口,并设置了允许 1 分钟的延迟数据。也
DataStream stream = env.addSource(...);
OutputTag outputTag = new OutputTag("late") {};
stream.keyBy(...)
.window(TumblingEventTimeWindows.of(Time.hours(1)))
.sideOutputLateData(outputTag)
SingleOutputStreamOperator winAggStream = stream.keyBy(...)
.window(TumblingEventTimeWindows.of(Time.hours(1)))
.sideOutputLateData(outputTag)
.aggregate(new MyAggregateFunction())
DataStream lateStream = winAggStream.getSideOutput(outputTag);