Flink算子通用状态应用测试样例
// Obtain the execution environment and run every operator with parallelism 1
// (presumably so the demo's console output stays sequential).
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
// Demo source: once per second emits a record {"ID": "1".."3", "AMT": "1"} and logs it to stdout.
DataStream<Map<String, String>> source = env.addSource(new SourceFunction<Map<String, String>>() {
    // Cleared by cancel() so that run() can leave its loop; volatile because Flink
    // calls cancel() from a different thread than the one executing run().
    private volatile boolean running = true;

    /**
     * Emits one randomly keyed record per second until the source is cancelled.
     *
     * @param ctx context used to emit records downstream
     * @throws Exception if the thread is interrupted while sleeping
     */
    @Override
    public void run(SourceContext<Map<String, String>> ctx) throws Exception {
        // One generator for the whole run instead of a new instance per record.
        final Random random = new Random();
        while (running) {
            HashMap<String, String> hashMap = new HashMap<>();
            hashMap.put("ID", String.valueOf(random.nextInt(3) + 1));
            hashMap.put("AMT", "1");
            System.out.println("------");
            System.out.println("生产数据:" + hashMap);
            ctx.collect(hashMap);
            Thread.sleep(1000);
        }
    }

    /** Signals run() to stop after the current iteration. */
    @Override
    public void cancel() {
        running = false;
    }
});
// Partition the stream by the record's "ID" concatenated with the current day of week,
// so the same ID maps to a different key (and therefore different state) on each weekday.
// NOTE(review): the key depends on the wall clock at the moment getKey() is evaluated;
// Flink expects key selectors to be deterministic for a given record - confirm this is
// acceptable, or carry the day inside the record instead.
KeyedStream<Map<String, String>, String> keyedStream = source.keyBy(new KeySelector<Map<String, String>, String>() {
    /**
     * Builds the partition key for one record.
     *
     * @param value the incoming record
     * @return the record's "ID" followed by the name of today's day of week
     */
    @Override
    public String getKey(Map<String, String> value) throws Exception {
        final String id = value.get("ID");
        final String weekday = LocalDate.now().getDayOfWeek().toString();
        return id + weekday;
    }
});
// Per-key running aggregation: every incoming record is folded into a TTL-protected
// AggregatingState and the current aggregate is emitted downstream.
SingleOutputStreamOperator<Map<String, String>> process = keyedStream.process(new KeyedProcessFunction<String, Map<String, String>, Map<String, String>>() {
    // State handle obtained in open(); transient because it is a runtime object
    // that must not be serialized together with the function instance.
    private transient AggregatingState<Map<String, String>, Map<String, String>> aggState;

    /**
     * Registers the aggregating state "aggRes" with a one-day time-to-live.
     *
     * @param parameters configuration passed in by the runtime (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        // State TTL configuration.
        StateTtlConfig ttlConfig = StateTtlConfig
                .newBuilder(Time.days(1))
                // The TTL timestamp is refreshed when the state is created and on every write
                // (the alternative, OnReadAndWrite, additionally refreshes it on reads).
                .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                // Expired values are never returned to the caller.
                .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                .build();
        // State descriptor: the accumulator is a map holding the key's "ID" and the running "AMT" sum.
        AggregatingStateDescriptor<Map<String, String>, Map<String, String>, Map<String, String>> aggRes = new AggregatingStateDescriptor<>("aggRes", new AggregateFunction<Map<String, String>, Map<String, String>, Map<String, String>>() {
            /** Starts from an empty accumulator; "AMT" is absent until the first record arrives. */
            @Override
            public Map<String, String> createAccumulator() {
                return new HashMap<>();
            }

            /**
             * Folds one record into the accumulator.
             *
             * @param in  incoming record carrying "ID" and an integer-valued "AMT"
             * @param acc accumulator to update in place
             * @return the updated accumulator
             */
            @Override
            public Map<String, String> add(Map<String, String> in, Map<String, String> acc) {
                String amt = acc.get("AMT");
                if (amt == null) {
                    // First record for this key: seed the accumulator from the record.
                    acc.put("ID", in.get("ID"));
                    acc.put("AMT", in.get("AMT"));
                } else {
                    acc.put("AMT", String.valueOf(Integer.parseInt(in.get("AMT")) + Integer.parseInt(amt)));
                }
                return acc;
            }

            /** The accumulator itself is the result. */
            @Override
            public Map<String, String> getResult(Map<String, String> acc) {
                return acc;
            }

            /**
             * Combines two accumulators by summing their "AMT" values.
             * An accumulator without "AMT" is treated as empty and the other one is returned.
             *
             * @return the merged accumulator (never null)
             */
            @Override
            public Map<String, String> merge(Map<String, String> a, Map<String, String> b) {
                String left = a.get("AMT");
                String right = b.get("AMT");
                if (left == null) {
                    return b;
                }
                if (right == null) {
                    return a;
                }
                a.put("AMT", String.valueOf(Integer.parseInt(left) + Integer.parseInt(right)));
                return a;
            }
        }, TypeInformation.of(new TypeHint<Map<String, String>>() {
        }));
        aggRes.enableTimeToLive(ttlConfig);
        aggState = getRuntimeContext().getAggregatingState(aggRes);
    }

    /**
     * Adds the record to the current key's aggregate and emits the updated aggregate.
     *
     * @param value incoming record
     * @param ctx   per-element context (unused here)
     * @param out   collector receiving the current aggregate
     */
    @Override
    public void processElement(Map<String, String> value, KeyedProcessFunction<String, Map<String, String>, Map<String, String>>.Context ctx, Collector<Map<String, String>> out) throws Exception {
        aggState.add(value);
        out.collect(aggState.get());
    }
});
// Print every emitted aggregate to stdout. The element is passed through unchanged
// rather than replaced by null, so the stream never carries null records
// (null records are generally not safe to emit in Flink).
process.map((MapFunction<Map<String, String>, Object>) value -> {
    System.out.println("聚合结果:" + value);
    return value;
});
// Submit the job; this call blocks until the job finishes or is cancelled.
env.execute("Flink Common State Test");
------
生产数据:{AMT=1, ID=2}
聚合结果:{AMT=1, ID=2}
------
生产数据:{AMT=1, ID=3}
聚合结果:{AMT=1, ID=3}
------
生产数据:{AMT=1, ID=3}
聚合结果:{AMT=2, ID=3}
------
生产数据:{AMT=1, ID=1}
聚合结果:{AMT=1, ID=1}
------
生产数据:{AMT=1, ID=1}
聚合结果:{AMT=2, ID=1}
------
生产数据:{AMT=1, ID=1}
聚合结果:{AMT=3, ID=1}
...
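这段代码实现了一个 Flink 作业:自定义数据源每秒生成一条随机数据(ID 取 1~3,AMT 固定为 1),按“ID + 当天星期几”作为键进行分区,并使用带 1 天 TTL 的 AggregatingState 对同一键的 AMT 进行累加,每处理一条数据就输出一次当前的聚合结果。整个流程包括数据源生成、按键分区、状态初始化、元素聚合处理和结果输出,可以作为多场景下通用的实时数据处理模型。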