flink 1.9
除了从DataStream操作的结果中获取主数据流之外,您还可以产生任意数量的额外侧输出结果流。侧输出结果流的数据类型不需要与主数据流的类型一致,不同侧输出流的类型也可以不同。当您想要拆分数据流时,通常的做法是复制流,然后从每个流中过滤掉不需要的数据;而使用side output操作可以很好地解决这个问题,无需复制数据流。
在使用侧输出时,首先用户需要定义一个OutputTag,这个OutputTag将用来标识一个侧输出流side output stream:
// This must be an anonymous inner class (note the trailing {}) so that the
// generic type argument can be analyzed.
OutputTag<String> outputTag = new OutputTag<String>("side-output") {};
请注意OutputTag是如何根据侧输出流包含的元素类型进行类型化的。
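可以通过以下几类函数将数据发送到侧输出(side output):
- ProcessFunction
- KeyedProcessFunction
- CoProcessFunction
- ProcessWindowFunction
- ProcessAllWindowFunction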
您可以使用Context参数(在上述函数中向用户公开)将数据发送到OutputTag标识的侧输出。 以下是从ProcessFunction发出侧输出数据的示例:
public class DataSource1 extends RichParallelSourceFunction> {
private volatile boolean running = true;
@Override
public void run(SourceContext> ctx) throws InterruptedException {
// Tuple3[] elements = new Tuple3[]{
// Tuple3.of("a", "1", 1551169050000L),
// Tuple3.of("aa", "33", 1551169064000L),
// Tuple3.of("a", "2", 1551169054000L),
// Tuple3.of("a", "3", 1551169064000L),
// Tuple3.of("b", "5", 1551169100000L),
// Tuple3.of("a", "4", 1551169079999L),
// Tuple3.of("aa1", "44", 1551169079000L),
// Tuple3.of("b", "6", 1551169108000L)
// };
Tuple5[] elements1 = new Tuple5[]{
Tuple5.of("a", 2, 1551169050002L, "w", "e"),
Tuple5.of("a", 1, 1551169050002L, "2", "1"),
Tuple5.of("a", 1, 1551169050001L, "3", "1"),
Tuple5.of("aa", 33, 1551169064000L, "4", "1"),
Tuple5.of("a", 2, 1551169054000L, "5", "1"),
Tuple5.of("a", 1, 1551169050003L, "6", "1"),
Tuple5.of("a", 3, 1551169064000L, "7", "1"),
Tuple5.of("b", 5, 1551169100000L, "8", "1"),
Tuple5.of("a", 4, 1551169079000L, "9", "1"),
Tuple5.of("aa", 44, 1551169079000L, "10", "1"),
Tuple5.of("b", 6, 1551169108000L, "11", "1")
};
int count = 0;
while (running && count < elements1.length) {
ctx.collect(new Tuple5<>((String)elements1[count].f0,(Integer)elements1[count].f1,(Long)elements1[count].f2,
(String)elements1[count].f3,(String)elements1[count].f4));
count++;
Thread.sleep(300);
}
}
@Override
public void cancel() {
running = false;
}
}
public class SideOutputs {
public static void main(String[] args) {
Long delay = 5000L;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.setParallelism(2);
// 设置数据源
DataStream> source = env.addSource(new DataSource1()).setParallelism(1).name("Demo Source");
final OutputTag outputTag = new OutputTag("side-output"){};
SingleOutputStreamOperator mainDataStream = source
.process(new ProcessFunction, Integer>() {
@Override
public void processElement(Tuple5 value, Context ctx, Collector out) throws Exception {
out.collect(value.f1);
ctx.output(outputTag, "sideout-" + value.f4);
}
});
}
}
你可以在DataStream操作的结果上调用getSideOutput(OutputTag)来获取侧输出流。该方法返回一个DataStream,其元素类型与侧输出流的数据类型一致:
public class SideOutputs {
public static void main(String[] args) {
Long delay = 5000L;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.setParallelism(2);
// 设置数据源
DataStream> source = env.addSource(new DataSource1()).setParallelism(1).name("Demo Source");
final OutputTag outputTag = new OutputTag("side-output"){};
SingleOutputStreamOperator mainDataStream = source
.process(new ProcessFunction, Integer>() {
@Override
public void processElement(Tuple5 value, Context ctx, Collector out) throws Exception {
out.collect(value.f1);
ctx.output(outputTag, "sideout-" + value.f4);
}
});
DataStream sideOutput = mainDataStream.getSideOutput(outputTag);
}
}
https://ci.apache.org/projects/flink/flink-docs-release-1.9/dev/stream/side_output.html
https://www.jianshu.com/p/0350cd9a38b5