1. ReduceFunction
Incremental aggregation: the input elements, the intermediate state, and the output all share the same type.
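For reference, the interface itself has a single type parameter, which is why all three roles share one type (a simplified sketch of org.apache.flink.api.common.functions.ReduceFunction):

// One type parameter T: both inputs and the returned partial aggregate are of type T.
public interface ReduceFunction<T> extends Function, Serializable {
    T reduce(T value1, T value2) throws Exception;
}

The full example, counting names per 10-second tumbling event-time window: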
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import java.time.Duration;
import java.util.Random;
public class ReduceFunctionTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
        // Event time is the default since Flink 1.12, so setStreamTimeCharacteristic(EventTime) is no longer needed.
        executionEnvironment.getConfig().setAutoWatermarkInterval(100);
        DataStreamSource<Tuple2<String, Long>> streamSource = executionEnvironment.addSource(new SourceFunction<Tuple2<String, Long>>() {
            boolean flag = true;

            @Override
            public void run(SourceContext<Tuple2<String, Long>> sourceContext) throws Exception {
                // Emit a random name with the current timestamp once per second
                String[] str = {"韩梅梅", "张三", "王五", "李四"};
                while (flag) {
                    Thread.sleep(1000);
                    int i = new Random().nextInt(4);
                    sourceContext.collect(new Tuple2<>(str[i], System.currentTimeMillis()));
                }
            }

            @Override
            public void cancel() {
                flag = false;
            }
        });
        streamSource.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(1))
                .withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
                    @Override
                    public long extractTimestamp(Tuple2<String, Long> element, long recordTimestamp) {
                        return element.f1;
                    }
                }))
                // Attach an initial count of 1 to every record: (name, eventTime, 1)
                .map(new MapFunction<Tuple2<String, Long>, Tuple3<String, Long, Integer>>() {
                    @Override
                    public Tuple3<String, Long, Integer> map(Tuple2<String, Long> element) throws Exception {
                        System.out.println(element.f0 + " " + element.f1);
                        return new Tuple3<>(element.f0, element.f1, 1);
                    }
                })
                .keyBy(new KeySelector<Tuple3<String, Long, Integer>, String>() {
                    @Override
                    public String getKey(Tuple3<String, Long, Integer> element) throws Exception {
                        return element.f0;
                    }
                })
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                // Incremental aggregation: input, state and output are all Tuple3<String, Long, Integer>
                .reduce(new ReduceFunction<Tuple3<String, Long, Integer>>() {
                    @Override
                    public Tuple3<String, Long, Integer> reduce(Tuple3<String, Long, Integer> acc, Tuple3<String, Long, Integer> value) throws Exception {
                        return new Tuple3<>(acc.f0, acc.f1, acc.f2 + value.f2);
                    }
                })
                .print();
        executionEnvironment.execute("reduce test");
    }
}
2. AggregateFunction
Incremental aggregation: the input, the intermediate state (accumulator), and the output types may all differ.
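The interface has three independent type parameters, which is what allows input, state, and output to differ (a simplified sketch of org.apache.flink.api.common.functions.AggregateFunction):

public interface AggregateFunction<IN, ACC, OUT> extends Function, Serializable {
    ACC createAccumulator();             // create the initial (empty) state
    ACC add(IN value, ACC accumulator);  // fold one input element into the state
    OUT getResult(ACC accumulator);      // turn the final state into the output
    ACC merge(ACC a, ACC b);             // merge two states (merging window assigners only)
}

The full example, again counting names per 10-second tumbling event-time window: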
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import java.time.Duration;
import java.util.Random;
public class AggregateFunctionTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setAutoWatermarkInterval(100);
        DataStreamSource<Tuple2<String, Long>> streamSource = env.addSource(new SourceFunction<Tuple2<String, Long>>() {
            boolean flag = true;

            @Override
            public void run(SourceContext<Tuple2<String, Long>> sourceContext) throws Exception {
                // Emit a random name with the current timestamp once per second
                String[] str = {"韩梅梅", "张三", "王五", "李四"};
                while (flag) {
                    Thread.sleep(1000);
                    int i = new Random().nextInt(4);
                    sourceContext.collect(new Tuple2<>(str[i], System.currentTimeMillis()));
                }
            }

            @Override
            public void cancel() {
                flag = false;
            }
        });
        streamSource.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(1))
                .withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
                    @Override
                    public long extractTimestamp(Tuple2<String, Long> element, long recordTimestamp) {
                        return element.f1;
                    }
                }))
                // Map every record to (name, 1)
                .map(new MapFunction<Tuple2<String, Long>, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(Tuple2<String, Long> element) throws Exception {
                        return new Tuple2<>(element.f0, 1);
                    }
                })
                .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> element) throws Exception {
                        return element.f0;
                    }
                })
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                // Type parameters: input, accumulator (intermediate state), output
                .aggregate(new AggregateFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>>() {
                    // Creates the intermediate state; called when the window is initialized
                    @Override
                    public Tuple2<String, Integer> createAccumulator() {
                        return new Tuple2<>("", 0);
                    }

                    // Called for every new element that arrives in the window
                    @Override
                    public Tuple2<String, Integer> add(Tuple2<String, Integer> value, Tuple2<String, Integer> accumulator) {
                        return new Tuple2<>(value.f0, value.f1 + accumulator.f1);
                    }

                    // Extracts the final result from the accumulator
                    @Override
                    public Tuple2<String, Integer> getResult(Tuple2<String, Integer> accumulator) {
                        return accumulator;
                    }

                    // Merges two accumulators; only used by merging window assigners,
                    // e.g. two session windows for the same key may be merged into one
                    @Override
                    public Tuple2<String, Integer> merge(Tuple2<String, Integer> a, Tuple2<String, Integer> b) {
                        return new Tuple2<>(a.f0, a.f1 + b.f1);
                    }
                })
                .print();
        env.execute("aggregate test");
    }
}
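Note that merge() above is only called by merging window assigners. As a minimal illustration (not part of the original example), replacing the tumbling window with an event-time session window is enough to exercise it, because two per-key sessions can later grow together; only the window(...) line of the pipeline above changes, with EventTimeSessionWindows imported from org.apache.flink.streaming.api.windowing.assigners:

// Hypothetical variant: sessions separated by gaps of less than 5 seconds are merged,
// and merging the two windows also merges their accumulators via merge().
.window(EventTimeSessionWindows.withGap(Time.seconds(5)))
.aggregate(/* same AggregateFunction as above */)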
3. ProcessWindowFunction
Full-window computation: all elements of the window are buffered and processed in one call; the input and output types may differ.
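The class to extend takes four type parameters and hands process() the whole window contents plus a Context with window metadata (a simplified sketch of org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction):

public abstract class ProcessWindowFunction<IN, OUT, KEY, W extends Window> extends AbstractRichFunction {
    // Called once per key and window, with every element buffered for that window
    public abstract void process(KEY key, Context context, Iterable<IN> elements, Collector<OUT> out) throws Exception;

    public abstract class Context implements java.io.Serializable {
        public abstract W window();               // window metadata such as start and end
        public abstract long currentWatermark();  // current event-time watermark
        // per-window and global state accessors omitted
    }
}

The full example, computing the same per-name count but with all window elements buffered: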
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import java.time.Duration;
import java.util.Random;
public class ProcessFunctionTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setAutoWatermarkInterval(100);
        DataStreamSource<Tuple2<String, Long>> streamSource = env.addSource(new SourceFunction<Tuple2<String, Long>>() {
            boolean flag = true;

            @Override
            public void run(SourceContext<Tuple2<String, Long>> sourceContext) throws Exception {
                // Emit a random name with the current timestamp once per second
                String[] str = {"韩梅梅", "张三", "王五", "李四"};
                while (flag) {
                    Thread.sleep(1000);
                    int i = new Random().nextInt(4);
                    sourceContext.collect(new Tuple2<>(str[i], System.currentTimeMillis()));
                }
            }

            @Override
            public void cancel() {
                flag = false;
            }
        });
        streamSource.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(1))
                .withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
                    @Override
                    public long extractTimestamp(Tuple2<String, Long> element, long recordTimestamp) {
                        return element.f1;
                    }
                }))
                // Map every record to (name, 1)
                .map(new MapFunction<Tuple2<String, Long>, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(Tuple2<String, Long> element) throws Exception {
                        return new Tuple2<>(element.f0, 1);
                    }
                })
                .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> element) throws Exception {
                        return element.f0;
                    }
                })
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                // Type parameters: input, output, key, window type
                .process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {
                    // Arguments: key, context, all elements buffered for this window, output collector
                    @Override
                    public void process(String key, Context context, Iterable<Tuple2<String, Integer>> elements, Collector<Tuple2<String, Integer>> out) throws Exception {
                        int count = 0;
                        for (Tuple2<String, Integer> value : elements) {
                            count = count + value.f1;
                        }
                        out.collect(new Tuple2<>(key, count));
                    }
                })
                .print();
        env.execute("process test");
    }
}
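Finally, the incremental and full-window styles can be combined: aggregate() (and reduce()) also accept a ProcessWindowFunction as a second argument, so the count is still computed incrementally while window metadata such as the window end is available when the result is emitted. A minimal sketch (not part of the original examples), assuming the same keyed stream of Tuple2<String, Integer> as above:

.window(TumblingEventTimeWindows.of(Time.seconds(10)))
.aggregate(
        // Incremental part: the state is just an Integer count
        new AggregateFunction<Tuple2<String, Integer>, Integer, Integer>() {
            @Override public Integer createAccumulator() { return 0; }
            @Override public Integer add(Tuple2<String, Integer> value, Integer acc) { return acc + value.f1; }
            @Override public Integer getResult(Integer acc) { return acc; }
            @Override public Integer merge(Integer a, Integer b) { return a + b; }
        },
        // Full-window part: receives exactly one element, the pre-aggregated count,
        // and attaches the window end timestamp from the context
        new ProcessWindowFunction<Integer, Tuple3<String, Long, Integer>, String, TimeWindow>() {
            @Override
            public void process(String key, Context context, Iterable<Integer> counts, Collector<Tuple3<String, Long, Integer>> out) {
                out.collect(new Tuple3<>(key, context.window().getEnd(), counts.iterator().next()));
            }
        })
.print();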