本文的基础环境可以参考flink 1.10.1 java版本wordcount演示 (nc + socket)
overwindow的处理方式是每接收到一条数据,都进行一次计算输出。
以事件时间为参考,以overwindow的滚动窗口的方式,统计窗口范围内的数据,包括数据个数,平均值等。
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner_2.11</artifactId>
    <version>1.10.1</version>
</dependency>
不同版本,这里需要添加的依赖包可能不同
package com.demo.sql;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Over;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
/**
 * Demonstrates event-time over-windows with the Flink (1.10.x) Table API and SQL.
 *
 * <p>Reads sensor readings from {@code data/sensor.txt} (CSV: id, epoch-seconds,
 * temperature), assigns event-time timestamps with 2 seconds of allowed
 * out-of-orderness, and computes a bounded row-count over-window (the 2 preceding
 * rows plus the current row, partitioned by sensor id) twice: once via the Table
 * API and once via SQL. An over-window emits one result row per input row.
 */
public class FlinkSqlOverWindow {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Event time is required so the over-window can be ordered by the rowtime attribute "rt".
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(1);

        // 1. Read the raw text input.
        DataStream<String> inputStream = env.readTextFile("data/sensor.txt");

        // 2. Parse each CSV line into a POJO and assign event-time timestamps.
        DataStream<SensorData> dataStream = inputStream
                .map(line -> {
                    String[] fields = line.split(",");
                    // Long.valueOf / Double.valueOf instead of the deprecated
                    // new Long(...) / new Double(...) boxing constructors.
                    return new SensorData(fields[0], Long.valueOf(fields[1]), Double.valueOf(fields[2]));
                })
                // Generic lambda return types are erased at compile time;
                // give Flink an explicit type hint for the POJO.
                .returns(SensorData.class)
                // Watermarks trail the max seen timestamp by 2 seconds (bounded out-of-orderness).
                .assignTimestampsAndWatermarks(
                        new BoundedOutOfOrdernessTimestampExtractor<SensorData>(Time.seconds(2)) {
                            @Override
                            public long extractTimestamp(SensorData element) {
                                // dt is in epoch seconds; Flink expects epoch milliseconds.
                                return element.getDt() * 1000L;
                            }
                        });

        // 3. Create the table environment.
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 4. Register the stream as a table, exposing the rowtime attribute "rt".
        Table dataTable = tableEnv.fromDataStream(dataStream, "id, dt, temperature, rt.rowtime");
        // dataTable.printSchema();
        tableEnv.createTemporaryView("sensor", dataTable);
        tableEnv.toAppendStream(dataTable, Row.class).print();

        // Table API: bounded event-time row-count over-window
        // (ordered by "rt", the 2 preceding rows plus the current row).
        Table overResult = dataTable
                .window(Over.partitionBy("id").orderBy("rt").preceding("2.rows").as("ow"))
                .select("id, rt, id.count over ow, temperature.avg over ow");

        // SQL: the same bounded event-time row-count over-window.
        Table overSqlResult = tableEnv.sqlQuery(
                "select id, rt, count(id) over ow, avg(temperature) over ow " +
                " from sensor " +
                " window ow as (partition by id order by rt rows between 2 preceding and current row)");

        dataTable.printSchema();
        tableEnv.toAppendStream(overResult, Row.class).print("result");
        tableEnv.toRetractStream(overSqlResult, Row.class).print("sql");
        env.execute();
    }
}
package com.demo.sql;
/**
 * Mutable POJO holding one sensor reading: a sensor id, an event timestamp
 * in epoch seconds ({@code dt}), and a temperature value.
 *
 * <p>Provides the public no-arg constructor and getters/setters that Flink's
 * POJO type extraction relies on.
 */
public class SensorData {

    private String id;
    private Long dt;
    private Double temperature;

    /** No-arg constructor required for POJO serialization frameworks. */
    public SensorData() {
    }

    /**
     * Creates a fully-populated reading.
     *
     * @param id          sensor identifier
     * @param dt          event time in epoch seconds
     * @param temperature measured temperature
     */
    public SensorData(String id, Long dt, Double temperature) {
        this.id = id;
        this.dt = dt;
        this.temperature = temperature;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Long getDt() {
        return dt;
    }

    public void setDt(Long dt) {
        this.dt = dt;
    }

    public Double getTemperature() {
        return temperature;
    }

    public void setTemperature(Double temperature) {
        this.temperature = temperature;
    }

    @Override
    public String toString() {
        // Note: the "time=" label (for the dt field) is kept for output compatibility.
        return new StringBuilder("SensorData{id='")
                .append(id)
                .append("', time=")
                .append(dt)
                .append(", temperature=")
                .append(temperature)
                .append('}')
                .toString();
    }
}
sensor_1,1547718199,35.8
sensor_6,1547718201,15.4
sensor_7,1547718202,6.7
sensor_10,1547718205,38.1
sensor_1,1547718207,36.3
sensor_1,1547718209,32.8
sensor_1,1547718212,37.1
root
|-- id: STRING
|-- dt: BIGINT
|-- temperature: DOUBLE
|-- rt: TIMESTAMP(3) *ROWTIME*
sensor_1,1547718199,35.8,2019-01-17 09:43:19.0
sensor_6,1547718201,15.4,2019-01-17 09:43:21.0
sensor_7,1547718202,6.7,2019-01-17 09:43:22.0
sensor_10,1547718205,38.1,2019-01-17 09:43:25.0
sensor_1,1547718207,36.3,2019-01-17 09:43:27.0
sensor_1,1547718209,32.8,2019-01-17 09:43:29.0
sensor_1,1547718212,37.1,2019-01-17 09:43:32.0
result> sensor_1,2019-01-17 09:43:19.0,1,35.8
sql> (true,sensor_1,2019-01-17 09:43:19.0,1,35.8)
result> sensor_6,2019-01-17 09:43:21.0,1,15.4
result> sensor_7,2019-01-17 09:43:22.0,1,6.7
sql> (true,sensor_6,2019-01-17 09:43:21.0,1,15.4)
result> sensor_10,2019-01-17 09:43:25.0,1,38.1
sql> (true,sensor_7,2019-01-17 09:43:22.0,1,6.7)
result> sensor_1,2019-01-17 09:43:27.0,2,36.05
sql> (true,sensor_10,2019-01-17 09:43:25.0,1,38.1)
result> sensor_1,2019-01-17 09:43:29.0,3,34.96666666666666
sql> (true,sensor_1,2019-01-17 09:43:27.0,2,36.05)
result> sensor_1,2019-01-17 09:43:32.0,3,35.4
sql> (true,sensor_1,2019-01-17 09:43:29.0,3,34.96666666666666)
sql> (true,sensor_1,2019-01-17 09:43:32.0,3,35.4)
可以看出overwindow的处理方式是:每接收到一条数据,都进行一次计算输出。这里是以事件时间排序,以前两条数据和当前数据为窗口进行计算。当之前没有数据时,只以当前数据为窗口计算结果。