flink 1.10.1 java版本sql OverWindow示例(每事件出结果)

本文的基础环境可以参考flink 1.10.1 java版本wordcount演示 (nc + socket)

overwindow的处理方式是每接收到一条数据,都进行一次计算输出。

以事件时间为参考,以overwindow的滚动窗口的方式,统计窗口范围内的数据,包括数据个数,平均值等。

1. 添加依赖


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>1.10.1</version>
        </dependency>

不同版本,这里可能需要添加的包不同

2. 程序代码

package com.demo.sql;

import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Over;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;


/**
 * Demonstrates event-time OVER windows with the Flink (1.10) Table API and SQL.
 *
 * <p>Reads sensor readings ("id,timestampSeconds,temperature") from a text file,
 * assigns event-time timestamps with a 2-second bounded-out-of-orderness watermark,
 * and computes a bounded row-count OVER window (the 2 preceding rows plus the
 * current row, per sensor id) twice: once via the Table API and once via an
 * equivalent SQL query. An OVER window emits one result row per input row.
 */
public class FlinkSqlOverWindow {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Use event time so the OVER window is ordered by the rowtime attribute "rt".
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Parallelism 1 keeps the printed output in a deterministic order for the demo.
        env.setParallelism(1);

        // 1. Read the raw input lines.
        DataStream<String> inputStream = env.readTextFile("data/sensor.txt");

        // 2. Parse each line into a POJO and attach event-time timestamps/watermarks
        //    tolerating up to 2 seconds of out-of-order data.
        //    Note: the extractor must be parameterized with <SensorData>; a raw type
        //    would leave the abstract extractTimestamp(Object) unimplemented.
        DataStream<SensorData> dataStream = inputStream.map(line -> {
            String[] fields = line.split(",");
            return new SensorData(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<SensorData>(Time.seconds(2)) {
            @Override
            public long extractTimestamp(SensorData element) {
                // dt is in epoch seconds; Flink expects epoch milliseconds.
                return element.getDt() * 1000L;
            }
        });

        // 3. Create the table environment.
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 4. Derive a table from the stream; "rt.rowtime" exposes the event time
        //    as a rowtime attribute named "rt".
        Table dataTable = tableEnv.fromDataStream(dataStream, "id, dt, temperature, rt.rowtime");

        tableEnv.createTemporaryView("sensor", dataTable);

        // Print the raw rows so the OVER-window results can be compared against them.
        tableEnv.toAppendStream(dataTable, Row.class).print();

        // Table API: bounded event-time row-count OVER window
        // (ordered by "rt", 2 preceding rows plus the current row, per id).
        Table overResult = dataTable.window(Over.partitionBy("id").orderBy("rt").preceding("2.rows").as("ow"))
                .select("id, rt, id.count over ow, temperature.avg over ow");

        // SQL: the equivalent bounded event-time row-count OVER window.
        Table overSqlResult = tableEnv.sqlQuery("select id, rt, count(id) over ow, avg(temperature) over ow " +
                " from sensor " +
                " window ow as (partition by id order by rt rows between 2 preceding and current row)");

        dataTable.printSchema();
        tableEnv.toAppendStream(overResult, Row.class).print("result");
        tableEnv.toRetractStream(overSqlResult, Row.class).print("sql");

        env.execute();

    }
}

3. 辅助代码SensorData

package com.demo.sql;

/**
 * Mutable POJO holding one sensor reading: sensor id, epoch-second timestamp ({@code dt}),
 * and temperature. The no-arg constructor and getter/setter pairs satisfy Flink's POJO rules.
 */
public class SensorData {

    private String id;          // sensor identifier, e.g. "sensor_1"
    private Long dt;            // event time in epoch seconds
    private Double temperature; // temperature reading

    /** No-arg constructor (required for POJO serialization). */
    public SensorData() {
    }

    /** Convenience constructor initializing all three fields. */
    public SensorData(String id, Long dt, Double temperature) {
        this.id = id;
        this.dt = dt;
        this.temperature = temperature;
    }

    public String getId() { return id; }

    public void setId(String id) { this.id = id; }

    public Long getDt() { return dt; }

    public void setDt(Long dt) { this.dt = dt; }

    public Double getTemperature() { return temperature; }

    public void setTemperature(Double temperature) { this.temperature = temperature; }

    /** Renders the reading; the {@code dt} field is labeled "time" in the output. */
    @Override
    public String toString() {
        return "SensorData{" + "id='" + id + '\'' + ", time=" + dt + ", temperature=" + temperature + '}';
    }
}

4. 测试数据

sensor_1,1547718199,35.8
sensor_6,1547718201,15.4
sensor_7,1547718202,6.7
sensor_10,1547718205,38.1
sensor_1,1547718207,36.3
sensor_1,1547718209,32.8
sensor_1,1547718212,37.1

5. 执行程序输出结果

root
 |-- id: STRING
 |-- dt: BIGINT
 |-- temperature: DOUBLE
 |-- rt: TIMESTAMP(3) *ROWTIME*

sensor_1,1547718199,35.8,2019-01-17 09:43:19.0
sensor_6,1547718201,15.4,2019-01-17 09:43:21.0
sensor_7,1547718202,6.7,2019-01-17 09:43:22.0
sensor_10,1547718205,38.1,2019-01-17 09:43:25.0
sensor_1,1547718207,36.3,2019-01-17 09:43:27.0
sensor_1,1547718209,32.8,2019-01-17 09:43:29.0
sensor_1,1547718212,37.1,2019-01-17 09:43:32.0

result> sensor_1,2019-01-17 09:43:19.0,1,35.8
sql> (true,sensor_1,2019-01-17 09:43:19.0,1,35.8)
result> sensor_6,2019-01-17 09:43:21.0,1,15.4
result> sensor_7,2019-01-17 09:43:22.0,1,6.7
sql> (true,sensor_6,2019-01-17 09:43:21.0,1,15.4)
result> sensor_10,2019-01-17 09:43:25.0,1,38.1
sql> (true,sensor_7,2019-01-17 09:43:22.0,1,6.7)
result> sensor_1,2019-01-17 09:43:27.0,2,36.05
sql> (true,sensor_10,2019-01-17 09:43:25.0,1,38.1)
result> sensor_1,2019-01-17 09:43:29.0,3,34.96666666666666
sql> (true,sensor_1,2019-01-17 09:43:27.0,2,36.05)
result> sensor_1,2019-01-17 09:43:32.0,3,35.4
sql> (true,sensor_1,2019-01-17 09:43:29.0,3,34.96666666666666)
sql> (true,sensor_1,2019-01-17 09:43:32.0,3,35.4)

可以看出overwindow的处理方式是,每接收到一条数据,都进行一次计算输出。这里是以事件时间排序,以前两条数据和当前数据为窗口进行计算。当之前没有数据时,只以当前数据为窗口计算结果。

你可能感兴趣的:(大数据技术,java,flink,sql)