Flink SQL Interval Join

package com.ygy.gmall.realtime.test3;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
public class Flink_SQL_stream_join2 {

    /**
     * Demonstrates a processing-time interval join between two Kafka-backed tables:
     * rows from stream B join rows from stream A that arrived within the last 5 seconds.
     *
     * @param args unused
     * @throws Exception if job creation or execution fails
     */
    public static void main(String[] args) throws Exception {
        // Streaming environment; parallelism 1 keeps the demo output ordered and simple.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // Table environment bridged onto the streaming environment.
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        // Test stream A: single tab-delimited string column plus a processing-time attribute.
        tableEnv.executeSql("CREATE TABLE kafka_source_ygy_str_test (\n" +
                "    `str` string,\n" +
                "    proctime as PROCTIME()\n" +
                ") WITH (\n" +
                "    'connector' = 'kafka',\n" +
                "    'topic' = 'ygy_str_test',\n" +
                "    'properties.bootstrap.servers' = '***:9092',\n" +
                "    'properties.group.id' = 'ygy_str_test_group',\n" +
                "    'scan.startup.mode' = 'latest-offset',\n" +
                "    'format' = 'csv',\n" +
                "    'csv.ignore-parse-errors' = 'true',\n" +
                "    'csv.field-delimiter' = '\\t'\n" +
                ")");
        // Test stream B: two comma-delimited string columns plus a processing-time attribute.
        tableEnv.executeSql("CREATE TABLE kafka_source_ygy_str_test2 (\n" +
                "    `a_str` string,\n" +
                "    `b_str` string,\n" +
                "    proctime as PROCTIME()\n" +
                ") WITH (\n" +
                "    'connector' = 'kafka',\n" +
                "    'topic' = 'ygy_str_test2',\n" +
                // BUGFIX: the closing quote after the broker address was missing,
                // which made the whole DDL statement unparsable.
                "    'properties.bootstrap.servers' = '***:9092',\n" +
                "    'properties.group.id' = 'ygy_str_test_group',\n" +
                "    'scan.startup.mode' = 'latest-offset',\n" +
                "    'format' = 'csv',\n" +
                "    'csv.ignore-parse-errors' = 'true',\n" +
                "    'csv.field-delimiter' = ','\n" +
                ")");

        // Interval join: keep pairs where B's processing time falls within
        // [A's processing time - 5s, A's processing time].
        tableEnv.sqlQuery(
                "select\n" +
                        "    a.*,\n" +
                        "    b.*\n" +
                        "from\n" +
                        "    kafka_source_ygy_str_test a\n" +
                        "   join kafka_source_ygy_str_test2 b on a.str = b.a_str\n" +
                        "    where b.proctime BETWEEN a.proctime - INTERVAL '5' SECOND\n" +
                        "    AND  a.proctime "
        ).execute().print();

        // NOTE: no env.execute() here. The Table API call above already submits the
        // job; calling env.execute() with no DataStream operators defined would throw
        // "No operators defined in streaming topology".
    }
}

输出结果

+----+--------------------------------+-------------------------+--------------------------------+--------------------------------+-------------------------+
| op |                            str |                proctime |                          a_str |                          b_str |               proctime0 |
+----+--------------------------------+-------------------------+--------------------------------+--------------------------------+-------------------------+
| +I |                              a | 2021-06-23T08:01:29.307 |                              a |                              A | 2021-06-23T08:01:29.307 |

在测试时,如果另一条流的对应数据在五秒内没有到达,这条数据就会被直接过滤掉;能 join 上的只有五秒窗口内到达的数据

Interval Join 目前不支持 left join,如果强行使用,效果会和 inner join 一样(join 不上的数据被丢弃,不会补 NULL)

你可能感兴趣的:(flink,sql)