Flink Learning (15): Flink Table API

Contents

  • Preface
  • 1. Adding the dependencies
  • 2. Creating a table from a stream
  • 3. Converting a table to a stream
  • 4. Table API
  • 5. SQL
  • 6. Creating a table with tableEnv
  • 7. Creating a table with SQL
  • 8. explain


Preface

Flink provides the Table API and SQL on top of the DataStream API, so a DataStream can be queried and transformed with relational expressions and SQL statements.

1. Adding the dependencies

The relevant Maven dependencies (keep all Scala-suffixed artifacts on the same Scala version):

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-api-java-bridge_2.11</artifactId>
    <version>1.14.4</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner_2.11</artifactId>
    <version>1.14.4</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>1.14.4</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-common</artifactId>
    <version>1.14.4</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-csv</artifactId>
    <version>1.14.4</version>
    <scope>test</scope>
</dependency>

2. Creating a table from a stream

    public static StreamExecutionEnvironment env = null;

    public static StreamTableEnvironment tableEnv = null;

    public static Table table = null;

    @BeforeEach
    public void before() {
        env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING).setParallelism(1);

        List<Student> list = List.of(
                new Student("a", 60L),
                new Student("a", 80L),
                new Student("a", 70L),
                new Student("b", 60L),
                new Student("b", 80L),
                new Student("c", 50L)
        );

        //data source
        DataStreamSource<Student> source = env.fromCollection(list);

        //environment settings: inBatchMode() lets the table planner treat this bounded input as a batch job
        EnvironmentSettings setting = EnvironmentSettings.newInstance()
                .inBatchMode()
                .withBuiltInCatalogName("test_catalog")
                .withBuiltInDatabaseName("test_database")
                .build();
        tableEnv = StreamTableEnvironment.create(env, setting);

        //create a table from the stream
        table = tableEnv.fromDataStream(source);
    }
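
The tests reference a Student class that the post does not show. The minimal sketch below is an assumption reconstructed from the printed output (Student(name=a, score=60)); the original class may well use Lombok's @Data instead.

//Hypothetical Student POJO used by the examples (the original class is not shown in the post).
//It must satisfy Flink's POJO rules: public class, public no-arg constructor, public getters/setters.
public class Student {

    private String name;
    private Long score;

    public Student() {
    }

    public Student(String name, Long score) {
        this.name = name;
        this.score = score;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Long getScore() {
        return score;
    }

    public void setScore(Long score) {
        this.score = score;
    }

    @Override
    public String toString() {
        return "Student(name=" + name + ", score=" + score + ")";
    }
}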

3. Converting a table to a stream

    @Test
    public void table2DataStreamTest() throws Exception {
        DataStream<Student> dataStream = tableEnv.toDataStream(table, Student.class);
        dataStream.print("tableStream");
        env.execute("table2DataStreamTest");
    }

Result:

tableStream> Student(name=a, score=60)
tableStream> Student(name=a, score=80)
tableStream> Student(name=a, score=70)
tableStream> Student(name=b, score=60)
tableStream> Student(name=b, score=80)
tableStream> Student(name=c, score=50)
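
If no target class is given, the table can also be converted into a stream of generic Row objects. A short sketch with the same setup as above:

    //convert the table to a DataStream<Row> instead of mapping it back to the Student POJO
    DataStream<Row> rowStream = tableEnv.toDataStream(table);
    rowStream.print("rowStream");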

4. Table API

Group by name and sum the score:

    @Test
    public void tableApiTest() throws Exception {
        //register the table as a temporary view
        tableEnv.createTemporaryView("student", table);

        //$ comes from the static import org.apache.flink.table.api.Expressions.$
        Table t = tableEnv.from("student")
                .groupBy($("name"))
                .select($("name"), $("score").sum().as("total"));

        DataStream<Row> dataStream = tableEnv.toChangelogStream(t, Schema.derived(), ChangelogMode.all());

        dataStream.print("tableStream");
        env.execute("tableApiTest");
    }

Result:

tableStream> +I[a, 210]
tableStream> +I[b, 140]
tableStream> +I[c, 50]
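
Further relational operations can be chained onto this result. As an illustration (not part of the original test), the following sketch keeps only the groups whose total is greater than 100:

    //keep only the groups whose summed score is greater than 100
    Table filtered = tableEnv.from("student")
            .groupBy($("name"))
            .select($("name"), $("score").sum().as("total"))
            .filter($("total").isGreater(100));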

5. SQL

    @Test
    public void sqlTest() throws Exception {
        //register the table as a temporary view
        tableEnv.createTemporaryView("student", table);

        String sql = "select name, sum(score) as total from student group by name";
        Table t = tableEnv.sqlQuery(sql);

        DataStream<Row> dataStream = tableEnv.toChangelogStream(t, Schema.derived(), ChangelogMode.upsert());

        dataStream.print("tableStream");
        env.execute("sqlTest");
    }

Result:

tableStream> +I[a, 210]
tableStream> +I[b, 140]
tableStream> +I[c, 50]
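
The query does not have to go back through a DataStream at all; the Table can be executed and its rows collected on the client. A short sketch (CloseableIterator comes from org.apache.flink.util):

    //execute the query directly and iterate over the result rows on the client
    try (CloseableIterator<Row> it = t.execute().collect()) {
        while (it.hasNext()) {
            System.out.println(it.next());
        }
    }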

6. Creating a table with tableEnv

    @Test
    public void tableSink1Test() throws Exception {
        Schema schema = Schema.newBuilder()
                .column("name", DataTypes.STRING())
                .column("score", DataTypes.BIGINT())
                .build();

        //create the sink table via the TableEnvironment API
        tableEnv.createTable("table1", TableDescriptor.forConnector("filesystem")
                .schema(schema)
                .option("path", "E:\\tmp")
                .format(FormatDescriptor.forFormat("csv")
                        .option("field-delimiter", "|")
                        .build())
                .build());

        //register the table as a temporary view
        tableEnv.createTemporaryView("student", table);
        String sql = "select name, score from student";
        Table t = tableEnv.sqlQuery(sql);
        DataStream<Row> dataStream = tableEnv.toChangelogStream(t, Schema.derived(), ChangelogMode.upsert());

        dataStream.print("tableStream");
        //write the query result into table1
        t.executeInsert("table1");
        env.execute("sqlTest");
    }

Check the generated files under the E:\tmp directory.
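
Since table1 is registered in the table environment, the written data can also be read back and printed. A quick sketch (run after the insert job has finished):

    //read the registered filesystem table back and print its rows
    tableEnv.from("table1").execute().print();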

7. Creating a table with SQL

    @Test
    public void tableSink2Test() throws Exception {
        //create the sink table with a DDL statement
        String createTableSql = "CREATE TABLE table2 (\n" +
                "  name STRING,\n" +
                "  total BIGINT\n" +
                ") WITH (\n" +
                "'connector' = 'filesystem',\n" +
                "'path' = 'E:\\tmp',\n" +
                "'format' = 'csv',\n" +
                "'csv.field-delimiter'='|'\n" +
                ")";
        TableResult tableResult = tableEnv.executeSql(createTableSql);

        //register the table as a temporary view
        tableEnv.createTemporaryView("student", table);
        String sql = "select name, sum(score) as total from student group by name";
        Table t = tableEnv.sqlQuery(sql);

        DataStream<Row> dataStream = tableEnv.toChangelogStream(t, Schema.derived(), ChangelogMode.upsert());

        dataStream.print("tableStream");
        //write the aggregated result into table2
        t.executeInsert("table2");
        env.execute("sqlTest");
    }

Check the generated files again.

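As a side note, the DDL of the registered table can be printed back with SHOW CREATE TABLE (supported in Flink 1.14); a small sketch:

    //print the DDL of the table that was just created
    tableEnv.executeSql("SHOW CREATE TABLE table2").print();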

8. explain

    @Test
    public void explainTest() throws Exception {
        //register the table as a temporary view
        tableEnv.createTemporaryView("student", table);
        String sql = "select name, sum(score) as total from student group by name";
        Table t = tableEnv.sqlQuery(sql);

        //print the abstract syntax tree, the optimized physical plan and the execution plan
        System.out.println(t.explain());
    }

Result:

== Abstract Syntax Tree ==
LogicalAggregate(group=[{0}], total=[SUM($1)])
+- LogicalTableScan(table=[[test_catalog, test_database, Unregistered_DataStream_Source_1]])

== Optimized Physical Plan ==
HashAggregate(isMerge=[true], groupBy=[name], select=[name, Final_SUM(sum$0) AS total])
+- Exchange(distribution=[hash[name]])
   +- LocalHashAggregate(groupBy=[name], select=[name, Partial_SUM(score) AS sum$0])
      +- TableSourceScan(table=[[test_catalog, test_database, Unregistered_DataStream_Source_1]], fields=[name, score])

== Optimized Execution Plan ==
HashAggregate(isMerge=[true], groupBy=[name], select=[name, Final_SUM(sum$0) AS total])
+- Exchange(distribution=[hash[name]])
   +- LocalHashAggregate(groupBy=[name], select=[name, Partial_SUM(score) AS sum$0])
      +- TableSourceScan(table=[[test_catalog, test_database, Unregistered_DataStream_Source_1]], fields=[name, score])
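
The same plan can also be obtained without building a Table object first, via TableEnvironment.explainSql. A short sketch:

    //explain the SQL statement directly on the table environment
    System.out.println(tableEnv.explainSql(
            "select name, sum(score) as total from student group by name"));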
