Flink provides the Table API and SQL, which make it possible to work with a DataStream through relational operations and plain SQL queries.
Required dependencies. Note that all Scala-suffixed artifacts must share the same Scala version; the list below uses _2.11 throughout:
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-api-java-bridge_2.11</artifactId>
    <version>1.14.4</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner_2.11</artifactId>
    <version>1.14.4</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>1.14.4</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-common</artifactId>
    <version>1.14.4</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-csv</artifactId>
    <version>1.14.4</version>
    <scope>test</scope>
</dependency>
public static StreamExecutionEnvironment env = null;
public static StreamTableEnvironment tableEnv = null;
public static Table table = null;

@BeforeEach
public void before() {
    env = StreamExecutionEnvironment.getExecutionEnvironment();
    // batch runtime mode, matching the inBatchMode() table settings below
    env.setRuntimeMode(RuntimeExecutionMode.BATCH).setParallelism(1);
    List<Student> list = List.of(
            new Student("a", 60L),
            new Student("a", 80L),
            new Student("a", 70L),
            new Student("b", 60L),
            new Student("b", 80L),
            new Student("c", 50L)
    );
    // data source
    DataStreamSource<Student> source = env.fromCollection(list);
    // environment settings
    EnvironmentSettings setting = EnvironmentSettings.newInstance()
            .inBatchMode()
            .withBuiltInCatalogName("test_catalog")
            .withBuiltInDatabaseName("test_database")
            .build();
    tableEnv = StreamTableEnvironment.create(env, setting);
    // create a table from the stream; the schema is derived from the Student POJO
    table = tableEnv.fromDataStream(source);
}
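The Student POJO itself is not shown in the original listing; judging by the Student(name=..., score=...) output below, it is likely a Lombok @Data class. A minimal sketch of what it might look like, assuming Lombok is available:

// hypothetical Student POJO, inferred from the output format; not part of the original code
@Data               // generates getters/setters and the Student(name=..., score=...) toString
@NoArgsConstructor  // Flink POJO serialization needs a public no-arg constructor
@AllArgsConstructor
public static class Student {
    private String name;
    private Long score;
}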
@Test
public void table2DataStreamTest() throws Exception {
    // convert the insert-only table back into a typed DataStream
    DataStream<Student> dataStream = tableEnv.toDataStream(table, Student.class);
    dataStream.print("tableStream");
    env.execute("table2DataStreamTest");
}
Result:
tableStream> Student(name=a, score=60)
tableStream> Student(name=a, score=80)
tableStream> Student(name=a, score=70)
tableStream> Student(name=b, score=60)
tableStream> Student(name=b, score=80)
tableStream> Student(name=c, score=50)
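By default fromDataStream derives the table schema from the Student POJO. If the column types or order need adjusting, an explicit Schema can be passed instead; a minimal sketch using the same source:

// optional: create the table with an explicit schema instead of the derived one
Table typed = tableEnv.fromDataStream(source,
        Schema.newBuilder()
                .column("name", DataTypes.STRING())
                .column("score", DataTypes.BIGINT())
                .build());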
Group by name and compute the sum of score:
@Test
public void tableApiTest() throws Exception {
    // register the table as a temporary view
    tableEnv.createTemporaryView("student", table);
    // $ refers to the static import org.apache.flink.table.api.Expressions.$
    Table t = tableEnv.from("student")
            .groupBy($("name"))
            .select($("name"), $("score").sum().as("total"));
    DataStream<Row> dataStream = tableEnv.toChangelogStream(t, Schema.derived(), ChangelogMode.all());
    dataStream.print("tableStream");
    env.execute("tableApiTest");
}
Result (in batch mode the aggregate emits only final +I insert rows; in streaming mode the changelog would also contain -U/+U update rows):
tableStream> +I[a, 210]
tableStream> +I[b, 140]
tableStream> +I[c, 50]
@Test
public void sqlTest() throws Exception {
    // register the table as a temporary view
    tableEnv.createTemporaryView("student", table);
    String sql = "select name, sum(score) as total from student group by name";
    Table t = tableEnv.sqlQuery(sql);
    DataStream<Row> dataStream = tableEnv.toChangelogStream(t, Schema.derived(), ChangelogMode.upsert());
    dataStream.print("tableStream");
    env.execute("sqlTest");
}
Result:
tableStream> +I[a, 210]
tableStream> +I[b, 140]
tableStream> +I[c, 50]
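To just inspect a query result, the table can also be executed and collected directly, without wiring it into a DataStream job. A minimal sketch using the same sql string (CloseableIterator comes from org.apache.flink.util):

// run the query and print the resulting rows directly
try (CloseableIterator<Row> it = tableEnv.sqlQuery(sql).execute().collect()) {
    it.forEachRemaining(System.out::println);
}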
@Test
public void tableSink1Test() throws Exception {
    Schema schema = Schema.newBuilder()
            .column("name", DataTypes.STRING())
            .column("score", DataTypes.BIGINT())
            .build();
    // create a filesystem-backed sink table
    tableEnv.createTable("table1", TableDescriptor.forConnector("filesystem")
            .schema(schema)
            .option("path", "E:\\tmp")
            .format(FormatDescriptor.forFormat("csv")
                    .option("field-delimiter", "|")
                    .build())
            .build());
    // register the table as a temporary view
    tableEnv.createTemporaryView("student", table);
    String sql = "select name, score from student";
    Table t = tableEnv.sqlQuery(sql);
    DataStream<Row> dataStream = tableEnv.toChangelogStream(t, Schema.derived(), ChangelogMode.upsert());
    dataStream.print("tableStream");
    // executeInsert submits its own insert job; env.execute() runs the print() pipeline
    t.executeInsert("table1");
    env.execute("tableSink1Test");
}
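executeInsert is asynchronous: it returns a TableResult as soon as the insert job is submitted. In a test it can be useful to block until that job finishes; a minimal sketch:

// wait for the submitted insert job to complete (throws on failure)
t.executeInsert("table1").await();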
@Test
public void tableSink2Test() throws Exception {
    String createTable = "CREATE TABLE table2 (\n" +
            "  name STRING,\n" +
            "  total BIGINT\n" +
            ") WITH (\n" +
            "  'connector' = 'filesystem',\n" +
            "  'path' = 'E:\\tmp',\n" +
            "  'format' = 'csv',\n" +
            "  'csv.field-delimiter' = '|'\n" +
            ")";
    // create the sink table via DDL
    tableEnv.executeSql(createTable);
    // register the table as a temporary view
    tableEnv.createTemporaryView("student", table);
    String sql = "select name, sum(score) as total from student group by name";
    Table t = tableEnv.sqlQuery(sql);
    DataStream<Row> dataStream = tableEnv.toChangelogStream(t, Schema.derived(), ChangelogMode.upsert());
    dataStream.print("tableStream");
    // executeInsert submits its own insert job; env.execute() runs the print() pipeline
    t.executeInsert("table2");
    env.execute("tableSink2Test");
}
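Alternatively, the aggregation and the sink write can be expressed as a single INSERT INTO statement, removing the need for the intermediate Table object; a minimal sketch:

// submit the aggregation and the write as one INSERT INTO job
tableEnv.executeSql(
        "INSERT INTO table2 SELECT name, SUM(score) AS total FROM student GROUP BY name");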
Check the files generated under the configured path.
@Test
public void explainTest() throws Exception {
    // register the table as a temporary view
    tableEnv.createTemporaryView("student", table);
    String sql = "select name, sum(score) as total from student group by name";
    Table t = tableEnv.sqlQuery(sql);
    System.out.println(t.explain());
}
Result:
== Abstract Syntax Tree ==
LogicalAggregate(group=[{0}], total=[SUM($1)])
+- LogicalTableScan(table=[[test_catalog, test_database, Unregistered_DataStream_Source_1]])

== Optimized Physical Plan ==
HashAggregate(isMerge=[true], groupBy=[name], select=[name, Final_SUM(sum$0) AS total])
+- Exchange(distribution=[hash[name]])
   +- LocalHashAggregate(groupBy=[name], select=[name, Partial_SUM(score) AS sum$0])
      +- TableSourceScan(table=[[test_catalog, test_database, Unregistered_DataStream_Source_1]], fields=[name, score])

== Optimized Execution Plan ==
HashAggregate(isMerge=[true], groupBy=[name], select=[name, Final_SUM(sum$0) AS total])
+- Exchange(distribution=[hash[name]])
   +- LocalHashAggregate(groupBy=[name], select=[name, Partial_SUM(score) AS sum$0])
      +- TableSourceScan(table=[[test_catalog, test_database, Unregistered_DataStream_Source_1]], fields=[name, score])
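explain also accepts ExplainDetail flags (from org.apache.flink.table.api) to enrich the plan output, for example with per-operator changelog modes and cost estimates; a minimal sketch:

// print the plan including changelog modes and estimated costs
System.out.println(t.explain(ExplainDetail.CHANGELOG_MODE, ExplainDetail.ESTIMATED_COST));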