Flink针对标准的流处理和批处理提供了两种关系型API:Table API 和 SQL。Table API 可以直接进行select、filter、join等操作;Flink SQL则是基于Apache Calcite实现标准的SQL,和SQL语言一致,适合大部分开发人员。
Flink Table API和SQL 捆绑在Flink-Table依赖中,如果要使用需要添加以下依赖:
以Flink 1.7.2为例
<!--java-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<!--scala-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
一、首先需要创建一个TableEnvironment。TableEnvironment可以实现以下功能:
流数据查询
// Streaming: derive a StreamTableEnvironment from the stream execution environment.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
批数据查询
// Batch: derive a BatchTableEnvironment from the batch execution environment.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment tableEnv = BatchTableEnvironment.getTableEnvironment(env);
二、通过获取到的TableEnvironment对象创建Table对象,有两种类型的Table对象:输入Table(TableSource)和输出Table(TableSink)
TableSource
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
// CsvTableSource: file path, field names, field types.
TableSource csvSource = new CsvTableSource("path",new String[]{
"name","age"},new TypeInformation[]{
Types.STRING,Types.INT});
// Register the TableSource under the name "CsvTable".
tableEnv.registerTableSource("CsvTable", csvSource);
TableSink
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
// Write data to an external system through a TableSink.
// CsvTableSink(path, fieldDelimiter): the second argument is the literal
// delimiter written between fields — e.g. "," — not a description of it.
TableSink csvSink = new CsvTableSink("path", ",");
// Field names and types of the sink table.
String[] fieldNames = {"cid", "cname", "revsum"};
TypeInformation[] fieldTypes = {Types.INT, Types.STRING, Types.INT};
// Register the TableSink under the name "CsvSinkTable".
tableEnv.registerTableSink("CsvSinkTable", fieldNames, fieldTypes, csvSink);
三、使用Table API和SQL操作
SQL
// Query the table with SQL: total revenue of customers from France.
// Each concatenated fragment ends with a space — without them the statement
// collapses into "...from orderswhere country = 'france'group by..." and
// fails to parse.
Table revenue = tableEnv.sqlQuery(
        "select cid, cname, sum(revenue) as revsum " +
        "from orders " +
        "where country = 'france' " +
        "group by cid, cname");
Table API
Table orders = tableEnv.scan("orders");
// String-based Table API expressions use "===" for equality, and the filter
// field must match the table schema ("country", not "count").
Table revenue = orders
        .filter("country === 'france'")
        .groupBy("cid, cname")
        .select("cid, cname, revenue.sum as revSum");
四、DataStream、DataSet和Table之间的转换
Table->DataStream
// Convert a Table into an append-only DataStream of Row.
// "表对象" is a placeholder for the Table instance to convert.
DataStream<Row> dsRow = ((org.apache.flink.table.api.java.StreamTableEnvironment) tableEnv).toAppendStream(表对象, Row.class);
// Convert a Table into a DataStream of Tuple2<String, Integer>
// (the tuple arity/types must match the table's schema).
TupleTypeInfo<Tuple2<String, Integer>> tupleType = new TupleTypeInfo<>(Types.STRING, Types.INT);
DataStream<Tuple2<String, Integer>> dsTuple = ((org.apache.flink.table.api.java.StreamTableEnvironment) tableEnv).toAppendStream(revenue, tupleType);
Table->DataSet
// Convert a Table into a DataSet of Row (batch side — not a DataStream).
// "表对象" is a placeholder for the Table instance to convert.
DataSet<Row> dsRow = ((org.apache.flink.table.api.java.BatchTableEnvironment) tableEnv).toDataSet(表对象, Row.class);
// Convert a Table into a DataSet of Tuple2<String, Integer>
// (the tuple arity/types must match the table's schema).
TupleTypeInfo<Tuple2<String, Integer>> tupleType = new TupleTypeInfo<>(Types.STRING, Types.INT);
DataSet<Tuple2<String, Integer>> dsTuple = ((org.apache.flink.table.api.java.BatchTableEnvironment) tableEnv).toDataSet(revenue, tupleType);
DataStream->Table
// Register a DataStream as a table named "mytable"
// ("..." is a placeholder for a real stream source).
DataStream<Tuple2<String,String>> stream = ...;
((org.apache.flink.table.api.java.StreamTableEnvironment) tableEnv).registerDataStream("mytable", stream);
完整代码
package com.basic;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.StreamTableEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.sinks.CsvTableSink;
import org.apache.flink.table.sinks.TableSink;
import org.apache.flink.table.sources.CsvTableSource;
import org.apache.flink.table.sources.TableSource;
import org.apache.flink.types.Row;
/**
* FlinkTable 任务 流数据查询
*/
/**
 * FlinkTable job: streaming query example.
 *
 * <p>Registers a CSV source, runs a SQL aggregation, converts the result to
 * DataStreams, and writes it to a CSV sink.
 */
public class FlinkTableJobStream {
    public static void main(String[] args) throws Exception {
        // 1. Create the TableEnvironment from the stream environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

        // 2. Create a TableSource.
        // CsvTableSource: file path, field names, field types.
        TableSource csvSource = new CsvTableSource("path",
                new String[]{"name", "age"},
                new TypeInformation[]{Types.STRING, Types.INT});
        // Register the source under the name "CsvTable".
        tableEnv.registerTableSource("CsvTable", csvSource);

        // 3. Query with SQL: revenue per customer from France.
        // Each concatenated fragment ends with a space so the statement does
        // not collapse into "...from orderswhere...".
        // NOTE(review): the query reads a table named "orders", but only
        // "CsvTable" is registered above — an "orders" table with
        // cid/cname/revenue/country fields must be registered before this runs.
        Table revenue = tableEnv.sqlQuery(
                "select cid, cname, sum(revenue) as revsum " +
                "from orders " +
                "where country = 'france' " +
                "group by cid, cname");

        // 4. Convert the Table into DataStreams.
        DataStream<Row> dsRow = ((org.apache.flink.table.api.java.StreamTableEnvironment) tableEnv)
                .toAppendStream(revenue, Row.class);
        // NOTE(review): revenue has three columns (cid, cname, revsum) — a
        // Tuple2 target type will not match its schema; confirm before use.
        TupleTypeInfo<Tuple2<String, Integer>> tupleType =
                new TupleTypeInfo<>(Types.STRING, Types.INT);
        DataStream<Tuple2<String, Integer>> dsTuple =
                ((org.apache.flink.table.api.java.StreamTableEnvironment) tableEnv)
                        .toAppendStream(revenue, tupleType);

        // 5. Write the result out through a TableSink.
        // CsvTableSink(path, fieldDelimiter): the second argument is the
        // literal delimiter written between fields.
        TableSink csvSink = new CsvTableSink("path", ",");
        // Field names and types of the sink table.
        String[] fieldNames = {"cid", "cname", "revsum"};
        TypeInformation[] fieldTypes = {Types.INT, Types.STRING, Types.INT};
        tableEnv.registerTableSink("CsvSinkTable", fieldNames, fieldTypes, csvSink);
        // Emit the query result into the registered sink.
        revenue.insertInto("CsvSinkTable");

        // A streaming job only runs once execute() is called.
        env.execute("FlinkTableJobStream");
    }
}
package com.basic;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.table.api.BatchTableEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.sinks.CsvTableSink;
import org.apache.flink.table.sinks.TableSink;
import org.apache.flink.types.Row;
/**
* FlinkTable 任务 批数据查询
*/
/**
 * FlinkTable job: batch query example.
 *
 * <p>Runs a SQL aggregation, converts the result to DataSets, and writes it
 * to a CSV sink.
 */
public class FlinkTableBatch {
    public static void main(String[] args) throws Exception {
        // 1. Create the TableEnvironment from the batch environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        BatchTableEnvironment tableEnv = BatchTableEnvironment.getTableEnvironment(env);

        // 2. Query with SQL: revenue per customer from France.
        // Each concatenated fragment ends with a space so the statement does
        // not collapse into "...from orderswhere...".
        // NOTE(review): no table named "orders" is registered in this class —
        // register one (cid/cname/revenue/country) before running the job.
        Table revenue = tableEnv.sqlQuery(
                "select cid, cname, sum(revenue) as revsum " +
                "from orders " +
                "where country = 'france' " +
                "group by cid, cname");

        // 3. Convert the Table into DataSets (batch side, not DataStream).
        DataSet<Row> dsRow = ((org.apache.flink.table.api.java.BatchTableEnvironment) tableEnv)
                .toDataSet(revenue, Row.class);
        // NOTE(review): revenue has three columns (cid, cname, revsum) — a
        // Tuple2 target type will not match its schema; confirm before use.
        TupleTypeInfo<Tuple2<String, Integer>> tupleType =
                new TupleTypeInfo<>(Types.STRING, Types.INT);
        DataSet<Tuple2<String, Integer>> dsTuple =
                ((org.apache.flink.table.api.java.BatchTableEnvironment) tableEnv)
                        .toDataSet(revenue, tupleType);

        // 4. Write the result out through a TableSink.
        // CsvTableSink(path, fieldDelimiter): the second argument is the
        // literal delimiter written between fields.
        TableSink csvSink = new CsvTableSink("path", ",");
        // Field names and types of the sink table.
        String[] fieldNames = {"cid", "cname", "revsum"};
        TypeInformation[] fieldTypes = {Types.INT, Types.STRING, Types.INT};
        tableEnv.registerTableSink("CsvSinkTable", fieldNames, fieldTypes, csvSink);
        // Emit the query result into the registered sink.
        revenue.insertInto("CsvSinkTable");

        // insertInto only defines the sink; execute() triggers the batch job.
        env.execute("FlinkTableBatch");
    }
}
pom文件
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<!--flink table核心包-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
<version>1.7.2</version>
</dependency>
</dependencies>