Flink Table API and SQL programs can connect to external systems to read and write both batch and streaming tables. A TableSource provides access to data stored in external systems such as databases, key-value stores, message queues, or file systems; a TableSink emits the results of a computation to an external storage system. Depending on the kind of source and sink, different formats are supported, e.g. CSV, Parquet, or ORC. Below we look at some of the built-in TableSources and TableSinks and how to register them with Flink; once registered, they can be accessed through the Table API & SQL. The required dependencies are listed in the two tables that follow, with a usage sketch after them.
Name | Version | Maven dependency | SQL Client JAR |
---|---|---|---|
FileSystem | – | Built-in | Built-in |
Elasticsearch | 6 | flink-connector-elasticsearch6 | Download |
Elasticsearch | 7 | flink-connector-elasticsearch7 | Download |
Apache Kafka | 0.10 | flink-connector-kafka-0.10 | Download |
Apache Kafka | 0.11 | flink-connector-kafka-0.11 | Download |
Apache Kafka | 0.11+ (universal) | flink-connector-kafka | Download |
Apache HBase | 1.4.3 | flink-connector-hbase | Download |
JDBC | – | flink-connector-jdbc | Download |
Name | Maven dependency | SQL Client JAR |
---|---|---|
Old CSV (for files) | Built-in | Built-in |
CSV (for Kafka) | flink-csv | Built-in |
JSON | flink-json | Built-in |
Apache Avro | flink-avro | Download |
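Connector and format modules are added to the project like any other Flink dependency. For example, pulling in the universal Kafka connector with Maven might look like this (a sketch: the 1.10.0 version and the _2.11 Scala suffix are assumptions and must match your Flink distribution):
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <!-- assumption: use the version of your Flink installation -->
    <version>1.10.0</version>
</dependency>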
The FileSystem connector allows reading from a local or distributed file system and currently supports only the Old CSV format. Source: Batch; Source: Streaming; Format: OldCsv-only. The following batch example reads a CSV directory through the descriptor API:
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.table.api.{DataTypes, Table}
import org.apache.flink.table.api.scala.BatchTableEnvironment
import org.apache.flink.table.descriptors.{FileSystem, OldCsv, Schema}
import org.apache.flink.streaming.api.scala._
object FlinkTableFileSystem_Batch {
def main(args: Array[String]): Unit = {
val fbEnv = ExecutionEnvironment.getExecutionEnvironment
val fbtEnv = BatchTableEnvironment.create(fbEnv)
// connect to the target file system
val fileSystem=new FileSystem().path("C:\\Users\\Administrator\\Desktop\\csv")
// currently only the OldCsv format is supported
val oldCsv = new OldCsv()
.fieldDelimiter(",")
.lineDelimiter("\n")
.ignoreFirstLine()
.ignoreParseErrors()
// define the schema of the table to be read
val schema = new Schema()
.field("id",DataTypes.INT())
.field("name",DataTypes.STRING())
.field("sex",DataTypes.BOOLEAN())
.field("salary",DataTypes.DOUBLE())
.field("dept",DataTypes.STRING())
// create a temporary table from the connector
fbtEnv.connect(fileSystem)
.withFormat(oldCsv)
.withSchema(schema)
.createTemporaryTable("t_employee")
// run a query against the registered table
val resultTable: Table = fbtEnv.sqlQuery("select * from t_employee")
// print the table schema
resultTable.printSchema()
// convert the table to a DataSet and print it
fbtEnv.toDataSet[(Int,String,Boolean,Double,String)](resultTable)
.print()
}
}
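For reference, a hypothetical input file in the csv directory could look like this (the first line is a header, which ignoreFirstLine() skips; the rows are made-up sample data):
id,name,sex,salary,dept
1,zhangsan,true,15000.0,research
2,lisi,false,18000.0,sales
3,wangwu,true,12000.0,research
The same source table can equivalently be declared with SQL DDL: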
CREATE TABLE t_employee (
id int,
name string,
sex boolean,
salary double,
dept string
) WITH (
'connector.type' = 'filesystem',
'connector.path' = 'file:///path/to/whatever',
'format.type' = 'csv',
'format.field-delimiter' = ',',
'format.line-delimiter' = U&'\000A',
'format.quote-character' = '',
'format.ignore-first-line' = 'true',
'format.ignore-parse-errors' = 'true',
'format.fields.0.name' = 'id',
'format.fields.0.data-type' = 'integer',
'format.fields.1.name' = 'name',
'format.fields.1.data-type' = 'string',
'format.fields.2.name' = 'sex',
'format.fields.2.data-type' = 'boolean',
'format.fields.3.name' = 'salary',
'format.fields.3.data-type' = 'double',
'format.fields.4.name' = 'dept',
'format.fields.4.data-type' = 'string'
)
object FlinkTableFileSystem_Batch_DDL {
def main(args: Array[String]): Unit = {
val fbEnv = ExecutionEnvironment.getExecutionEnvironment
val fbtEnv = BatchTableEnvironment.create(fbEnv)
var ddl = ... // the CREATE TABLE DDL shown above
fbtEnv.sqlUpdate(ddl)
// run a query against the registered table
val resultTable: Table = fbtEnv.sqlQuery("select * from t_employee")
// print the table schema
resultTable.printSchema()
// convert the table to a DataSet and print it
fbtEnv.toDataSet[(Int,String,Boolean,Double,String)](resultTable)
.print()
}
}
import org.apache.flink.api.common.typeinfo.{TypeInformation, Types}
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.scala.BatchTableEnvironment
import org.apache.flink.table.sources.CsvTableSource
import org.apache.flink.types.Row
object FlinkTableFileSystem_Batch_CSVTableSource {
def main(args: Array[String]): Unit = {
val fbEnv = ExecutionEnvironment.getExecutionEnvironment
val fbtEnv = BatchTableEnvironment.create(fbEnv)
// create a CsvTableSource instance
var path="C:\\Users\\Administrator\\Desktop\\csv"
var fieldNames=Array[String]("id","name","sex","salary","dept")
var fieldTypes=Array[TypeInformation[_]](Types.INT,Types.STRING,Types.BOOLEAN,Types.DOUBLE,Types.STRING)
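// constructor args: path, field names, field types, field delimiter, row delimiter, quote character, ignore first line, comment prefix, lenient parsing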
val csvTableSource = new CsvTableSource(path, fieldNames, fieldTypes,",","\n",null,true,null,true)
// register the source as a table
fbtEnv.registerTableSource("t_employee",csvTableSource)
var resultTable=fbtEnv.sqlQuery("select * from t_employee")
// convert the table to a DataSet and print it
fbtEnv.toDataSet[Row](resultTable)
.print()
}
}
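The nine positional constructor arguments are hard to read; CsvTableSource also provides a builder that expresses the same source more clearly (a sketch, assuming the Flink 1.10.x API used throughout this section):
val csvTableSource = CsvTableSource.builder()
  .path("C:\\Users\\Administrator\\Desktop\\csv")
  .field("id", Types.INT)
  .field("name", Types.STRING)
  .field("sex", Types.BOOLEAN)
  .field("salary", Types.DOUBLE)
  .field("dept", Types.STRING)
  .fieldDelimiter(",")      // column separator
  .lineDelimiter("\n")      // row separator
  .ignoreFirstLine()        // skip the header row
  .ignoreParseErrors()      // drop malformed rows instead of failing
  .build()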
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.api.{DataTypes, Table}
import org.apache.flink.table.descriptors.{FileSystem, OldCsv, Schema}
import org.apache.flink.types.Row
object FlinkTableFileSystem_Stream_TableAPI {
def main(args: Array[String]): Unit = {
val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
val fstEnv = StreamTableEnvironment.create(fsEnv)
// connect to the target file system
val fileSystem=new FileSystem().path("file:///C:\\Users\\Administrator\\Desktop\\csv")
// currently only the OldCsv format is supported
val oldCsv = new OldCsv()
.fieldDelimiter(",")
.lineDelimiter("\n")
.ignoreFirstLine()
.ignoreParseErrors()
// define the schema of the table to be read
val schema = new Schema()
.field("id",DataTypes.INT())
.field("name",DataTypes.STRING())
.field("sex",DataTypes.BOOLEAN())
.field("salary",DataTypes.DOUBLE())
.field("dept",DataTypes.STRING())
// create a temporary table from the connector
fstEnv.connect(fileSystem)
.withFormat(oldCsv)
.withSchema(schema)
.createTemporaryTable("t_employee")
// run a query against the registered table
val resultTable: Table = fstEnv.sqlQuery("select * from t_employee")
// print the table schema
resultTable.printSchema()
fstEnv.toAppendStream[Row](resultTable)
.print()
fstEnv.execute("FlinkTableFileSystem_Stream")
}
}
CREATE TABLE t_employee (
id int,
name string,
sex boolean,
salary double,
dept string
) WITH (
'connector.type' = 'filesystem',
'connector.path' = 'file:///C:\Users\Administrator\Desktop\csv',
'format.type' = 'csv',
'format.field-delimiter' = ',',
'format.line-delimiter' = U&'\000A',
'format.ignore-first-line' = 'true',
'format.ignore-parse-errors' = 'true',
'format.fields.0.name' = 'id',
'format.fields.0.data-type' = 'integer',
'format.fields.1.name' = 'name',
'format.fields.1.data-type' = 'string',
'format.fields.2.name' = 'sex',
'format.fields.2.data-type' = 'boolean',
'format.fields.3.name' = 'salary',
'format.fields.3.data-type' = 'double',
'format.fields.4.name' = 'dept',
'format.fields.4.data-type' = 'string'
)
val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
val fstEnv = StreamTableEnvironment.create(fsEnv)
var ddl = ... // the CREATE TABLE DDL shown above
fstEnv.sqlUpdate(ddl)
// run a query against the registered table
val resultTable: Table = fstEnv.sqlQuery("select * from t_employee")
// print the table schema
resultTable.printSchema()
// convert the table to an append stream and print it
fstEnv.toAppendStream[(Int,String,Boolean,Double,String)](resultTable)
.print()
fstEnv.execute("FlinkTableFileSystem_Stream_DDL")
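Note that toAppendStream only works for insert-only queries such as the projection above. A query that updates previously emitted results, e.g. a grouped aggregation, must be converted with toRetractStream instead (a minimal sketch against the same t_employee table, with Row imported as in the examples above):
// departmental average salary: results are updated as new rows arrive
val aggTable = fstEnv.sqlQuery("select dept, avg(salary) from t_employee group by dept")
// each element is (flag, row): true adds/updates a result, false retracts an earlier one
fstEnv.toRetractStream[Row](aggTable)
  .print()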
import org.apache.flink.api.common.typeinfo.{TypeInformation, Types}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.sources.CsvTableSource
import org.apache.flink.types.Row
object FlinkTableFileSystem_Stream_CSVTableSource {
def main(args: Array[String]): Unit = {
val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
val fstEnv = StreamTableEnvironment.create(fsEnv)
// create a CsvTableSource instance
var path="C:\\Users\\Administrator\\Desktop\\csv"
var fieldNames=Array[String]("id","name","sex","salary","dept")
var fieldTypes=Array[TypeInformation[_]](Types.INT,Types.STRING,Types.BOOLEAN,Types.DOUBLE,Types.STRING)
val csvTableSource = new CsvTableSource(path, fieldNames, fieldTypes,",","\n",null,true,null,true)
// register the source as a table
fstEnv.registerTableSource("t_employee",csvTableSource)
var resultTable=fstEnv.sqlQuery("select * from t_employee")
// convert the table to an append stream and print it
fstEnv.toAppendStream[Row](resultTable)
.print()
fstEnv.execute("FlinkTableFileSystem_Stream_CSVTableSource")
}
}
The Kafka connector allows reading messages from and writing messages to a Kafka message queue. Source: Streaming; Format: CSV, JSON, Avro. Note that, unlike the FileSystem connector, the Kafka connector only supports StreamTableSource, so it cannot be used in the batch processing model.
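The required Maven dependency is shown below; once it is on the classpath, a Kafka-backed table can be declared with the same descriptor pattern used for the FileSystem connector. A minimal streaming sketch (the topic name, broker address, and two-column schema are assumptions):
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.DataTypes
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.descriptors.{Csv, Kafka, Schema}
val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
val fstEnv = StreamTableEnvironment.create(fsEnv)
// assumptions: topic "employee" on a local broker
val kafka = new Kafka()
  .version("universal")
  .topic("employee")
  .property("bootstrap.servers", "localhost:9092")
  .startFromEarliest()
fstEnv.connect(kafka)
  .withFormat(new Csv())   // records from Kafka use the new flink-csv format
  .withSchema(new Schema()
    .field("id", DataTypes.INT())
    .field("name", DataTypes.STRING()))
  .createTemporaryTable("t_kafka_employee")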
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.11