Apache Kafka SQL Connector
Flink SQL reading from Kafka
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row
/**
 * Flink SQL: read from Kafka
 *
 * Created by LiuJinHe 2020/8/12
 */
object FlinkSqlReadKafka {
  def main(args: Array[String]): Unit = {
    // Initialize the stream execution environment.
    // For local testing with the web UI, the flink-runtime-web dependency is required.
    val env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI()
    env.setParallelism(1)
    // val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Create the stream table environment.
    val settings: EnvironmentSettings = EnvironmentSettings
      .newInstance()
      .useBlinkPlanner()
      .inStreamingMode()
      .build()
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env, settings)

    val kafkaSql =
      """
        |create temporary table kafkaTable (
        |  user_name string,
        |  user_id bigint,
        |  item_id bigint
        |) with (
        |  'connector' = 'kafka',
        |  'topic' = 'flink_test',
        |  'properties.bootstrap.servers' = 'localhost:9092',
        |  'properties.group.id' = 'flink-test-group',
        |  'format' = 'json',
        |  'scan.startup.mode' = 'latest-offset'
        |)
      """.stripMargin

    tableEnv.executeSql(kafkaSql)
    val table = tableEnv.from("kafkaTable")
    // val table = tableEnv.sqlQuery("select * from kafkaTable")

    val resultDStream = tableEnv.toAppendStream[Row](table)
    resultDStream.print()

    env.execute("flink sql read kafka")
  }
}
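To exercise this job locally, one option (an illustrative sketch, not part of the original post) is to push a JSON record whose fields match the kafkaTable schema into the flink_test topic with the plain Kafka producer client; the kafka-clients dependency is already pulled in transitively by flink-connector-kafka. The object name ProduceTestRecord and the record values below are made up for illustration.

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object ProduceTestRecord {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    // One JSON record whose fields match the kafkaTable columns (values are made up).
    val value = """{"user_name":"alice","user_id":1,"item_id":1001}"""
    producer.send(new ProducerRecord[String, String]("flink_test", value))
    producer.flush()
    producer.close()
  }
}

With the Flink job running, each record pushed this way should appear on stdout through resultDStream.print().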
Flink SQL reading from and writing to Kafka
import java.time.Duration

import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.environment.{CheckpointConfig, ExecutionCheckpointingOptions}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{EnvironmentSettings, SqlDialect}
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.table.catalog.GenericInMemoryCatalog
/**
 * Flink SQL: Kafka to Kafka
 *
 * Created by LiuJinHe 2020/8/12
 */
object FlinkSqlKafka2Kafka {
  def main(args: Array[String]): Unit = {
    // Initialize the stream execution environment.
    // For local testing with the web UI, the flink-runtime-web dependency is required.
    val env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI()
    // val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Single thread for local testing.
    env.setParallelism(1)
    // Restart on failure with a fixed delay: up to 10 attempts, 3 seconds apart.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, 3000L))
    // Processing time: event handling is driven by the system clock.
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime)
    // Create the stream table environment.
    val settings: EnvironmentSettings = EnvironmentSettings
      .newInstance()
      .useBlinkPlanner()
      .inStreamingMode()
      .build()
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env, settings)

    // Checkpoint settings.
    val tableConfig = tableEnv.getConfig.getConfiguration
    tableConfig.set(ExecutionCheckpointingOptions.CHECKPOINTING_MODE, CheckpointingMode.EXACTLY_ONCE)
    // Checkpoint interval: take a checkpoint every 60 seconds.
    tableConfig.set(ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL, Duration.ofSeconds(60))
    // Checkpoint timeout: a checkpoint that does not complete within 60 seconds is discarded.
    tableConfig.set(ExecutionCheckpointingOptions.CHECKPOINTING_TIMEOUT, Duration.ofSeconds(60))
    // Minimum pause between checkpoints: at least 30 seconds apart.
    tableConfig.set(ExecutionCheckpointingOptions.MIN_PAUSE_BETWEEN_CHECKPOINTS, Duration.ofSeconds(30))
    // Allow only one checkpoint to be in progress at a time.
    tableConfig.set(ExecutionCheckpointingOptions.MAX_CONCURRENT_CHECKPOINTS, Integer.valueOf(1))
    // Retain the externalized checkpoint when the job is cancelled manually.
    tableConfig.set(ExecutionCheckpointingOptions.EXTERNALIZED_CHECKPOINT,
      CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)

    // Catalog
    val memoryCatalog = new GenericInMemoryCatalog("kafkaSourceTable", "memory")
    // Kafka source table.
    val kafkaSourceSql =
      """
        |create table kafkaSourceTable (
        |  user_name string,
        |  user_id bigint,
        |  login_time bigint,
        |  event_time as to_timestamp(from_unixtime(login_time / 1000, 'yyyy-MM-dd HH:mm:ss')),
        |  row_time as proctime()
        |) with (
        |  'connector' = 'kafka',
        |  'topic' = 'flink_test',
        |  'properties.bootstrap.servers' = 'localhost:9092',
        |  'properties.group.id' = 'flink-test-group',
        |  'format' = 'json',
        |  'scan.startup.mode' = 'group-offsets'
        |)
      """.stripMargin
    // login_time is a 13-digit epoch timestamp in milliseconds, so the computed column
    // event_time converts it with to_timestamp(from_unixtime(login_time / 1000, ...)).
    // row_time as proctime() (or localtimestamp) is processing time driven by the system clock;
    // a watermark cannot be declared on a processing-time column. With an event-time column,
    // a watermark with an allowed delay can be declared, e.g.
    //   watermark for event_time as event_time - interval '5' second  -- allows 5 seconds of lateness
    // (see the event-time sketch after this program).
    // Kafka sink (result) table.
    val kafkaSinkSql =
      """
        |create table kafkaSinkTable (
        |  user_name string,
        |  user_id bigint,
        |  login_time bigint,
        |  event_time string
        |) with (
        |  'connector' = 'kafka',
        |  'topic' = 'flink_test2',
        |  'properties.bootstrap.servers' = 'localhost:9092',
        |  'properties.group.id' = 'flink-test-group',
        |  'format' = 'json',
        |  'sink.partitioner' = 'fixed'
        |)
      """.stripMargin

    // Select the sink columns explicitly: the source also carries the computed row_time column,
    // and event_time must be cast to string to match the sink schema.
    val insertSql =
      """
        |insert into kafkaSinkTable
        |select user_name, user_id, login_time, cast(event_time as string)
        |from kafkaSourceTable
      """.stripMargin
    tableEnv.registerCatalog("memoryCatalog", memoryCatalog)
    tableEnv.useCatalog("memoryCatalog")
    tableEnv.getConfig.setSqlDialect(SqlDialect.DEFAULT)

    // Approach 1: submit the INSERT statement directly with executeSql.
    tableEnv.executeSql(kafkaSourceSql)
    tableEnv.executeSql(kafkaSinkSql)
    tableEnv.executeSql(insertSql)
    // println(tableEnv.explainSql(insertSql))

    // Approach 2: build a Table and write it with executeInsert.
    // tableEnv.executeSql(kafkaSourceSql)
    // val table = tableEnv.from("kafkaSourceTable")
    // tableEnv.executeSql(kafkaSinkSql)
    // table.executeInsert("kafkaSinkTable")
  }
}
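The comment block above mentions switching from processing time to event time. As a sketch (not from the original post), the source DDL could derive event_time from the millisecond login_time and declare the watermark directly on it; kafkaSourceEventTimeSql is a hypothetical name and the 5-second allowed delay is an arbitrary choice.

    // Hypothetical event-time variant of the source table (same connector options as above).
    val kafkaSourceEventTimeSql =
      """
        |create table kafkaSourceTable (
        |  user_name string,
        |  user_id bigint,
        |  login_time bigint,
        |  event_time as to_timestamp(from_unixtime(login_time / 1000, 'yyyy-MM-dd HH:mm:ss')),
        |  -- allow events to arrive up to 5 seconds late
        |  watermark for event_time as event_time - interval '5' second
        |) with (
        |  'connector' = 'kafka',
        |  'topic' = 'flink_test',
        |  'properties.bootstrap.servers' = 'localhost:9092',
        |  'properties.group.id' = 'flink-test-group',
        |  'format' = 'json',
        |  'scan.startup.mode' = 'group-offsets'
        |)
      """.stripMargin

With such a watermark the table can be used in event-time windows, whereas a column declared with proctime() cannot carry a watermark.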
pom.xml
<properties>
  <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  <maven.compiler.source>1.8</maven.compiler.source>
  <maven.compiler.target>1.8</maven.compiler.target>
  <scala.version>2.11.8</scala.version>
  <scala.binary.version>2.11</scala.binary.version>
  <flink.version>1.11.1</flink.version>
</properties>

<dependencies>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-common</artifactId>
    <version>${flink.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-api-scala-bridge_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-json</artifactId>
    <version>${flink.version}</version>
  </dependency>
</dependencies>

<build>
  <plugins>
    <plugin>
      <groupId>net.alchim31.maven</groupId>
      <artifactId>scala-maven-plugin</artifactId>
      <version>4.2.0</version>
      <executions>
        <execution>
          <goals>
            <goal>compile</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-assembly-plugin</artifactId>
      <version>3.0.0</version>
      <configuration>
        <descriptorRefs>
          <descriptorRef>jar-with-dependencies</descriptorRef>
        </descriptorRefs>
      </configuration>
      <executions>
        <execution>
          <id>make-assembly</id>
          <phase>package</phase>
          <goals>
            <goal>single</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>