Preface
Flink 1.11.0 refactored the JDBC connector substantially.
As a result, Flink 1.11.0 and later versions write data to ClickHouse with the flink-connector-jdbc module plus the DataStream API.
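At the import level the rename looks roughly like this (JDBCOutputFormat is just one example of the old flink-jdbc API; this post only uses the new module):

// Flink 1.10.3 and earlier: legacy flink-jdbc module
import org.apache.flink.api.java.io.jdbc.JDBCOutputFormat
// Flink 1.11.0 and later: refactored flink-connector-jdbc module used in this post
import org.apache.flink.connector.jdbc.JdbcSink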
Dependencies:
...
ext {
    javaVersion = '1.8'
    flinkVersion = '1.13.2'
    scalaBinaryVersion = '2.12'
    log4jVersion = '2.12.1'
    junitVersion = '4.13'
}
...
shadow "org.apache.flink:flink-table-api-scala-bridge_${scalaBinaryVersion}:${flinkVersion}"
shadow "org.apache.flink:flink-table-planner-blink_${scalaBinaryVersion}:${flinkVersion}"
shadow "org.apache.flink:flink-table-common:${flinkVersion}"
// 添加Flink JDBC connector 以及 Clickhouse JDBC Driver相关依赖
shadow "org.apache.flink:flink-connector-jdbc_${scalaBinaryVersion}:${flinkVersion}"
shadow "ru.yandex.clickhouse:clickhouse-jdbc:0.3.1"
Example program:
The example program uses the datagen connector as the table source, converts the Table into a DataStream with StreamTableEnvironment.toAppendStream, and writes the data to ClickHouse with JdbcSink.
import org.apache.flink.api.common.JobExecutionResult
import org.apache.flink.connector.jdbc.{JdbcConnectionOptions, JdbcExecutionOptions, JdbcSink, JdbcStatementBuilder}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _}
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment

import java.sql.PreparedStatement

/**
 * @Author chaos
 * @Date 2021/9/6 2:13 PM
 * @Version 1.0
 * @Description Flink 1.11.0 refactored the JDBC connector substantially.
 *              Before the refactoring (1.10.3 and earlier) the module was named flink-jdbc;
 *              after the refactoring (1.11.0 and later) it is named flink-connector-jdbc.
 *
 *              Flink 1.11.0 and later versions therefore write data to ClickHouse with
 *              flink-connector-jdbc plus the DataStream API.
 */
object FlinkClickHouse {

  class FlinkClickHouseJob {
    def execute(): JobExecutionResult = {
      val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
      val tEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)

      // Source: a datagen table that produces 20 rows of random person/score data.
      tEnv.executeSql(
        """
          |CREATE TABLE IF NOT EXISTS person_score_datagen (
          |  id INT,
          |  name STRING,
          |  age INT,
          |  score INT,
          |  ts AS LOCALTIMESTAMP,
          |  WATERMARK FOR ts AS ts )
          |WITH (
          |  'connector' = 'datagen',
          |  'rows-per-second' = '2',
          |  'fields.id.kind' = 'sequence',
          |  'fields.id.start' = '1',
          |  'fields.id.end' = '20',
          |  'fields.name.length' = '6',
          |  'fields.age.min' = '20',
          |  'fields.age.max' = '30',
          |  'fields.score.min' = '60',
          |  'fields.score.max' = '100'
          |)
          |""".stripMargin)

      val table: Table = tEnv.sqlQuery("select id, name, age, score from person_score_datagen")
      val resultDataStream: DataStream[(Int, String, Int, Int)] = tEnv.toAppendStream[(Int, String, Int, Int)](table)

      val insertIntoCkSql =
        """
          | INSERT INTO person_score (
          |   pid, name, age, score
          | ) VALUES (
          |   ?, ?, ?, ?
          | )
        """.stripMargin

      val CkJdbcUrl = "jdbc:clickhouse://xxx.xxx.xxx.xxx:8123/tutorial"
      val CkUsername = ""
      val CkPassword = ""

      /**
       * Single-row inserts into ClickHouse have relatively high latency, so set a batch size
       * and write the data in batches to get acceptable performance.
       */
      val BatchSize = 500 // set your own batch size

      resultDataStream.addSink(
        JdbcSink.sink[(Int, String, Int, Int)](
          insertIntoCkSql,
          new CkSinkBuilder,
          new JdbcExecutionOptions.Builder().withBatchSize(BatchSize).build(),
          new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
            .withDriverName("ru.yandex.clickhouse.ClickHouseDriver")
            .withUrl(CkJdbcUrl)
            // .withUsername(CkUsername)
            // .withPassword(CkPassword)
            .build()
        )
      )

      env.execute()
    }
  }

  @throws[Exception]
  def main(args: Array[String]): Unit = {
    val job = new FlinkClickHouseJob
    job.execute()
  }

  /**
   * With the current flink-connector-jdbc version, calling JdbcSink from the Scala API
   * hits a lambda serialization problem, so the JDBC statement builder has to be passed
   * as an explicit implementation of the interface (class CkSinkBuilder).
   */
  class CkSinkBuilder extends JdbcStatementBuilder[(Int, String, Int, Int)] {
    def accept(ps: PreparedStatement, v: (Int, String, Int, Int)): Unit = {
      ps.setInt(1, v._1)
      ps.setString(2, v._2)
      ps.setInt(3, v._3)
      ps.setInt(4, v._4)
    }
  }
}
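Besides the batch size, JdbcExecutionOptions can also bound how long a partially filled batch waits before it is flushed and how many times a failed batch is retried. A minimal sketch with illustrative values (not taken from the job above; tune them for your workload):

// Illustrative values only.
val executionOptions = JdbcExecutionOptions.builder()
  .withBatchSize(500)        // flush once 500 rows are buffered...
  .withBatchIntervalMs(2000) // ...or after 2 seconds, whichever comes first
  .withMaxRetries(3)         // retry a failed batch up to 3 times
  .build()

Passing this object as the third argument of JdbcSink.sink replaces the plain withBatchSize configuration used in the example.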
Parameter description:
CkJdbcUrl: JDBC URL of the target ClickHouse cluster
CkUsername: username for the target ClickHouse cluster
CkPassword: password for the target ClickHouse cluster
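The job assumes the target table person_score already exists in the tutorial database. A possible DDL is sketched below; the MergeTree engine, ORDER BY key, and column types are assumptions, so adjust them to your own schema:

CREATE TABLE IF NOT EXISTS tutorial.person_score
(
    pid   Int32,
    name  String,
    age   Int32,
    score Int32
)
ENGINE = MergeTree()
ORDER BY pid;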