FlinkSql Kafka to Kafka

Apache Kafka SQL Connector

The Kafka SQL connector lets Flink SQL declare Kafka topics as tables via DDL and query or fill them with plain SQL. The first example below reads from Kafka and prints to stdout; the second pipes one topic into another.

Flink SQL: reading from Kafka

import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row

/**
  * Flink SQL: read from Kafka
  *
  * created by LiuJinHe 2020/8/12
  */
object FlinkSqlReadKafka {

  def main(args: Array[String]): Unit = {
    // initialize the stream environment
    // for local testing with the web UI, the flink-runtime-web dependency is required
    val env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI()
    env.setParallelism(1)
    //    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // create the stream table environment
    val settings: EnvironmentSettings = EnvironmentSettings
      .newInstance()
      .useBlinkPlanner()
      .inStreamingMode()
      .build()

    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env, settings)
    val kafkaSql =
      """
        |create temporary table kafkaTable (
        | user_name string,
        | user_id bigint,
        | item_id bigint
        |) with (
        | 'connector' = 'kafka',
        | 'topic' = 'flink_test',
        | 'properties.bootstrap.servers' = 'localhost:9092',
        | 'properties.group.id' = 'flink-test-group',
        | 'format' = 'json',
        | 'scan.startup.mode' = 'latest-offset'
        |)
      """.stripMargin

    tableEnv.executeSql(kafkaSql)

    val table = tableEnv.from("kafkaTable")
    //  val table = tableEnv.sqlQuery("select * from kafkaTable")

    // convert the Table into an append-only DataStream[Row]
    val resultDStream = tableEnv.toAppendStream[Row](table)
    
    resultDStream.print()

    env.execute("flink sql read kafka")
  }
}
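With the job running, any JSON record published to the flink_test topic that matches the declared schema is printed to stdout, e.g. the hypothetical record {"user_name": "alice", "user_id": 1, "item_id": 1001} (assuming a broker at localhost:9092). The local web UI is served at http://localhost:8081 by default.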

Flink SQL: reading from and writing to Kafka

import java.time.Duration

import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.environment.{CheckpointConfig, ExecutionCheckpointingOptions}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{EnvironmentSettings, SqlDialect}
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.table.catalog.GenericInMemoryCatalog

/**
  * Flink SQL: Kafka to Kafka
  *
  * created by LiuJinHe 2020/8/12
  */
object FlinkSqlKafka2Kafka {

  def main(args: Array[String]): Unit = {
    // initialize the stream environment
    // for local testing with the web UI, the flink-runtime-web dependency is required
    val env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI()
    // single thread for local testing
    env.setParallelism(1)
    //    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // on failure, restart at a fixed delay: up to 10 attempts, 3 seconds apart
    // (the delay argument is in milliseconds)
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, 3000))

    // processing time: event handling is driven by the system clock
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime)

    // create the stream table environment
    val settings: EnvironmentSettings = EnvironmentSettings
      .newInstance()
      .useBlinkPlanner()
      .inStreamingMode()
      .build()

    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env, settings)

    // checkpoint settings
    val tableConfig = tableEnv.getConfig.getConfiguration
    tableConfig.set(ExecutionCheckpointingOptions.CHECKPOINTING_MODE, CheckpointingMode.EXACTLY_ONCE)
    // checkpoint interval: take a checkpoint every 60 seconds
    tableConfig.set(ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL, Duration.ofSeconds(60))
    // checkpoint timeout: a checkpoint not completed within 60 seconds is discarded
    tableConfig.set(ExecutionCheckpointingOptions.CHECKPOINTING_TIMEOUT, Duration.ofSeconds(60))
    // minimum pause: at least 30 seconds between two checkpoints
    tableConfig.set(ExecutionCheckpointingOptions.MIN_PAUSE_BETWEEN_CHECKPOINTS, Duration.ofSeconds(30))
    // allow only one checkpoint in flight at a time
    tableConfig.set(ExecutionCheckpointingOptions.MAX_CONCURRENT_CHECKPOINTS, Integer.valueOf(1))
    // retain checkpoints when the job is cancelled manually
    tableConfig.set(ExecutionCheckpointingOptions.EXTERNALIZED_CHECKPOINT,
      CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)

    // an in-memory catalog to hold the table definitions
    val memoryCatalog = new GenericInMemoryCatalog("kafkaSourceTable", "memory")

    // Kafka source table
    val kafkaSourceSql =
      """
        |create table kafkaSourceTable (
        | user_name string,
        | user_id bigint,
        | login_time bigint,
        | event_time as to_timestamp(from_unixtime(login_time / 1000, 'yyyy-MM-dd HH:mm:ss')),
        | row_time as proctime()
        |) with (
        | 'connector' = 'kafka',
        | 'topic' = 'flink_test',
        | 'properties.bootstrap.servers' = 'localhost:9092',
        | 'properties.group.id' = 'flink-test-group',
        | 'format' = 'json',
        | 'scan.startup.mode' = 'group-offsets'
        |)
      """.stripMargin
    // login_time is a 13-digit epoch-milliseconds value
    // event_time as to_timestamp(from_unixtime(login_time / 1000, 'yyyy-MM-dd HH:mm:ss')) derives the event time
    // `as localtimestamp` would use the system time; with processing time no watermark can be declared
    // with event time, a watermark can declare the tolerated lateness, e.g.
    // watermark for event_time as event_time - interval '5' second  -- allows 5 seconds of lateness
    // (a full DDL sketch of this variant follows)
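    // A sketch of the event-time variant (not executed below; the table name
    // kafkaEventTimeSource is hypothetical, and login_time is assumed to be
    // epoch milliseconds): the computed column becomes the event-time attribute
    // and the watermark tolerates 5 seconds of lateness.
    val eventTimeSourceSql =
      """
        |create table kafkaEventTimeSource (
        | user_name string,
        | user_id bigint,
        | login_time bigint,
        | event_time as to_timestamp(from_unixtime(login_time / 1000, 'yyyy-MM-dd HH:mm:ss')),
        | watermark for event_time as event_time - interval '5' second
        |) with (
        | 'connector' = 'kafka',
        | 'topic' = 'flink_test',
        | 'properties.bootstrap.servers' = 'localhost:9092',
        | 'properties.group.id' = 'flink-test-group',
        | 'format' = 'json',
        | 'scan.startup.mode' = 'group-offsets'
        |)
      """.stripMargin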

    // Kafka sink (result) table
    val kafkaSinkSql =
      """
        |create table kafkaSinkTable (
        | user_name string,
        | user_id bigint,
        | login_time bigint,
        | event_time string
        |) with (
        | 'connector' = 'kafka',
        | 'topic' = 'flink_test2',
        | 'properties.bootstrap.servers' = 'localhost:9092',
        | 'properties.group.id' = 'flink-test-group',
        | 'format' = 'json',
        | 'sink.partitioner' = 'fixed'
        |)
      """.stripMargin

    // select * would also emit the computed row_time column, which does not
    // match the 4-column sink schema, so the sink columns are listed explicitly
    val insertSql =
      """
        |insert into kafkaSinkTable
        |select user_name, user_id, login_time, cast(event_time as string)
        |from kafkaSourceTable
      """.stripMargin

    tableEnv.registerCatalog("memoryCatalog", memoryCatalog)
    tableEnv.useCatalog("memoryCatalog")
    tableEnv.getConfig.setSqlDialect(SqlDialect.DEFAULT)

    // option 1: pure SQL
    tableEnv.executeSql(kafkaSourceSql)
    tableEnv.executeSql(kafkaSinkSql)
    tableEnv.executeSql(insertSql)
    // println(tableEnv.explainSql(insertSql))

    // option 2: the same pipeline via the Table API
    //    tableEnv.executeSql(kafkaSourceSql)
    //    val table = tableEnv.from("kafkaSourceTable")
    //    tableEnv.executeSql(kafkaSinkSql)
    //    table.executeInsert("kafkaSinkTable")
  }
}
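Note that executeSql on an INSERT statement submits the job by itself, which is why, unlike the first example, no env.execute call follows. As a worked example with a hypothetical record: publishing {"user_name": "alice", "user_id": 1, "login_time": 1597190400000} to flink_test should yield {"user_name": "alice", "user_id": 1, "login_time": 1597190400000, "event_time": "2020-08-12 00:00:00"} on flink_test2 (from_unixtime renders the timestamp in the session time zone; the value shown assumes UTC).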

pom.xml

   
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <scala.version>2.11.8</scala.version>
        <scala.binary.version>2.11</scala.binary.version>
        <flink.version>1.11.1</flink.version>
    </properties>

    <dependencies>
        <!-- web UI for local runs -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- compile the Scala sources -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>4.2.0</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- package a fat jar -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
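With the assembly plugin bound to the package phase, mvn package compiles the Scala sources and additionally produces a *-jar-with-dependencies.jar suitable for submitting the job to a cluster.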
