Flink Study Notes: 05 - Sink Operations on Flink DataStream

Table of Contents

    • File Sink
    • KafkaSink
    • ElasticSearchSink
    • Redis Sink
    • JDBC Sink

File Sink

Read data from Kafka, transform it, and write the results to a file.

package com.hjt.yxh.hw.apitest

import org.apache.flink.api.common.eventtime.WatermarkStrategy
import org.apache.flink.api.common.serialization.{SimpleStringEncoder, SimpleStringSchema}
import org.apache.flink.connector.kafka.source.KafkaSource
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment


object FileSinkTest {
  def main(args: Array[String]): Unit = {

    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Add a Kafka source; after transforming the data it will be written to a file
    val kafkaSource: KafkaSource[String] = KafkaSource.builder[String]()
      .setBootstrapServers("192.168.0.52:9092,192.168.0.109:9092,192.168.0.115:9092")
      .setTopics("sensor")
      .setValueOnlyDeserializer(new SimpleStringSchema())
      .setGroupId("myGroup")
      .setStartingOffsets(OffsetsInitializer.latest())
      .build()

    val dataStream = env.fromSource(kafkaSource,WatermarkStrategy.noWatermarks(),"kafkaSource")

    val sensorStream = dataStream.map(data=>{
      val arry = data.split(",")
      SensorReading(arry(0),arry(1).toLong,arry(2).toDouble)
    })

    sensorStream.print()
    sensorStream.addSink(
      StreamingFileSink
        .forRowFormat(
          new Path("D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\file-sink-out"),
          new SimpleStringEncoder[SensorReading]()
        )
        .build()
    )

    env.execute("flink test")

  }
}
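
Note: StreamingFileSink only turns in-progress/pending part files into finished files when a checkpoint completes, so with checkpointing disabled the output directory keeps ".inprogress" files indefinitely. A minimal sketch to add right after creating the environment (the 10-second interval is just an illustrative value):

    // Enable checkpointing so the StreamingFileSink can commit (finish) its part files
    env.enableCheckpointing(10 * 1000L)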

Approach 2: Using FileSink



<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-files</artifactId>
    <version>1.14.4</version>
</dependency>

package com.hjt.yxh.hw.apitest

import org.apache.flink.api.common.eventtime.WatermarkStrategy
import org.apache.flink.api.common.serialization.{SimpleStringEncoder, SimpleStringSchema}
import org.apache.flink.connector.file.sink.FileSink
import org.apache.flink.connector.kafka.source.KafkaSource
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment


object FileSinkTest {
  def main(args: Array[String]): Unit = {

    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Add a Kafka source; after transforming the data it will be written to a file with FileSink
    val kafkaSource: KafkaSource[String] = KafkaSource.builder[String]()
      .setBootstrapServers("k8s-node3:9092,k8s-node5:9092,k8s-node8:9092")
      .setTopics("mytopic")
      .setValueOnlyDeserializer(new SimpleStringSchema())
      .setGroupId("myGroup")
      .setStartingOffsets(OffsetsInitializer.latest())
      .build()

    val dataStream = env.fromSource(kafkaSource,WatermarkStrategy.noWatermarks(),"kafkaSource")

    val sensorStream = dataStream.map(data=>{
      val arry = data.split(",")
      SensorReading(arry(0),arry(1).toLong,arry(2).toDouble)
    })
    val outPath = "D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\file-sink-out"
    val fileSink = FileSink
      .forRowFormat(new Path(outPath), new SimpleStringEncoder[SensorReading]())
      .build()

    sensorStream.sinkTo(fileSink)
    env.execute("flink test")

  }
}
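
The FileSink above rolls part files with the default policy; rolling can be controlled explicitly with DefaultRollingPolicy. A minimal sketch reusing outPath and sensorStream from the example above (the 15-minute/5-minute/128 MB values are illustrative, and the millisecond/byte-based builder methods are the ones used by Flink 1.14.x):

import java.util.concurrent.TimeUnit

import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy

    // Roll a new part file every 15 minutes, after 5 minutes of inactivity, or at 128 MB
    val rollingFileSink = FileSink
      .forRowFormat(new Path(outPath), new SimpleStringEncoder[SensorReading]())
      .withRollingPolicy(
        DefaultRollingPolicy.builder()
          .withRolloverInterval(TimeUnit.MINUTES.toMillis(15))
          .withInactivityInterval(TimeUnit.MINUTES.toMillis(5))
          .withMaxPartSize(128 * 1024 * 1024L)
          .build()
      )
      .build()

    sensorStream.sinkTo(rollingFileSink)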

KafkaSink

Add the following dependencies to pom.xml:


<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

Code:

package com.hjt.yxh.hw.apitest
import org.apache.flink.api.common.eventtime.WatermarkStrategy
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.connector.kafka.sink.{KafkaRecordSerializationSchema, KafkaSink}
import org.apache.flink.connector.kafka.source.KafkaSource
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object KafkaSinkTest {
  def main(args: Array[String]): Unit = {

    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Add a Kafka source; after transforming the data it will be written back to Kafka
    val kafkaSource: KafkaSource[String] = KafkaSource.builder[String]()
      .setBootstrapServers("192.168.0.52:9092,192.168.0.109:9092,192.168.0.115:9092")
      .setTopics("sensor")
      .setValueOnlyDeserializer(new SimpleStringSchema())
      .setGroupId("myGroup")
      .setStartingOffsets(OffsetsInitializer.latest())
      .build()

    val dataStream = env.fromSource(kafkaSource,WatermarkStrategy.noWatermarks(),"kafkaSource")

    val sensorStream = dataStream.map(data=>{
      val arry = data.split(",")
      SensorReading(arry(0),arry(1).toLong,arry(2).toDouble).toString
    })

    val kafkaSink = KafkaSink.builder[String]()
      .setBootstrapServers("192.168.0.52:9092,192.168.0.109:9092,192.168.0.115:9092")
      .setRecordSerializer(
        KafkaRecordSerializationSchema.builder()
          .setTopic("mytopic")
          .setValueSerializationSchema(new SimpleStringSchema())
          .build()
      )
      .build()

    sensorStream.sinkTo(kafkaSink)

    env.execute("flink test")

  }
}
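
KafkaSink defaults to DeliveryGuarantee.NONE (no flushing tied to checkpoints). If stronger guarantees are needed, the builder exposes setDeliverGuarantee (note the method name). A minimal sketch, assuming the same brokers and topic as above and that checkpointing is enabled:

import org.apache.flink.connector.base.DeliveryGuarantee

    // At-least-once: buffered records are flushed to Kafka on every checkpoint.
    // EXACTLY_ONCE would additionally need Kafka transactions, a
    // setTransactionalIdPrefix(...), and read_committed consumers downstream.
    val atLeastOnceSink = KafkaSink.builder[String]()
      .setBootstrapServers("192.168.0.52:9092,192.168.0.109:9092,192.168.0.115:9092")
      .setRecordSerializer(
        KafkaRecordSerializationSchema.builder()
          .setTopic("mytopic")
          .setValueSerializationSchema(new SimpleStringSchema())
          .build()
      )
      .setDeliverGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
      .build()

    sensorStream.sinkTo(atLeastOnceSink)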

ElasticSearchSink

Add the following dependencies to pom.xml:


<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch7_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

package com.hjt.yxh.hw.apitest

import java.util

import org.apache.flink.api.scala._
import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkFunction, RequestIndexer}
import org.apache.flink.streaming.connectors.elasticsearch7.ElasticsearchSink
import org.apache.http.HttpHost
import org.elasticsearch.client.Requests
import org.elasticsearch.common.xcontent.XContentType

object EsSinkTest {
  def main(args: Array[String]): Unit = {

    // 1. Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val inputPath = "D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\Data\\sensor.txt"

    val dataStream = env.readTextFile(inputPath)

    val sensorReadingStream = dataStream.map(data=>{
      val arry = data.split(",")
      SensorReading(arry(0),arry(1).toLong,arry(2).toDouble)
    })

    val httpHosts = new java.util.ArrayList[HttpHost]
    httpHosts.add(new HttpHost("192.168.0.52", 9200, "http"))
    httpHosts.add(new HttpHost("192.168.0.115", 9200, "http"))
    val myEsSinkFunc = new ElasticsearchSinkFunction[SensorReading] {
      override def process(element: SensorReading, ctx: RuntimeContext, indexer: RequestIndexer): Unit = {
        val dataSource = new util.HashMap[String,String]()
        dataSource.put("id",element.id)
        dataSource.put("temperature",element.temperature.toString)
        dataSource.put("timestamp",element.timestamp.toString)

        // Build an IndexRequest to send to Elasticsearch
        val indexRequest = Requests.indexRequest()
          .index("sensor")
          .source(dataSource,XContentType.JSON)

        indexer.add(indexRequest)
      }
    }

    val esSinkBuilder = new ElasticsearchSink.Builder[SensorReading](httpHosts,myEsSinkFunc)

    sensorReadingStream.print()
    sensorReadingStream.addSink(esSinkBuilder.build()).setParallelism(2)

    env.execute("elasticsearch Sink Test")
  }
}
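
The Elasticsearch sink buffers index requests and sends them to the cluster in bulk. During local testing it is common to flush after every element so documents show up immediately; in production you would tune the batch size instead. These knobs are set on the builder before build() is called (the values below are illustrative):

    // Flush after every single action so documents appear in Elasticsearch immediately (testing only)
    esSinkBuilder.setBulkFlushMaxActions(1)

    // More production-like settings:
    // esSinkBuilder.setBulkFlushMaxActions(1000)   // flush every 1000 actions
    // esSinkBuilder.setBulkFlushInterval(5000L)    // ...or at least every 5 seconds
    // esSinkBuilder.setBulkFlushMaxSizeMb(5)       // ...or when the batch reaches 5 MB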

Redis Sink



Add the following dependencies to pom.xml:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.bahir</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.0</version>
</dependency>

package com.hjt.yxh.hw.apitest

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.{FlinkJedisConfigBase, FlinkJedisPoolConfig}
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}


object RedisSinkTest {

  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val inputPath = "D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\Data\\sensor.txt"

    val dataStream = env.readTextFile(inputPath)

    val sensorStream = dataStream.map(data=>{
      val arry = data.split(",")
      SensorReading(arry(0),arry(1).toLong,arry(2).toDouble)
    })


    // Write the stream to Redis
    val jedisConfig = new FlinkJedisPoolConfig.Builder()
      .setHost("192.168.0.46")
      .setDatabase(0)
      .build()

    val myRedisSinkFunction = new RedisSink(jedisConfig,new MyRedisMapper)

    sensorStream.addSink(myRedisSinkFunction)

    env.execute("redis sink test")
  }
}

class MyRedisMapper extends RedisMapper[SensorReading] {
  override def getCommandDescription: RedisCommandDescription = {
    // HSET: "DATA_TYPE_SENSOR" is the Redis hash that all sensor readings are written into
    new RedisCommandDescription(RedisCommand.HSET, "DATA_TYPE_SENSOR")
  }

  override def getKeyFromData(data: SensorReading): String = {
    data.id
  }

  override def getValueFromData(data: SensorReading): String = {
    data.toString
  }
}
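
With RedisCommand.HSET, the second constructor argument ("DATA_TYPE_SENSOR") is the name of the Redis hash: getKeyFromData supplies the hash field and getValueFromData the field value, so the results can be inspected with HGETALL DATA_TYPE_SENSOR. If the Redis server is not on the default port or requires authentication, FlinkJedisPoolConfig exposes the usual connection options; a sketch (the port, password, and timeout values are illustrative assumptions):

    // Jedis pool configuration with explicit port, auth, and timeout
    val jedisConfigFull = new FlinkJedisPoolConfig.Builder()
      .setHost("192.168.0.46")
      .setPort(6379)                 // default Redis port
      .setPassword("redis-password") // assumption: only needed if requirepass is configured
      .setTimeout(5000)              // connection/socket timeout in milliseconds
      .setDatabase(0)
      .build()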

JDBC Sink



Add the following dependencies to pom.xml:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-jdbc_2.12</artifactId>
    <version>1.10.3</version>
</dependency>

<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.49</version>
</dependency>

Code:

package com.hjt.yxh.hw.apitest


import java.sql.{Connection, DriverManager, PreparedStatement}

import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.flink.streaming.api.scala._

object JdbcSinkTest {

  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val inputPath = "D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\Data\\sensor.txt"

    val dataStream = env.readTextFile(inputPath)

    val sensorStream = dataStream.map(data=>{
      val arry = data.split(",")
      SensorReading(arry(0),arry(1).toLong,arry(2).toDouble)
    })

    sensorStream.addSink(new MySqlSink())

    env.execute()
  }

}

class MySqlSink extends RichSinkFunction[SensorReading]{

  var conn:Connection = _
  var insertStatement:PreparedStatement = _
  var updateStatement:PreparedStatement = _

  // Open the JDBC connection and prepare the insert/update statements once per sink instance
  override def open(parameters: Configuration): Unit = {
    conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:3306/news","root","mysqlroot")
    insertStatement  = conn.prepareStatement("insert into sensor(sensorId,timestamp,temperature) values(?,?,?)")
    updateStatement = conn.prepareStatement("update sensor set timestamp = ? ,temperature = ? where sensorId = ?")
  }

  override def invoke(value: SensorReading): Unit = {
    // Try an update first; if no row was affected, fall back to an insert (a simple upsert)
    updateStatement.setLong(1,value.timestamp)
    updateStatement.setDouble(2,value.temperature)
    updateStatement.setString(3,value.id)
    updateStatement.execute()
    if (updateStatement.getUpdateCount == 0)
    {
      insertStatement.setString(1,value.id)
      insertStatement.setLong(2,value.timestamp)
      insertStatement.setDouble(3,value.temperature)
      insertStatement.execute()
    }
  }
  override def close(): Unit = {
    insertStatement.close()
    updateStatement.close()
    conn.close()
  }
}
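
The RichSinkFunction above manages the connection and the update-then-insert logic by hand, on top of the legacy flink-jdbc artifact listed earlier. Newer Flink releases ship a dedicated flink-connector-jdbc module (for 1.14.x, flink-connector-jdbc_2.12:1.14.4) whose JdbcSink takes care of batching, retries, and connection handling. A minimal sketch under that assumption, doing plain inserts instead of the upsert above:

import java.sql.PreparedStatement

import org.apache.flink.connector.jdbc.{JdbcConnectionOptions, JdbcExecutionOptions, JdbcSink, JdbcStatementBuilder}

    // Insert-only JDBC sink; batching and retries are handled by the connector
    val jdbcSink = JdbcSink.sink[SensorReading](
      "insert into sensor(sensorId, timestamp, temperature) values(?, ?, ?)",
      new JdbcStatementBuilder[SensorReading] {
        override def accept(ps: PreparedStatement, r: SensorReading): Unit = {
          ps.setString(1, r.id)
          ps.setLong(2, r.timestamp)
          ps.setDouble(3, r.temperature)
        }
      },
      JdbcExecutionOptions.builder()
        .withBatchSize(100)          // flush every 100 records
        .withBatchIntervalMs(2000L)  // ...or every 2 seconds
        .withMaxRetries(3)
        .build(),
      new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
        .withUrl("jdbc:mysql://127.0.0.1:3306/news")
        .withDriverName("com.mysql.jdbc.Driver")
        .withUsername("root")
        .withPassword("mysqlroot")
        .build()
    )

    sensorStream.addSink(jdbcSink)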

