Flink (Scala) Learning Part 2: Common Sinks

Kafka sink

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaProducer, KafkaSerializationSchema}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.ByteArraySerializer
import org.apache.flink.streaming.api.scala._

import java.lang
import java.util.Properties

object KafkaSink {
  def main(args: Array[String]): Unit = {
    val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    streamEnv.setParallelism(1) // set the parallelism of every operator to 1

    // read a live stream from netcat
    val stream1: DataStream[String] = streamEnv.socketTextStream("127.0.0.1", 6666)

    // transformation: word count
    val ds = stream1.flatMap(_.split(" "))
      .map((_, 1))
      .keyBy(_._1)
      .sum(1)

    val result = ds.map(line => {
      ("", line._1 + "_" + line._2) // leave the Kafka key empty; the value is "word_count"
    })

    // Kafka producer configuration
    val props = new Properties()
    props.setProperty("bootstrap.servers", "172.16.254.4:9092,172.16.254.5:9092,172.16.254.6:9092")
    props.setProperty("key.serializer", classOf[ByteArraySerializer].getName)
    props.setProperty("value.serializer", classOf[ByteArraySerializer].getName)

    // write the data to Kafka as key/value pairs
    result.addSink(new FlinkKafkaProducer[(String, String)]("t_topic",
      new KafkaSerializationSchema[(String, String)] {
        override def serialize(element: (String, String), timestamp: lang.Long): ProducerRecord[Array[Byte], Array[Byte]] = {
          new ProducerRecord("t_topic", element._1.getBytes, element._2.getBytes)
        }
      }, props, FlinkKafkaProducer.Semantic.EXACTLY_ONCE)) // EXACTLY_ONCE: each record is written exactly once

    streamEnv.execute()
  }
}

Result: (screenshot omitted)
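If per-record keys are not needed, the connector also offers a simpler value-only constructor. A minimal sketch, assuming the same result stream and props as above (SimpleStringSchema serializes only the value, and the delivery semantic then defaults to AT_LEAST_ONCE):

import org.apache.flink.api.common.serialization.SimpleStringSchema

// value-only variant: every record goes to t_topic with a null key
result.map(_._2) // keep just the "word_count" string
  .addSink(new FlinkKafkaProducer[String]("t_topic", new SimpleStringSchema(), props))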

Redis sink

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}
import org.apache.flink.streaming.api.scala._


object RedisSinkDemo { // renamed to avoid clashing with the imported RedisSink class
  def main(args: Array[String]): Unit = {
    // initialize the Flink streaming execution environment
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
    streamEnv.setParallelism(1)

    // read a live stream from netcat
    val stream = streamEnv.socketTextStream("127.0.0.1", 6666)

    // transformation: word count (input words separated by commas)
    val result = stream.flatMap(_.split(","))
      .map((_, 1))
      .keyBy(_._1)
      .sum(1)

    // Redis connection configuration
    val config = new FlinkJedisPoolConfig.Builder().setDatabase(1).setHost("127.0.0.1").setPort(6379).build()

    // write to Redis
    result.addSink(new RedisSink[(String, Int)](config, new RedisMapper[(String, Int)] {
      override def getCommandDescription = new RedisCommandDescription(RedisCommand.HSET, "t_wc") // t_wc is the Redis hash name

      override def getKeyFromData(data: (String, Int)) = {
        data._1 // the word
      }

      override def getValueFromData(data: (String, Int)) = {
        data._2.toString // the word count, as a string
      }
    }))

    streamEnv.execute()
  }
}

Result: (screenshot omitted)
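With HSET, all counts live in a single Redis hash (HGETALL t_wc lists them). If separate string keys are preferred, the mapper can issue SET instead; a minimal sketch, assuming the same result stream and config as above (the one-argument RedisCommandDescription constructor is for commands that take no additional key):

// variant: store each word under its own string key via SET
result.addSink(new RedisSink[(String, Int)](config, new RedisMapper[(String, Int)] {
  override def getCommandDescription = new RedisCommandDescription(RedisCommand.SET)
  override def getKeyFromData(data: (String, Int)) = data._1
  override def getValueFromData(data: (String, Int)) = data._2.toString
}))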

Custom sink (MySQL)

import com.bjsxt.practice.source.StationLog
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala._
import java.sql.{Connection, DriverManager, PreparedStatement}


// a custom sink that writes StationLog records to MySQL
class MyCustomSink extends RichSinkFunction[StationLog] {
  var conn: Connection = _
  var pst: PreparedStatement = _

  // lifecycle hook: called once when the sink is initialized
  override def open(parameters: Configuration): Unit = {
    conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:3306/traffic_monitor?characterEncoding=utf-8&useSSL=false", "root", "mysql")
    pst = conn.prepareStatement("insert into t_station_log (sid,call_out,call_in,call_type,call_time,duration) values (?,?,?,?,?,?)")
  }

  // writes one StationLog to table t_station_log; called once per record
  override def invoke(value: StationLog, context: SinkFunction.Context[_]): Unit = {
    pst.setString(1, value.sid)
    pst.setString(2, value.callOut)
    pst.setString(3, value.callIn)
    pst.setString(4, value.callType)
    pst.setLong(5, value.callTime)
    pst.setLong(6, value.duration)
    pst.executeUpdate()
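    // Note: this issues one INSERT per record; for higher throughput, consider
    // JDBC batching (addBatch/executeBatch) or Flink's JDBC connector.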

  }

  // lifecycle hook: called once when the sink shuts down
  override def close(): Unit = {
    pst.close()
    conn.close()
  }
}

object CustomJdbcSink {
  def main(args: Array[String]): Unit = {
    // initialize the Flink streaming execution environment
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
    streamEnv.setParallelism(1)

    val data:DataStream[String] = streamEnv.socketTextStream("127.0.0.1", 6666)

    val result = data.map(line=>{
      val arr = line.split(",")
      StationLog(arr(0),arr(1),arr(2),arr(3),arr(4).toLong,arr(5).toLong)
    })
    // write the data to MySQL
    result.addSink(new MyCustomSink)
    streamEnv.execute()
  }
}
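StationLog is imported from an earlier part of this series; a minimal sketch of the case class, with field names and types taken from the JDBC setters above:

case class StationLog(sid: String, callOut: String, callIn: String,
                      callType: String, callTime: Long, duration: Long)

A matching netcat input line would then look like (hypothetical sample values): s001,18600000001,18600000002,success,1577080457000,30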

Result: (screenshot omitted)
DDL for t_station_log:

CREATE TABLE `t_station_log` (
  `sid` varchar(255) DEFAULT NULL,
  `call_out` varchar(255) DEFAULT NULL,
  `call_in` varchar(255) DEFAULT NULL,
  `call_type` varchar(255) DEFAULT NULL,
  `call_time` bigint DEFAULT NULL,
  `duration` bigint DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
