Flink(3):分流、合流、自定义 map、自定义 filter

输入文件 sensor.txt(每行格式:传感器id,时间戳,温度):
sensor_1,1547718199,35.8
sensor_6,1547718201,15.4
sensor_7,1547718202,6.7
sensor_10,1547718205,38.1
sensor_1,1547718129,29.8
sensor_1,1547718158,5.8
sensor_1,1547718140,40.8
sensor_1,1547718111,11.8

 

package com.apitest

import org.apache.flink.api.common.functions.{FilterFunction, ReduceFunction, RichMapFunction}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._
/**
 * Sample case class describing one temperature-sensor reading.
 *
 * @param id          sensor identifier, e.g. "sensor_1"
 * @param timestamp   time of the reading (presumably epoch seconds, judging by the
 *                    sample data — confirm against the data source)
 * @param temperature measured temperature
 */
case class SensorReading2(
  id: String,
  timestamp: Long,
  temperature: Double
)
/**
 * Walk-through of basic Flink DataStream transformations on a text file of sensor
 * readings (one `id,timestamp,temperature` record per line): keyed aggregation,
 * reduce, split/select, connect + coMap, union and a custom filter.
 *
 * All `print()` sinks are left commented out; uncomment the one whose output you
 * want to inspect before running.
 */
object TransformTest {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath =
    "D:\\workspace\\ideastudy\\flinkstudy\\src\\main\\scala\\com\\apitest\\sensor.txt"

  /**
   * Builds and runs the demo job.
   *
   * @param args optional; `args(0)` overrides the input file path, otherwise
   *             the built-in default path is used
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Allow the input file to be overridden so the job is not tied to one machine.
    val inputPath = args.headOption.getOrElse(DefaultInputPath)
    env.setParallelism(1) // keep the read order stable for testing
    val inputStream = env.readTextFile(inputPath)

    // 1. Convert each CSV line into the case class.
    //    Blank lines are skipped and fields are trimmed so stray whitespace
    //    in the file does not crash the parser.
    val dataStream = inputStream
      .filter((line: String) => line.trim.nonEmpty)
      .map(line => {
        val arr = line.split(",").map(_.trim)
        SensorReading2(arr(0), arr(1).toLong, arr(2).toDouble)
      })
    //dataStream.print()

    // 2. Keyed aggregation: emit the reading with the minimum temperature seen so far per sensor.
    val aggStream = dataStream
      .keyBy(_.id) // typed key selector instead of the string field expression
      .minBy("temperature")

    //aggStream.print()

    // 3. Emit the minimum temperature so far together with the most recent timestamp.
    val resultStream = dataStream
      .keyBy(_.id)
      // Variant 1, as a lambda:
      //   .reduce((curState, newData) =>
      //     SensorReading2(curState.id, newData.timestamp, curState.temperature.min(newData.temperature)))
      .reduce(new MyReduceFunction) // variant 2: pass a function class
    //resultStream.print()

    // 4. Multi-stream operations
    // 4.1 Split the readings into a high-temperature and a low-temperature stream.
    //     NOTE(review): split/select is deprecated in newer Flink releases in favour
    //     of side outputs; kept here because this file targets the older API.
    val splitStream = dataStream
      .split(data => {
        if (data.temperature > 30.0) {
          Seq("high")
        } else {
          Seq("low")
        }
      })
    val highTempStream = splitStream.select("high")
    val lowTempStream = splitStream.select("low")
    val allTempStream = splitStream.select("high", "low")

    // highTempStream.print("high")
    // lowTempStream.print("low")
    // allTempStream.print("all")

    // 4.2 Connect: the two connected streams may have different element types.
    val warningStream = highTempStream.map(data => (data.id, data.temperature))
    val connectedStreams = warningStream.connect(lowTempStream)

    // Handle each side separately with a coMap; the two result types may differ as well.
    val coMapResultStream = connectedStreams
      .map(
        warningData => (warningData._1, warningData._2, "warning"),
        lowTempData => (lowTempData.id, "healthy")
      )

    // coMapResultStream.print("coMap")

    // 4.3 Union: all input streams must have the same element type.
    val unionStream = highTempStream.union(lowTempStream)

    // 5. Custom filter
    val filterStream = dataStream.filter(new MyFilter)
    //filterStream.print()

    env.execute("transform test")
  }
}

/**
 * Reduce function that merges two readings of the same sensor: it keeps the id of the
 * first value, takes the timestamp of the second value, and keeps the smaller of the
 * two temperatures.
 */
class MyReduceFunction extends ReduceFunction[SensorReading2] {
  override def reduce(value1: SensorReading2, value2: SensorReading2): SensorReading2 = {
    val lowestTemperature = math.min(value1.temperature, value2.temperature)
    value1.copy(timestamp = value2.timestamp, temperature = lowestTemperature)
  }
}

/**
 * Filter that keeps only readings whose id begins with "sensor_1".
 *
 * Note: this is a prefix match, so ids such as "sensor_10" pass as well.
 */
class MyFilter extends FilterFunction[SensorReading2] {
  override def filter(t: SensorReading2): Boolean = {
    val sensorId = t.id
    sensorId.startsWith("sensor_1")
  }
}

/**
 * Rich map function example: a rich function can access the runtime context and
 * offers lifecycle hooks (`open` / `close`) in addition to `map`.
 */
class MyRichMapper extends RichMapFunction[SensorReading2, String] {

  /** Initialization hook, e.g. for opening a database connection. */
  override def open(parameters: Configuration): Unit = {
    // The runtime context is fetched only to show that it is reachable here;
    // the result is not used.
    getRuntimeContext()
  }

  /** Maps a reading to its id with the literal suffix "temperature" appended. */
  override def map(in: SensorReading2): String = s"${in.id}temperature"

  /** Clean-up hook, typically for closing connections or clearing state; a no-op here. */
  override def close(): Unit = ()
}

 

你可能感兴趣的:(flink)