Flink CEP基础学习与使用04----实际业务场景开发案例demo

目标:针对日志数据,做一个场景匹配的 demo。这个 demo 先发出来,目前写得比较乱,等过一段时间再思考如何整理和重写。

import com.alibaba.fastjson.{JSON, JSONObject}
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.cep.scala.{CEP, PatternStream}
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010

import scala.collection.Map

/**
 * Flink CEP demo: reads JSON log lines from Kafka and matches sequences of events whose
 * `user` is listed (under key "A" or "B") in the rule registered for the event's `rowId`.
 *
 * Pattern shape: one matching "start" event, followed by one or more matching "middle"
 * events, with an `until` stop condition on events carrying `"end": "true"`.
 * For every match the first "middle" event is printed.
 */
object Storm2FlinkCEP_demo {

  import scala.util.{Failure, Success, Try}

  /**
   * Parses one Kafka record into a JSON object.
   *
   * @param line raw record text
   * @return a single-element list with the parsed object, or an empty list when the record
   *         is not a valid JSON object (so one bad record does not fail the whole job)
   */
  private def parseEvent(line: String): List[JSONObject] =
    Try(JSON.parseObject(line)) match {
      // parseObject can yield null instead of throwing (e.g. for empty input); drop those too.
      case Success(json) if json != null => List(json)
      case Success(_) => Nil
      case Failure(e) =>
        Console.err.println(s"Skipping malformed record: ${e.getMessage}")
        Nil
    }

  /**
   * Tells whether the event's `user` is one of the users listed for the event's `rowId`.
   *
   * @param rules allowed users keyed by row id
   * @param event log event; expected to carry `rowId` and `user` fields
   * @return true only when both fields are present, a rule exists for `rowId`,
   *         and that rule lists `user`
   */
  private def userIsListed(rules: Map[String, Set[String]], event: JSONObject): Boolean = {
    val listed = for {
      rowId <- Option(event.getString("rowId"))
      users <- rules.get(rowId)
      user <- Option(event.getString("user"))
    } yield users.contains(user)
    listed.getOrElse(false)
  }

  /**
   * Builds and runs the job: Kafka source -> JSON parsing -> CEP pattern -> print.
   *
   * @param args command-line arguments (currently unused; settings come from the
   *             built-in defaults below)
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val proString = Array[String]("--input-topic",
      "dianyou_wxgz_test2",
      "--output-topic",
      "dianyou_wxgz_test3",
      "--bootstrap.servers",
      "172.1.1.141:9092,172.1.1.142:9092,172.1.1.143:9092",
      "--zookeeper.connect",
      "172.10.4.63:2181,172.10.4.64:2181,172.10.4.65:2181",
      "--group.id", "cc")

    val parameterTool = ParameterTool.fromArgs(proString)

    // Take the topic from the parsed parameters instead of repeating the literal.
    val kafkaDstream: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010[String](
        parameterTool.getRequired("input-topic"),
        new SimpleStringSchema,
        parameterTool.getProperties))

    // Mock of the rule data (stands in for a Redis lookup): row id -> rule JSON.
    val redisMap: Map[String, String] = Map(
      "row_01" -> "{'id':1,'A':'bbb','B':'aaa'}",
      "row_02" -> "{'id':2,'A':'bbb','B':'ccc'}",
      "row_03" -> "{'id':3,'A':'bbb','B':'ddd'}",
      "row_04" -> "{'id':4,'A':'aaa','B':'ccc'}")

    // The rules are constant, so parse them once here rather than once per event
    // inside the pattern conditions. A rule missing "A" or "B" simply lists fewer users.
    val allowedUsers: Map[String, Set[String]] = redisMap.map { case (rowId, ruleJson) =>
      val rule = JSON.parseObject(ruleJson)
      val users: Set[String] = Set("A", "B").flatMap(key => Option(rule.getString(key)))
      (rowId, users)
    }

    // Convert the Kafka records to JSON, skipping records that cannot be parsed.
    val jsonDstream: DataStream[JSONObject] = kafkaDstream
      .flatMap((line: String) => parseEvent(line))
      .setParallelism(1)

    // Define the pattern.
    val pattern = Pattern.begin[JSONObject]("start")
      // First match: the event's user must be listed for its row.
      .where(log => userIsListed(allowedUsers, log))
      // TODO(original author): observed that only two log records remain at this stage.
      .followedBy("middle")
      .where(line => {
        println("到middle的数据:" + line)
        userIsListed(allowedUsers, line)
      })
      .oneOrMore
      // Null-safe comparison: a missing or null "end" field is simply "not the end".
      .until(event => event.getString("end") == "true")

    // Apply the pattern and collect the result.
    val dataStreamPattern: PatternStream[JSONObject] = CEP.pattern(jsonDstream, pattern)

    // "middle" is a mandatory oneOrMore stage, so every match has at least one event for it.
    val rsDstream: DataStream[JSONObject] =
      dataStreamPattern.select((pat: Map[String, Iterable[JSONObject]]) => pat("middle").head)
    rsDstream.print()

    // Run the job.
    env.execute("execute Flink App")

  }
}

你可能感兴趣的:(Flink,CEP)