Flink 加载外部 CEP 规则

主程序代码:

package cepengine.App;

import cepengine.domain.Event;
import cepengine.domain.InputEventSchema;
import cepengine.domain.OutputEventSchema;
import groovy.lang.GroovyClassLoader;
import groovy.lang.GroovyObject;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.PatternSelectFunction;
import org.apache.flink.cep.PatternStream;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

import java.io.File;
import java.util.List;
import java.util.Map;
import java.util.Properties;

public class FlinkKafkaSimpleSchema {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        /**
         * 设置检查点
         */
        env.enableCheckpointing(5000);
        ObjectMapper mapper = new ObjectMapper();
        try {

            /**
             * 加载外部规则
             */
            GroovyClassLoader loader = new GroovyClassLoader();
            File file = new File("./src/main/java/cepengine/scripts/sRule.groovy");
            Class aClass = loader.parseClass(file);
            GroovyObject groovyObject = (GroovyObject) aClass.newInstance();
            Pattern pattern = (Pattern) groovyObject.invokeMethod("run", null);

            /** 初始化 Consumer 配置 */
            Properties consumerConfig = new Properties();
            consumerConfig.setProperty("bootstrap.servers", "localhost:9092");
            consumerConfig.setProperty("group.id", "risk_control");


            /** 初始化 Kafka Consumer */
            FlinkKafkaConsumer flinkKafkaConsumer =
                    new FlinkKafkaConsumer(
                          
                            "flink_kafka_poc_input",
                           
                            new InputEventSchema(),

                            consumerConfig
                    );
            /** 配置offset */
            flinkKafkaConsumer.setStartFromEarliest();

            /** 将 Kafka Consumer 加入到流处理 */
            DataStream stream = env.addSource(flinkKafkaConsumer);



            /**
             * 匹配规则
             */
            PatternStream patternStream = CEP.pattern(stream, pattern);
            DataStream outstream = patternStream.select(new PatternSelectFunction() {
                @Override
                public Event select(Map> map) throws Exception {
                    List next = map.get("next");
                    return new Event(next.get(0).getKey(), next.get(0).getValue(), next.get(0).getTopic(),next.get(0).getPartition(),next.get(0).getOffset());
                }
            });
            outstream.print("next");


            /** 初始化 Producer 配置 */
            Properties producerConfig = new Properties();
            producerConfig.setProperty("bootstrap.servers", "localhost:9092");
            producerConfig.setProperty("max.request.size", "102428800");

            /** 初始化 Kafka Producer */
            FlinkKafkaProducer myProducer = new FlinkKafkaProducer(
                    "flink_kafka_poc_output",
                    new OutputEventSchema(),
                    producerConfig
            );

            /** 将 Kafka Producer 加入到流处理 */
            outstream.addSink(myProducer);


            /** 执行 */
            env.execute();
        } catch (Exception e) {

        }
    }


}

 

Groovy脚本:

package cepengine.scripts

import cepengine.domain.Event
import org.apache.flink.cep.pattern.Pattern
import org.apache.flink.cep.pattern.conditions.SimpleCondition
import org.apache.flink.streaming.api.windowing.time.Time

/**
 * Externally loaded CEP rule. The host program instantiates this class and
 * calls run() to obtain the pattern to match against the event stream.
 */
class sRule implements Serializable {

    /**
     * Builds the pattern: an event whose value contains "失败", immediately
     * followed by another such event, both within 5 seconds.
     *
     * @return the CEP pattern with steps named "begin" and "next"
     */
    def run() {
        // The explicit <Event> type arguments are required: without them filter(Event)
        // does not implement the abstract filter(Object) of the raw SimpleCondition.
        Pattern<Event, ?> pattern = Pattern.<Event>begin("begin")
                .where(new SimpleCondition<Event>() {
                    @Override
                    boolean filter(Event event) throws Exception {
                        return event.getValue().contains("失败")
                    }
                })
                .next("next")
                .where(new SimpleCondition<Event>() {
                    @Override
                    boolean filter(Event event) throws Exception {
                        return event.getValue().contains("失败")
                    }
                })
                // Example of an additional step, left disabled:
                // .next("next2")
                // .where(new SimpleCondition<LoginEvent>() {
                //     @Override
                //     boolean filter(LoginEvent loginEvent) throws Exception {
                //         return loginEvent.getType().equals("success")
                //     }
                // })
                .within(Time.seconds(5))
        return pattern
    }
}

Kafka consumer schema:

package cepengine.domain;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import static org.apache.flink.api.java.typeutils.TypeExtractor.getForClass;

/**
 * Deserialization schema that turns a raw Kafka record into an {@link Event},
 * decoding key and value as UTF-8 and carrying over topic, partition and offset.
 */
public class InputEventSchema implements KeyedDeserializationSchema<Event> {

    /**
     * Converts one Kafka record into an {@link Event}.
     *
     * @param messageKey raw record key, may be {@code null}
     * @param message    raw record value, may be {@code null} (e.g. a tombstone record)
     * @param topic      topic the record was read from
     * @param partition  partition the record was read from
     * @param offset     offset of the record within the partition
     * @return the event, or {@code null} when the record has no value so that the
     *         consumer skips it instead of failing with a NullPointerException
     */
    @Override
    public Event deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset) throws IOException {
        if (message == null) {
            return null;
        }
        String key = messageKey == null ? null : new String(messageKey, StandardCharsets.UTF_8);
        String msg = new String(message, StandardCharsets.UTF_8);
        return new Event(key, msg, topic, partition, offset);
    }

    /**
     * The stream is unbounded: no element ever marks its end.
     *
     * @param nextElement the element just deserialized
     * @return always {@code false}
     */
    @Override
    public boolean isEndOfStream(Event nextElement) {
        return false;
    }

    /**
     * @return type information for {@link Event}, extracted from the class
     */
    @Override
    public TypeInformation<Event> getProducedType() {
        return getForClass(Event.class);
    }

}

Kafka producer schema:

package cepengine.domain;

import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema;

/**
 * Serialization schema that writes an {@link Event} to Kafka, encoding its
 * key and value as UTF-8.
 */
public class OutputEventSchema implements KeyedSerializationSchema<Event> {

    /**
     * @param event the event to write
     * @return the UTF-8 bytes of the event key, or {@code null} when the event has no key
     */
    @Override
    public byte[] serializeKey(Event event) {
        return toUtf8(event.getKey());
    }

    /**
     * @param event the event to write
     * @return the UTF-8 bytes of the event value, or {@code null} when the event has no value
     */
    @Override
    public byte[] serializeValue(Event event) {
        return toUtf8(event.getValue());
    }

    /**
     * @param event the event to write
     * @return always {@code null}: no per-event topic override is supplied
     */
    @Override
    public String getTargetTopic(Event event) {
        return null;
    }

    /**
     * Encodes text as UTF-8, passing {@code null} through. InputEventSchema produces
     * events with a null key for keyless records, so null must not throw here.
     */
    private static byte[] toUtf8(String text) {
        return text == null ? null : text.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

}

Event类:

package cepengine.domain;

/**
 * A Kafka record as seen by the CEP job: key and value as strings, plus the
 * topic, partition and offset it was read from.
 *
 * <p>Kept as a mutable bean with a public no-argument constructor and
 * getter/setter pairs for every field.
 */
public class Event {

    private String topic;

    private int partition;

    private long offset;

    private String value;

    private String key;

    /** Creates an empty event; fields are populated through the setters. */
    public Event() {
    }

    /**
     * Creates a fully populated event.
     *
     * @param key       record key, may be {@code null}
     * @param value     record value
     * @param topic     topic the record belongs to
     * @param partition partition the record belongs to
     * @param offset    offset of the record within its partition
     */
    public Event(String key, String value, String topic, int partition, long offset) {
        this.key = key;
        this.value = value;
        this.topic = topic;
        this.partition = partition;
        this.offset = offset;
    }

    @Override
    public String toString() {
        return "Event{" +
                "topic='" + topic + '\'' +
                ", partition=" + partition +
                ", offset=" + offset +
                ", value='" + value + '\'' +
                ", key='" + key + '\'' +
                '}';
    }

    public String getKey() {
        return key;
    }

    public void setKey(String key) {
        this.key = key;
    }

    public String getTopic() {
        return topic;
    }

    public void setTopic(String topic) {
        this.topic = topic;
    }

    public int getPartition() {
        return partition;
    }

    public void setPartition(int partition) {
        this.partition = partition;
    }

    public long getOffset() {
        return offset;
    }

    public void setOffset(long offset) {
        this.offset = offset;
    }

    public String getValue() {
        return value;
    }

    /**
     * Sets the record value.
     *
     * @param value the new value
     */
    public void setValue(String value) {
        // The parameter used to be named "Value", which made this line assign the
        // field to itself and silently discard the argument.
        this.value = value;
    }

}

目前只是基于文件加载规则,后续改造为基于数据库加载规则。

你可能感兴趣的:(大数据)