Main program code:
package cepengine.App;
import cepengine.domain.Event;
import cepengine.domain.InputEventSchema;
import cepengine.domain.OutputEventSchema;
import groovy.lang.GroovyClassLoader;
import groovy.lang.GroovyObject;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.PatternSelectFunction;
import org.apache.flink.cep.PatternStream;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import java.io.File;
import java.util.List;
import java.util.Map;
import java.util.Properties;
public class FlinkKafkaSimpleSchema {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        /** Enable checkpointing every 5 seconds. */
        env.enableCheckpointing(5000);
        try {
            /** Load the external rule: compile the Groovy script and invoke its run() method. */
            GroovyClassLoader loader = new GroovyClassLoader();
            File file = new File("./src/main/java/cepengine/scripts/sRule.groovy");
            Class<?> aClass = loader.parseClass(file);
            GroovyObject groovyObject = (GroovyObject) aClass.newInstance();
            Pattern<Event, Event> pattern = (Pattern<Event, Event>) groovyObject.invokeMethod("run", null);
            /** Initialize the consumer configuration. */
            Properties consumerConfig = new Properties();
            consumerConfig.setProperty("bootstrap.servers", "localhost:9092");
            consumerConfig.setProperty("group.id", "risk_control");
            /** Initialize the Kafka consumer. */
            FlinkKafkaConsumer<Event> flinkKafkaConsumer =
                    new FlinkKafkaConsumer<>(
                            "flink_kafka_poc_input",
                            new InputEventSchema(),
                            consumerConfig
                    );
            /** Start reading from the earliest offset. */
            flinkKafkaConsumer.setStartFromEarliest();
            /** Add the Kafka consumer as the source of the stream. */
            DataStream<Event> stream = env.addSource(flinkKafkaConsumer);
            /** Apply the CEP rule to the stream. */
            PatternStream<Event> patternStream = CEP.pattern(stream, pattern);
            DataStream<Event> outstream = patternStream.select(new PatternSelectFunction<Event, Event>() {
                @Override
                public Event select(Map<String, List<Event>> map) throws Exception {
                    List<Event> next = map.get("next");
                    return new Event(next.get(0).getKey(), next.get(0).getValue(), next.get(0).getTopic(),
                            next.get(0).getPartition(), next.get(0).getOffset());
                }
            });
            outstream.print("next");
            /** Initialize the producer configuration. */
            Properties producerConfig = new Properties();
            producerConfig.setProperty("bootstrap.servers", "localhost:9092");
            producerConfig.setProperty("max.request.size", "102428800");
            /** Initialize the Kafka producer. */
            FlinkKafkaProducer<Event> myProducer = new FlinkKafkaProducer<>(
                    "flink_kafka_poc_output",
                    new OutputEventSchema(),
                    producerConfig
            );
            /** Add the Kafka producer as the sink of the stream. */
            outstream.addSink(myProducer);
            /** Execute the job. */
            env.execute();
        } catch (Exception e) {
            /** Don't swallow failures silently; a broken rule script or connection should be visible. */
            e.printStackTrace();
        }
    }
}
Groovy script:
package cepengine.scripts
import cepengine.domain.Event
import org.apache.flink.cep.pattern.Pattern
import org.apache.flink.cep.pattern.conditions.SimpleCondition
import org.apache.flink.streaming.api.windowing.time.Time
class sRule implements Serializable {
    /** Match two consecutive events whose value contains "失败" (failure) within 5 seconds. */
    def run() {
        Pattern<Event, Event> pattern = Pattern.begin("begin")
                .where(new SimpleCondition<Event>() {
                    @Override
                    boolean filter(Event event) throws Exception {
                        return event.getValue().contains("失败")
                    }
                })
                .next("next")
                .where(new SimpleCondition<Event>() {
                    @Override
                    boolean filter(Event event) throws Exception {
                        return event.getValue().contains("失败")
                    }
                })
                .within(Time.seconds(5))
        return pattern
    }
}
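The rule can be smoke-tested locally before Kafka is involved. The sketch below compiles the same script and feeds three hand-built events through the pattern; the class name RuleSmokeTest and the sample event values are illustrative, not part of the project:
import cepengine.domain.Event;
import groovy.lang.GroovyClassLoader;
import groovy.lang.GroovyObject;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.PatternSelectFunction;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.io.File;
import java.util.List;
import java.util.Map;
public class RuleSmokeTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        /** Compile the rule exactly as the main job does. */
        GroovyClassLoader loader = new GroovyClassLoader();
        Class<?> aClass = loader.parseClass(new File("./src/main/java/cepengine/scripts/sRule.groovy"));
        GroovyObject rule = (GroovyObject) aClass.newInstance();
        Pattern<Event, Event> pattern = (Pattern<Event, Event>) rule.invokeMethod("run", null);
        /** Two consecutive failures should match; the trailing success should not. */
        DataStream<Event> stream = env.fromElements(
                new Event("k1", "登录失败", "test", 0, 0L),
                new Event("k2", "登录失败", "test", 0, 1L),
                new Event("k3", "登录成功", "test", 0, 2L));
        CEP.pattern(stream, pattern)
                .select(new PatternSelectFunction<Event, Event>() {
                    @Override
                    public Event select(Map<String, List<Event>> match) {
                        return match.get("next").get(0);
                    }
                })
                .print();
        env.execute("rule smoke test");
    }
}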
Kafka consumer schema:
package cepengine.domain;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import static org.apache.flink.api.java.typeutils.TypeExtractor.getForClass;
public class InputEventSchema implements KeyedDeserializationSchema<Event> {
    @Override
    public Event deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset) throws IOException {
        String msg = new String(message, StandardCharsets.UTF_8);
        /** Kafka records may arrive without a key, so guard against null. */
        String key = null;
        if (messageKey != null) {
            key = new String(messageKey, StandardCharsets.UTF_8);
        }
        return new Event(key, msg, topic, partition, offset);
    }
    @Override
    public boolean isEndOfStream(Event nextElement) {
        return false;
    }
    @Override
    public TypeInformation<Event> getProducedType() {
        return getForClass(Event.class);
    }
}
Kafka producer schema:
package cepengine.domain;
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema;
import java.nio.charset.StandardCharsets;
public class OutputEventSchema implements KeyedSerializationSchema<Event> {
    @Override
    public byte[] serializeKey(Event event) {
        /** The consumed record may have had no key; avoid a NullPointerException here. */
        return event.getKey() == null ? null : event.getKey().getBytes(StandardCharsets.UTF_8);
    }
    @Override
    public byte[] serializeValue(Event event) {
        return event.getValue().getBytes(StandardCharsets.UTF_8);
    }
    @Override
    public String getTargetTopic(Event event) {
        /** Returning null makes the producer fall back to its default topic, flink_kafka_poc_output. */
        return null;
    }
}
Event class:
package cepengine.domain;
public class Event {
    private String topic;
    private int partition;
    private long offset;
    private String value;
    private String key;

    @Override
    public String toString() {
        return "Event{" +
                "topic='" + topic + '\'' +
                ", partition=" + partition +
                ", offset=" + offset +
                ", value='" + value + '\'' +
                ", key='" + key + '\'' +
                '}';
    }

    public Event() {
    }

    public Event(String key, String value, String topic, int partition, long offset) {
        this.key = key;
        this.value = value;
        this.topic = topic;
        this.partition = partition;
        this.offset = offset;
    }

    public String getKey() {
        return key;
    }

    public void setKey(String key) {
        this.key = key;
    }

    public String getTopic() {
        return topic;
    }

    public void setTopic(String topic) {
        this.topic = topic;
    }

    public int getPartition() {
        return partition;
    }

    public void setPartition(int partition) {
        this.partition = partition;
    }

    public long getOffset() {
        return offset;
    }

    public void setOffset(long offset) {
        this.offset = offset;
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }
}
For now the rules are only loaded from a file; a later iteration will load them from a database.
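Since GroovyClassLoader can compile from a String just as well as from a File, the database version only changes where the script text comes from. A minimal sketch, assuming a hypothetical JDBC-accessible table cep_rule(rule_name, script) that is not part of the current project:
import groovy.lang.GroovyClassLoader;
import groovy.lang.GroovyObject;
import org.apache.flink.cep.pattern.Pattern;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
public class DbRuleLoader {
    private final GroovyClassLoader loader = new GroovyClassLoader();

    /** Fetch one rule script by name and compile it into a Pattern, mirroring the file-based path. */
    public Pattern<?, ?> load(String jdbcUrl, String user, String password, String ruleName) throws Exception {
        String script;
        try (Connection conn = DriverManager.getConnection(jdbcUrl, user, password);
             PreparedStatement ps = conn.prepareStatement("SELECT script FROM cep_rule WHERE rule_name = ?")) {
            ps.setString(1, ruleName);
            try (ResultSet rs = ps.executeQuery()) {
                if (!rs.next()) {
                    throw new IllegalArgumentException("no such rule: " + ruleName);
                }
                script = rs.getString(1);
            }
        }
        /** GroovyClassLoader compiles source text directly, no temp file needed. */
        Class<?> aClass = loader.parseClass(script);
        GroovyObject rule = (GroovyObject) aClass.newInstance();
        return (Pattern<?, ?>) rule.invokeMethod("run", null);
    }
}
Note that the pattern is baked into the job graph when CEP.pattern(...) is called, so picking up a changed rule still requires restarting or redeploying the Flink job.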