package com.coder.flink.core.aaa_spark;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import java.util.Properties;
/**
 * Aggregates latency statistics over records from Kafka and writes the
 * running results (count, min, max, average, outliers) to Redis.
 */
public class StormTimeCount {
    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // TODO: Kafka connection properties, hard-coded here for testing
        args = new String[]{"--input-topic", "wxgz_dianyou_topic", "--bootstrap.servers", "node2.hadoop:9091,node3.hadoop:9091",
                "--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181", "--group.id", "cc1"};
        ParameterTool parameterTool = ParameterTool.fromArgs(args);
        // NOTE: event time is enabled, but no timestamp/watermark assigner is registered
        // below, so the job effectively runs with processing-time semantics.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        Properties pros = parameterTool.getProperties();
        // TODO: read the input records from the Kafka topic
        DataStream<String> kafkaDstream = env.addSource(new FlinkKafkaConsumer010<>(
                "wxgz_dianyou_topic",
                // "dianyou_filter",
                new SimpleStringSchema(),
                // pros).setStartFromEarliest()
                pros).setStartFromLatest()
        ).setParallelism(6);
        // TODO: extract the fields used for the statistics
        DataStream<JSONObject> logDstream = kafkaDstream.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) throws Exception {
                JSONObject logJson = JSON.parseObject(value);
                // keep only records that carry the nginx_storm latency field
                return logJson.containsKey("nginx_storm");
            }
        }).map(new MapFunction<String, JSONObject>() {
            @Override
            public JSONObject map(String value) throws Exception {
                JSONObject logJson_old = JSON.parseObject(value);
                JSONObject logJson_next = new JSONObject();
                logJson_next.put("nginx_storm", logJson_old.getLong("nginx_storm"));
                logJson_next.put("flume2_storm", logJson_old.getLong("flume2_storm"));
                logJson_next.put("nginx_flume2", logJson_old.getLong("nginx_flume2"));
                // logJson_next.put("topicName", logJson_old.getString("topicName"));
                // constant key, so all records land on the same task for a global aggregate
                logJson_next.put("id", "aa");
                logJson_next.put("boltA_cha", logJson_old.getLong("boltA_cha"));
                logJson_next.put("boltB_cha", logJson_old.getLong("boltB_cha"));
                logJson_next.put("boltC_cha", logJson_old.getLong("boltC_cha"));
                logJson_next.put("boltD_cha", logJson_old.getLong("boltD_cha"));
                logJson_next.put("boltE_cha", logJson_old.getLong("boltE_cha"));
                return logJson_next;
            }
        }).setParallelism(6);
        // TODO: compute the statistics per key
        DataStream<JSONObject> lastLogDstream = logDstream.keyBy(new KeySelector<JSONObject, String>() {
            @Override
            public String getKey(JSONObject value) throws Exception {
                return value.getString("id");
            }
        }).map(new RichMapFunction<JSONObject, JSONObject>() {
            // running aggregate (sum/count/min/max) per key
            private transient ValueState<JSONObject> valueState;
            // abnormal nginx_storm samples (> 7000)
            private transient ListState<Long> itemState;
            // abnormal samples per bolt
            private transient ListState<Long> listA;
            private transient ListState<Long> listB;
            private transient ListState<Long> listC;
            private transient ListState<Long> listD;
            private transient ListState<Long> listE;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                valueState = getRuntimeContext()
                        .getState(new ValueStateDescriptor<>("valueState", JSONObject.class));
                itemState = getRuntimeContext().getListState(
                        new ListStateDescriptor<>("errorValue", Long.class));
                listA = getRuntimeContext().getListState(new ListStateDescriptor<>("listStateA", Long.class));
                listB = getRuntimeContext().getListState(new ListStateDescriptor<>("listStateB", Long.class));
                listC = getRuntimeContext().getListState(new ListStateDescriptor<>("listStateC", Long.class));
                listD = getRuntimeContext().getListState(new ListStateDescriptor<>("listStateD", Long.class));
                listE = getRuntimeContext().getListState(new ListStateDescriptor<>("listStateE", Long.class));
            }
            @Override
            public JSONObject map(JSONObject log) throws Exception {
                Long nginx_storm = log.getLong("nginx_storm");
                // TODO: per-stage latency of each Storm bolt
                Long boltA_cha = log.getLong("boltA_cha");
                Long boltB_cha = log.getLong("boltB_cha");
                Long boltC_cha = log.getLong("boltC_cha");
                Long boltD_cha = log.getLong("boltD_cha");
                Long boltE_cha = log.getLong("boltE_cha");
                // Long flume2_storm = value.getLong("flume2_storm");
                // Long nginx_flume2 = value.getLong("nginx_flume2");
                // String topicName = value.getString("topicName");
                JSONObject state = valueState.value();
                try {
                    if (state == null) {
                        // first record for this key: initialize the aggregate
                        JSONObject countvalue = new JSONObject();
                        countvalue.put("nginx_storm_sum", nginx_storm);
                        countvalue.put("nginx_storm_count", 1);
                        countvalue.put("max_value", nginx_storm);
                        countvalue.put("min_value", nginx_storm);
                        valueState.update(countvalue);
                        log.put("总记录数", 1);
                        log.put("最小值", nginx_storm);
                        log.put("最大值", nginx_storm);
                        log.put("平均值", nginx_storm);
                        // abnormal sample; use the same key the sink reads
                        if (nginx_storm > 7000L) {
                            itemState.add(nginx_storm);
                            log.put("异常数据汇总", itemState.get());
                        }
                    } else {
                        // state exists: update the running aggregate
                        Long nginx_storm_sum = state.getLong("nginx_storm_sum");
                        Long nginx_storm_count = state.getLong("nginx_storm_count");
                        Long max_value = state.getLong("max_value");
                        Long min_value = state.getLong("min_value");
                        Long new_sum = nginx_storm_sum + nginx_storm;
                        Long new_count = nginx_storm_count + 1;
                        if (nginx_storm < min_value) {
                            state.put("min_value", nginx_storm);
                        }
                        if (nginx_storm > max_value) {
                            state.put("max_value", nginx_storm);
                        }
                        state.put("nginx_storm_sum", new_sum);
                        state.put("nginx_storm_count", new_count);
                        // abnormal sample
                        if (nginx_storm > 7000L) {
                            itemState.add(nginx_storm);
                        }
                        valueState.update(state);
                        log.put("总记录数", new_count);
                        log.put("最小值", state.getLong("min_value"));
                        log.put("最大值", state.getLong("max_value"));
                        log.put("平均值", new_sum / new_count); // integer division
                        log.put("异常数据汇总", itemState.get());
                        // the bolt fields may be absent, so guard against null before unboxing,
                        // and add each bolt's own value (not boltA_cha) to its list
                        if (boltA_cha != null && boltA_cha > 7000L) {
                            listA.add(boltA_cha);
                        }
                        if (boltB_cha != null && boltB_cha > 7000L) {
                            listB.add(boltB_cha);
                        }
                        if (boltC_cha != null && boltC_cha > 7000L) {
                            listC.add(boltC_cha);
                        }
                        if (boltD_cha != null && boltD_cha > 7000L) {
                            listD.add(boltD_cha);
                        }
                        if (boltE_cha != null && boltE_cha > 7000L) {
                            listE.add(boltE_cha);
                        }
                        log.put("listA", listA.get());
                        log.put("listB", listB.get());
                        log.put("listC", listC.get());
                        log.put("listD", listD.get());
                        log.put("listE", listE.get());
                    }
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
                return log;
            }
        });
        lastLogDstream.addSink(new SinkToRedis());
        lastLogDstream.print();
        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // map the Flink records onto fields of a fixed Redis hash key
    public static class SinkToRedis extends RichSinkFunction<JSONObject> {
        private Jedis redisCon = null;
        private JedisPoolConfig config = null;
        private JedisPool pool = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            config = new JedisPoolConfig();
            config.setMaxTotal(500);
            config.setMaxIdle(5);
            config.setMaxWaitMillis(1000 * 3600);
            config.setTestOnBorrow(true);
            // note: do not re-create the config here, or the settings above are lost
            pool = new JedisPool(config, "172.10.4.144", 6379, 20000, "7yxNFH8pcrII");
            redisCon = pool.getResource();
        }
        @Override
        public void invoke(JSONObject json, Context context) throws Exception {
            if (json.getString("listA") != null) {
                this.redisCon.hset("aaa_test", "listA", json.getString("listA"));
            }
            if (json.getString("listB") != null) {
                this.redisCon.hset("aaa_test", "listB", json.getString("listB"));
            }
            if (json.getString("listC") != null) {
                this.redisCon.hset("aaa_test", "listC", json.getString("listC"));
            }
            if (json.getString("listD") != null) {
                this.redisCon.hset("aaa_test", "listD", json.getString("listD"));
            }
            if (json.getString("listE") != null) {
                this.redisCon.hset("aaa_test", "listE", json.getString("listE"));
            }
            // write the aggregate fields
            String abnormal = json.getString("异常数据汇总");
            if (abnormal != null) {
                this.redisCon.hset("aaa_test", "异常数据汇总", abnormal);
            }
            this.redisCon.hset("aaa_test", "总记录数", json.getString("总记录数"));
            this.redisCon.hset("aaa_test", "最小值", json.getString("最小值"));
            this.redisCon.hset("aaa_test", "最大值", json.getString("最大值"));
            this.redisCon.hset("aaa_test", "平均值", json.getString("平均值"));
        }
        @Override
        public void close() throws Exception {
            super.close();
            if (this.redisCon != null) {
                this.redisCon.close();
            }
        }
    }
}
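One caveat worth noting: the ListState buffers (itemState and listA through listE) only ever grow, so both the checkpoint size and the Redis hash fields grow without bound. A minimal sketch of one way to bound them, assuming Flink 1.8+ (where list state supports per-entry time-to-live); the helper name ttlListDescriptor and the 24-hour retention are illustrative choices, not part of the original job:

import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.time.Time;

// Builds a list-state descriptor whose entries expire after 24 hours.
static ListStateDescriptor<Long> ttlListDescriptor(String name) {
    StateTtlConfig ttlConfig = StateTtlConfig
            .newBuilder(Time.hours(24))                                // illustrative retention
            .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite) // refresh TTL on writes
            .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
            .build();
    ListStateDescriptor<Long> desc = new ListStateDescriptor<>(name, Long.class);
    desc.enableTimeToLive(ttlConfig);
    return desc;
}

With this helper, open() could create each buffer via getRuntimeContext().getListState(ttlListDescriptor("listStateA")), and expired outlier samples would be dropped automatically instead of accumulating forever.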
Redis result (screenshot):
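To spot-check the hash contents without a GUI client, a few lines of Jedis are enough; a minimal sketch, assuming the same host, port, and password used by SinkToRedis above (the class name RedisCheck is just for illustration):

import java.util.Map;
import redis.clients.jedis.Jedis;

public class RedisCheck {
    public static void main(String[] args) {
        // connect with the same settings used by SinkToRedis
        try (Jedis jedis = new Jedis("172.10.4.144", 6379)) {
            jedis.auth("7yxNFH8pcrII");
            // read every field of the hash written by the job
            Map<String, String> fields = jedis.hgetAll("aaa_test");
            fields.forEach((field, value) -> System.out.println(field + " = " + value));
        }
    }
}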