A simple Flink job that aggregates anomalous latency data and writes the results to Redis.
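Each Kafka message is expected to be a JSON string carrying the latency fields read by the job below (nginx_storm, flume2_storm, nginx_flume2 and the per-bolt boltA_cha .. boltE_cha values); messages without nginx_storm are dropped by the filter. A minimal sketch of such a record, parsed the same way the job does (the field values are made up for illustration):

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

public class SampleRecord {
    public static void main(String[] args) {
        // illustrative record only; real values come from the upstream monitoring pipeline
        String sample = "{\"nginx_storm\":6500,\"flume2_storm\":1200,\"nginx_flume2\":300,"
                + "\"boltA_cha\":100,\"boltB_cha\":200,\"boltC_cha\":300,"
                + "\"boltD_cha\":400,\"boltE_cha\":500}";
        JSONObject record = JSON.parseObject(sample);
        // the job treats nginx_storm values above 7000 as anomalous
        System.out.println(record.getLong("nginx_storm")); // prints 6500
    }
}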

package com.coder.flink.core.aaa_spark;


import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;

import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;

import java.util.Properties;

/**
 * Aggregates the monitoring data consumed from Kafka.
 */
public class StormTimeCount {
    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // TODO: Kafka connection properties (hard-coded here for convenience)
        args = new String[]{"--input-topic", "wxgz_dianyou_topic", "--bootstrap.servers", "node2.hadoop:9091,node3.hadoop:9091",
                "--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181", "--group.id", "cc1"};

        ParameterTool parameterTool = ParameterTool.fromArgs(args);

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
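        // Note: event time is declared here, but no timestamp/watermark assigner and no
        // windows are used below, so this setting has no effect on the statistics.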
        Properties pros = parameterTool.getProperties();
        // TODO: use a Kafka topic as the input source
        DataStream<String> kafkaDstream = env.addSource(new FlinkKafkaConsumer010<String>(
                        "wxgz_dianyou_topic",
//                        "dianyou_filter",
                        new SimpleStringSchema(),
//                        pros).setStartFromEarliest()
                        pros).setStartFromLatest()
        ).setParallelism(6);
        // TODO: filter and project the fields used for the statistics
        DataStream<JSONObject> logDstream = kafkaDstream.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) throws Exception {
                // keep only records that carry the nginx_storm latency field
                JSONObject logJson = JSON.parseObject(value);
                return logJson.containsKey("nginx_storm");
            }
        }).map(new MapFunction<String, JSONObject>() {
            @Override
            public JSONObject map(String value) throws Exception {
                JSONObject logJson_old = JSON.parseObject(value);
                JSONObject logJson_next = new JSONObject();
                logJson_next.put("nginx_storm", logJson_old.getLong("nginx_storm"));
                logJson_next.put("flume2_storm", logJson_old.getLong("flume2_storm"));
                logJson_next.put("nginx_flume2", logJson_old.getLong("nginx_flume2"));
//                logJson_next.put("topicName", logJson_old.getString("topicName"));
                logJson_next.put("id", "aa");


                Long boltA_cha = logJson_old.getLong("boltA_cha");
                Long boltB_cha = logJson_old.getLong("boltB_cha");
                Long boltC_cha = logJson_old.getLong("boltC_cha");
                Long boltD_cha = logJson_old.getLong("boltD_cha");
                Long boltE_cha = logJson_old.getLong("boltE_cha");

                logJson_next.put("boltA_cha", boltA_cha);
                logJson_next.put("boltB_cha", boltB_cha);
                logJson_next.put("boltC_cha", boltC_cha);
                logJson_next.put("boltD_cha", boltD_cha);
                logJson_next.put("boltE_cha", boltE_cha);


                return logJson_next;
            }
        }).setParallelism(6);

        // TODO: key by the constant id and keep running aggregates in keyed state
        DataStream<JSONObject> lastLogDstream = logDstream.keyBy(new KeySelector<JSONObject, String>() {
            @Override
            public String getKey(JSONObject value) throws Exception {
                return value.getString("id");
            }
        }).map(new RichMapFunction<JSONObject, JSONObject>() {
            private transient ValueState<JSONObject> valueState;
            private transient ListState<Long> itemState;
            private transient ListState<Long> listA;
            private transient ListState<Long> listB;
            private transient ListState<Long> listC;
            private transient ListState<Long> listD;
            private transient ListState<Long> listE;


            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                valueState = getRuntimeContext().
                        getState(new ValueStateDescriptor<>("valueState", JSONObject.class));

                ListStateDescriptor<Long> listStateDesc = new ListStateDescriptor<>(
                        "errorValue",
                        Long.class);
                itemState = getRuntimeContext().getListState(listStateDesc);

                ListStateDescriptor<Long> listStateA = new ListStateDescriptor<>(
                        "listStateA",
                        Long.class);
                listA = getRuntimeContext().getListState(listStateA);

                ListStateDescriptor<Long> listStateB = new ListStateDescriptor<>(
                        "listStateB",
                        Long.class);
                listB = getRuntimeContext().getListState(listStateB);

                ListStateDescriptor<Long> listStateC = new ListStateDescriptor<>(
                        "listStateC",
                        Long.class);
                listC = getRuntimeContext().getListState(listStateC);

                ListStateDescriptor<Long> listStateD = new ListStateDescriptor<>(
                        "listStateD",
                        Long.class);
                listD = getRuntimeContext().getListState(listStateD);

                ListStateDescriptor<Long> listStateE = new ListStateDescriptor<>(
                        "listStateE",
                        Long.class);
                listE = getRuntimeContext().getListState(listStateE);
            }

            @Override
            public JSONObject map(JSONObject log) throws Exception {
                Long nginx_storm = log.getLong("nginx_storm");

                // TODO: per-bolt latencies reported by the Storm topology
                Long boltA_cha = log.getLong("boltA_cha");
                Long boltB_cha = log.getLong("boltB_cha");
                Long boltC_cha = log.getLong("boltC_cha");
                Long boltD_cha = log.getLong("boltD_cha");
                Long boltE_cha = log.getLong("boltE_cha");

//                Long flume2_storm = value.getLong("flume2_storm");
//                Long nginx_flume2 = value.getLong("nginx_flume2");
//                String topicName = value.getString("topicName");

                JSONObject state = valueState.value();
                try {
                    if (state == null) {
                        // first record for this key: initialise the running aggregates
                        JSONObject countvalue = new JSONObject();
                        countvalue.put("nginx_storm_sum", nginx_storm);
                        countvalue.put("nginx_storm_count", 1);
                        countvalue.put("max_value", nginx_storm);
                        countvalue.put("min_value", nginx_storm);
                        valueState.update(countvalue);
                        log.put("总记录数", 1);
                        log.put("最小值", nginx_storm);
                        log.put("最大值", nginx_storm);
                        log.put("平均值", nginx_storm);
                        // anomalous record: latency above the 7000 threshold
                        if (nginx_storm > 7000L) {
                            itemState.add(nginx_storm);
                            log.put("异常数据汇总", itemState.get());
                        }


                    } else {
                        // state exists: update the running aggregates
                        Long nginx_storm_sum = state.getLong("nginx_storm_sum");
                        Long nginx_storm_count = state.getLong("nginx_storm_count");
                        Long max_value = state.getLong("max_value");
                        Long min_value = state.getLong("min_value");

                        Long new_sum = nginx_storm_sum + nginx_storm;
                        Long new_count = nginx_storm_count + 1;
                        if (nginx_storm < min_value) {
                            state.put("min_value", nginx_storm);
                        }
                        if (nginx_storm > max_value) {
                            state.put("max_value", nginx_storm);
                        }

                        state.put("nginx_storm_sum", new_sum);
                        state.put("nginx_storm_count", new_count);

                        // anomalous record: latency above the 7000 threshold
                        if (nginx_storm > 7000L) {
                            itemState.add(nginx_storm);
                        }

                        valueState.update(state);
                        log.put("总记录数", new_count);
                        log.put("最小值", state.getLong("min_value"));
                        log.put("最大值", state.getLong("max_value"));
                        log.put("平均值", new_sum / new_count);
                        log.put("异常数据汇总", itemState.get());

                        // per-bolt anomalies; each list collects its own bolt's value
                        if (boltA_cha > 7000L) {
                            listA.add(boltA_cha);
                        }
                        if (boltB_cha > 7000L) {
                            listB.add(boltB_cha);
                        }
                        if (boltC_cha > 7000L) {
                            listC.add(boltC_cha);
                        }
                        if (boltD_cha > 7000L) {
                            listD.add(boltD_cha);
                        }
                        if (boltE_cha > 7000L) {
                            listE.add(boltE_cha);
                        }
                        log.put("listA", listA.get());
                        log.put("listB", listB.get());
                        log.put("listC", listC.get());
                        log.put("listD", listD.get());
                        log.put("listE", listE.get());
                    }
                } catch (Exception ex) {
                    ex.printStackTrace();
                }


                return log;
            }
        });


        lastLogDstream.addSink(new SinkToRedis());
        lastLogDstream.print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }


    // Sink: write the aggregated fields into a Redis hash
    public static class SinkToRedis extends RichSinkFunction<JSONObject> {
        private Jedis redisCon = null;
        private JedisPoolConfig config = null;
        private JedisPool pool = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            config = new JedisPoolConfig();
            config.setMaxTotal(500);
            config.setMaxIdle(5);
            config.setMaxWaitMillis(1000 * 3600);
            config.setTestOnBorrow(true);
            pool = new JedisPool(config, "172.10.4.144", 6379, 20000, "7yxNFH8pcrII");
            redisCon = pool.getResource();
        }

        @Override
        public void invoke(JSONObject json, Context context) throws Exception {
            if (json.getString("listA") != null) {
                this.redisCon.hset("aaa_test", "listA", json.getString("listA"));
            }
            if (json.getString("listB") != null) {
                this.redisCon.hset("aaa_test", "listB", json.getString("listB"));
            }
            if (json.getString("listC") != null) {
                this.redisCon.hset("aaa_test", "listC", json.getString("listC"));
            }
            if (json.getString("listD") != null) {
                this.redisCon.hset("aaa_test", "listD", json.getString("listD"));
            }
            if (json.getString("listE") != null) {
                this.redisCon.hset("aaa_test", "listE", json.getString("listE"));
            }
            // the anomaly summary, if any anomalies have been recorded
            String errorSummary = json.getString("异常数据汇总");
            if (errorSummary != null) {
                this.redisCon.hset("aaa_test", "异常数据汇总", errorSummary);
            }

            this.redisCon.hset("aaa_test", "总记录数", json.getString("总记录数"));
            this.redisCon.hset("aaa_test", "最小值", json.getString("最小值"));
            this.redisCon.hset("aaa_test", "最大值", json.getString("最大值"));
            this.redisCon.hset("aaa_test", "平均值", json.getString("平均值"));
        }

        @Override
        public void close() throws Exception {
            super.close();
            if (this.redisCon != null) {
                this.redisCon.close();
            }
            if (this.pool != null) {
                this.pool.close();
            }
        }


    }
}

 

Redis result:

 

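To check the result, the aaa_test hash can be dumped with a few lines of Jedis (a minimal sketch, reusing the host, port and password hard-coded in the sink above):

import java.util.Map;
import redis.clients.jedis.Jedis;

public class ReadBack {
    public static void main(String[] args) {
        // connect with the same credentials the sink uses
        Jedis jedis = new Jedis("172.10.4.144", 6379, 20000);
        jedis.auth("7yxNFH8pcrII");
        // dump every field the job has written into the aaa_test hash
        Map<String, String> fields = jedis.hgetAll("aaa_test");
        fields.forEach((k, v) -> System.out.println(k + " = " + v));
        jedis.close();
    }
}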
