Simple Flink statistics code

The three jobs below compute timing statistics: FlumeTime (nginx -> flume1 -> flume2 latency, streamed from Kafka), NginxToClient_time (a batch job that joins the Flink output with a client-side push log), and WriteTopicLogsToFile (nginx -> storm latency from wxgz_dianyou_topic, written to a file, plus an unused Redis sink helper).

package com.coder.flink.core.aaa_spark;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 *  Offline latency statistics from nginx to flume1 to flume2.
 */
public class FlumeTime {
    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //todo get the Kafka configuration properties
        args = new String[]{"--input-topic", "wxgz_dianyou_topic", "--bootstrap.servers", "node2.hadoop:9091,node3.hadoop:9091",
                "--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181", "--group.id", "cc1"};

        ParameterTool parameterTool = ParameterTool.fromArgs(args);

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        Properties pros = parameterTool.getProperties();

        //todo consume from specific offsets
        Map<KafkaTopicPartition, Long> offsets = new HashMap<>();
        offsets.put(new KafkaTopicPartition("dianyou_wxgz2", 0), 17578573L);
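        // only takes effect if setStartFromSpecificOffsets(offsets) below is un-commented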


//        //todo use a Kafka topic as the input source
        DataStream<String> kafkaDstream = env.addSource(new FlinkKafkaConsumer010<String>(
//                        "wxgz_dianyou_topic",
                        "dianyou_wxgz",
//                "dianyou_filter",
                        new SimpleStringSchema(),
//                        pros).setStartFromSpecificOffsets(offsets)
//                        pros).setStartFromEarliest()
                        pros).setStartFromLatest()

        ).setParallelism(6);
        //todo extract the fields needed for the statistics
        DataStream<JSONObject> logDstream = kafkaDstream.map(new MapFunction<String, JSONObject>() {
            @Override
            public JSONObject map(String value) throws Exception {
                JSONObject logJson_old = JSON.parseObject(value);
                JSONObject logJson_next = new JSONObject();
                logJson_next.put("deviceId", logJson_old.getString("deviceId"));
                logJson_next.put("flume1Time", logJson_old.getLong("flume1Time"));
                logJson_next.put("flume2Time", logJson_old.getLong("flume2Time"));
                logJson_next.put("urlTimestamp", Long.parseLong(logJson_old.getString("urlTimestamp").replace(".", "")));
                logJson_next.put("id", "aa");
                return logJson_next;
            }
        }).setParallelism(6);

        //todo compute the running statistics
        DataStream<JSONObject> lastLogDstream = logDstream.keyBy(new KeySelector<JSONObject, String>() {
            @Override
            public String getKey(JSONObject value) throws Exception {
//                return value.getString("deviceId");
                return value.getString("id");
            }
        }).map(new RichMapFunction<JSONObject, JSONObject>() {
            private transient ValueState<JSONObject> valueState;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                valueState = getRuntimeContext().
                        getState(new ValueStateDescriptor<>("valueState", JSONObject.class));
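                // keyed state: getKey() above always returns "aa", so this ValueState
                // effectively holds one global JSONObject of running totals (every record
                // lands on the same key, and therefore on a single subtask)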

            }

            @Override
            public JSONObject map(JSONObject log) throws Exception {

                String deviceId = log.getString("deviceId");
                Long flume1Time = log.getLong("flume1Time");
                Long flume2Time = log.getLong("flume2Time");
                Long urlTimestamp = log.getLong("urlTimestamp");


                //todo statistics for each storm node

                JSONObject state = valueState.value();
                try {
                    if (state == null) {
                        JSONObject countvalue = new JSONObject();

                        countvalue.put("flink_count", 1);
                        countvalue.put("total_flume1", flume1Time);
                        countvalue.put("total_flume2", flume2Time);
                        countvalue.put("total_urlTimestamp", urlTimestamp);

                        valueState.update(countvalue);

                        log.put("flink_count", 1);
                        log.put("nginx_flume1_avg", flume1Time - urlTimestamp);
                        log.put("flume1_flume2_avg", flume2Time - flume1Time);
                        log.put("nginx_flume1_time", flume1Time - urlTimestamp);
                        log.put("flume1_flume2_time", flume2Time - flume1Time);


                        //abnormal records


                    } else {

                        //state already exists, update the running totals
                        Long flink_count = state.getLong("flink_count");
                        Long total_flume1 = state.getLong("total_flume1");
                        Long total_flume2 = state.getLong("total_flume2");
                        Long total_urlTimestamp = state.getLong("total_urlTimestamp");


                        Long flink_count_new = flink_count + 1;
                        Long total_flume1_new = total_flume1 + flume1Time;
                        Long total_flume2_new = total_flume2 + flume2Time;
                        Long total_urlTimestamp_new = total_urlTimestamp + urlTimestamp;


                        state.put("flink_count", flink_count_new);
                        state.put("total_flume1", total_flume1_new);
                        state.put("total_flume2", total_flume2_new);
                        state.put("total_urlTimestamp", total_urlTimestamp_new);
                        valueState.update(state);

                        //emit to the output log, adding 5 new fields
                        log.put("flink_count", flink_count_new);
                        log.put("nginx_flume1_time", (flume1Time - urlTimestamp));
                        log.put("nginx_flume1_avg", (total_flume1_new - total_urlTimestamp_new) / flink_count_new);

                        log.put("flume1_flume2_time", (flume2Time - flume1Time));
                        log.put("flume1_flume2_avg", (total_flume2_new - total_flume1_new)/ flink_count_new);





                    }
                } catch (Exception ex) {
                    ex.printStackTrace();
                }


                return log;
            }
        }).setParallelism(8);
//        lastLogDstream.print();
        lastLogDstream.writeAsText("C:\\Users\\Administrator\\Desktop\\flume_result.txt").setParallelism(1);
//        System.out.println(11111);

//        lastLogDstream.print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
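
For reference, a hypothetical input record for FlumeTime (field names taken from map() above, the values are invented; note that urlTimestamp carries a dot-separated fraction, which is why the code strips the "." before parsing it as a long):

        String sample = "{\"deviceId\":\"132485184289142\","
                + "\"flume1Time\":1566380000123,"
                + "\"flume2Time\":1566380000456,"
                + "\"urlTimestamp\":\"1566379999.987\"}";
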
package com.coder.flink.core.aaa_spark;

import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.configuration.Configuration;

import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Statistics for the time from Nginx to the client.
 */
public class NginxToClient_time {
    public static void main(String[] args) throws Exception {


        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
        DataSet<String> text = env.readTextFile("C:\\Users\\Administrator\\Desktop\\tuisong.log");
        DataSet<String> flink_result = env.readTextFile("C:\\Users\\Administrator\\Desktop\\topic.txt");


        DataSet<JSONObject> mapDataSet = text.map(new RichMapFunction<String, JSONObject>() {
            private SimpleDateFormat df;
            private SimpleDateFormat df2;
            private String year_mm_dd;
            private JSONObject json;
            private LongCounter numLines;
            private Long client_time_sum = 0L;
            private Long client_count = 0L;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                df = new SimpleDateFormat("yyyy-MM-dd");
                df2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                Date t = new Date();
                year_mm_dd = df.format(t);
                numLines = new LongCounter();

            }

            @Override
            public JSONObject map(String log) throws Exception {

                String[] split = log.split(" ");
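                // NOTE (assumption): the line is space-separated, split[0] looks like
                // HH:mm:ss.SSSSSS and split[5] carries the device id after a 7-character
                // prefix -- the substring offsets below only hold for that exact layout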

                //hour:minute:second part
                String log_year_hh_mm = split[0].substring(0, split[0].length() - 7);

                String ms = split[0].substring(split[0].length() - 6, split[0].length() - 3);

//                String timeStr =year_mm_dd+" "+log_year_hh_mm;
                String timeStr = "2019-8-21" + " " + log_year_hh_mm;

                Date date = df2.parse(timeStr);

                String did = split[5].substring(7);
                long ts = date.getTime() + Long.parseLong(ms);
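                // epoch milliseconds of the push: second-precision timestamp plus the millisecond part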
                json = new JSONObject();


                client_time_sum += ts;
                client_count += 1;
                json.put("id", "aaa");
                json.put("did", did);
                json.put("client_time", ts);
                json.put("client_count", client_count);
                json.put("client_time_sum", client_time_sum);

                return json;
            }
        }).setParallelism(1);

        //todo read the records pushed to the client
        DataSet<Tuple5<String, Long, Long, Long, Long>> clientDataSet = mapDataSet.map(new MapFunction<JSONObject, Tuple5<String, Long, Long, Long, Long>>() {
            @Override
            public Tuple5<String, Long, Long, Long, Long> map(JSONObject value) throws Exception {

                String did = value.getString("did");
                Long client_time = value.getLong("client_time");
                Long client_count = value.getLong("client_count");
                Long client_total_time = value.getLong("client_time_sum");

                return new Tuple5<>(did, client_time, client_count, client_total_time, client_total_time);
            }
        });

//        clientDataSet.print();


        //todo parse the Flink statistics result into tuples
        DataSet<Tuple5<String, Long, Long, Long, Long>> flinkDataSet = flink_result.map(new RichMapFunction<String, Tuple5<String, Long, Long, Long, Long>>() {


            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
            }

            @Override
            public Tuple5<String, Long, Long, Long, Long> map(String value) {
                JSONObject flinkJson = JSONObject.parseObject(value);
                try {


                    String deviceId = flinkJson.getString("deviceId");
                    Long nginx_storm_time = flinkJson.getLong("nginx_storm_time");
                    Long cunrrent_time = flinkJson.getLong("cunrrent_time");
                    Long nginx_storm_avg = flinkJson.getLong("nginx_storm_avg");
                    Long flink_count = flinkJson.getLong("flink_count");
//                    Long total_time = flinkJson.getLong("total_time");


                    //todo the return value is [deviceId, current log timestamp, nginx-to-storm latency, average nginx-to-storm latency, flink_count]
                    return new Tuple5<>(deviceId, cunrrent_time, nginx_storm_time, nginx_storm_avg, flink_count);
                } catch (Exception ex) {
                    System.out.println("bad flinkJson = " + flinkJson);
                }
                return null;
            }

        });
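
//        NOTE (sketch): map() above returns null for lines it cannot parse, and the DataSet
//        runtime generally does not accept null elements, so a single bad line can fail the job.
//        An alternative, shown here only as a commented-out sketch, is to drop such lines with a
//        flatMap (this would need org.apache.flink.api.common.functions.FlatMapFunction and
//        org.apache.flink.util.Collector on the import list):
//
//        DataSet<Tuple5<String, Long, Long, Long, Long>> flinkDataSet = flink_result
//                .flatMap(new FlatMapFunction<String, Tuple5<String, Long, Long, Long, Long>>() {
//                    @Override
//                    public void flatMap(String value, Collector<Tuple5<String, Long, Long, Long, Long>> out) {
//                        try {
//                            JSONObject json = JSONObject.parseObject(value);
//                            out.collect(new Tuple5<>(json.getString("deviceId"),
//                                    json.getLong("cunrrent_time"),
//                                    json.getLong("nginx_storm_time"),
//                                    json.getLong("nginx_storm_avg"),
//                                    json.getLong("flink_count")));
//                        } catch (Exception ex) {
//                            // silently drop lines that are not valid JSON
//                        }
//                    }
//                });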
//        flinkDataSet.print();

        DataSet<Tuple5<String, Long, Long, Long, Long>> result = flinkDataSet.join(clientDataSet)
                .where(0).equalTo(0)
                .map(new MapFunction<Tuple2<Tuple5<String, Long, Long, Long, Long>, Tuple5<String, Long, Long, Long, Long>>, Tuple5<String, Long, Long, Long, Long>>() {
            private long client_total_time = 0L;
            private long flink_total_time = 0L;
            private long log_scount = 0L;

            @Override
            public Tuple5<String, Long, Long, Long, Long> map(Tuple2<Tuple5<String, Long, Long, Long, Long>, Tuple5<String, Long, Long, Long, Long>> value) throws Exception {

                //todo the Flink-side statistics
                Tuple5<String, Long, Long, Long, Long> flink_tuple = value.f0;
                String did = flink_tuple.f0;
                long flink_cunrrent_time = flink_tuple.f1;
                long nginx_storm_time = flink_tuple.f2;
                long nginx_storm_avg = flink_tuple.f3;
                long flink_count = flink_tuple.f4;
                flink_total_time += flink_cunrrent_time;

                //todo average time from the topic to the client
                Tuple5<String, Long, Long, Long, Long> client_tuple = value.f1;
                long client_time = client_tuple.f1;
                Long client_count = client_tuple.f2;
                client_total_time += client_time;

                log_scount++;

                //compute the difference
                long client_diff = client_time - flink_cunrrent_time;
//                if (did.equals("132485184289142")) {
//                    System.out.println("client_time:" + client_time + ",flink_cunrrent_time:" + flink_cunrrent_time + ",diff:" + client_diff);
//                }
                //running average
                long client_avg = (client_total_time - flink_total_time) / log_scount;
//                System.out.println("client_avg = " + client_avg);

                //todo did, nginx-to-storm latency, its average, client-side latency, its average
                return new Tuple5<>(did, nginx_storm_time, nginx_storm_avg, client_diff, client_avg);
            }
        });
//         result.print();

        //todo aggregate the overall result
//        AggregateOperator<Tuple5<String, Long, Long, Long, Long>> aggregate = result.aggregate(Aggregations.SUM, 1);
//        aggregate.print();
        result.writeAsText("C:\\Users\\Administrator\\Desktop\\out3.txt").setParallelism(1);

        try {


            env.execute();

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

 

package com.coder.flink.core.aaa_spark;


import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;

import java.util.Properties;

/**
 * Read wxgz_dianyou_topic data and write it to a file.
 */
public class WriteTopicLogsToFile {
    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //todo get the Kafka configuration properties
        args = new String[]{"--input-topic", "wxgz_dianyou_topic", "--bootstrap.servers", "node2.hadoop:9091,node3.hadoop:9091",
                "--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181", "--group.id", "cc1"};

        ParameterTool parameterTool = ParameterTool.fromArgs(args);

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        Properties pros = parameterTool.getProperties();
//        //todo use a Kafka topic as the input source
        DataStream<String> kafkaDstream = env.addSource(new FlinkKafkaConsumer010<String>(
                        "wxgz_dianyou_topic",
//                "dianyou_filter",
                        new SimpleStringSchema(),
                pros).setStartFromEarliest()
//                        pros).setStartFromLatest()

        ).setParallelism(6);
        //todo filter and extract the fields needed for the statistics
        DataStream<JSONObject> logDstream = kafkaDstream.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) throws Exception {
                JSONObject logJson = JSON.parseObject(value);
                if (!logJson.containsKey("nginx_storm")) {
                    return false;
                }
                return true;
            }
        }).map(new MapFunction<String, JSONObject>() {
            @Override
            public JSONObject map(String value) throws Exception {
                JSONObject logJson_old = JSON.parseObject(value);
                JSONObject logJson_next = new JSONObject();
                logJson_next.put("nginx_storm", logJson_old.getLong("nginx_storm"));
                logJson_next.put("deviceId", logJson_old.getLong("deviceId"));
                logJson_next.put("cunrrent_time", logJson_old.getLong("cunrrent_time"));
                logJson_next.put("id", "aa");
                return logJson_next;
            }
        }).setParallelism(6);

        //todo compute the running statistics
        DataStream<JSONObject> lastLogDstream = logDstream.keyBy(new KeySelector<JSONObject, String>() {
            @Override
            public String getKey(JSONObject value) throws Exception {
//                return value.getString("deviceId");
                return value.getString("id");
            }
        }).map(new RichMapFunction<JSONObject, JSONObject>() {
            private transient ValueState<JSONObject> valueState;
            private transient ListState<Long> itemState;



            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                valueState = getRuntimeContext().
                        getState(new ValueStateDescriptor<>("valueState", JSONObject.class));

                ListStateDescriptor<Long> listStateDesc = new ListStateDescriptor<>(
                        "errorValue",
                        Long.class);
                itemState = getRuntimeContext().getListState(listStateDesc);
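                // itemState collects every nginx_storm latency above 3000 ms for the current key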


            }

            @Override
            public JSONObject map(JSONObject log) throws Exception {
                Long nginx_storm = log.getLong("nginx_storm");
                Long cunrrent_time = log.getLong("cunrrent_time");

                //todo statistics for each storm node

                JSONObject state = valueState.value();
                try {
                    if (state == null) {
                        JSONObject countvalue = new JSONObject();
                        countvalue.put("nginx_storm_sum", nginx_storm);
                        countvalue.put("nginx_storm_count", 1);
                        countvalue.put("total_time", cunrrent_time);

                        valueState.update(countvalue);
                        log.put("flink_count", 1);
                        log.put("nginx_storm_avg", nginx_storm);
                        log.put("cunrrent_time", cunrrent_time);
                        log.put("total_time", cunrrent_time);
                        log.put("nginx_storm_time", nginx_storm);
                        //abnormal records
                        if (nginx_storm > 3000L) {
                            itemState.add(nginx_storm);
                            log.put("异常数据总数", itemState.get());
                        }


                    } else {

                        //state already exists, update the running totals
                        Long nginx_storm_sum = state.getLong("nginx_storm_sum");
                        Long nginx_storm_count = state.getLong("nginx_storm_count");
                        Long total_time = state.getLong("total_time");




                        Long new_sum = nginx_storm_sum + nginx_storm;
                        Long new_count = nginx_storm_count + 1;
                        Long new_total_time = total_time + cunrrent_time;


                        state.put("nginx_storm_sum", new_sum);
                        state.put("nginx_storm_count", new_count);
                        state.put("total_time", new_total_time);


                        //abnormal records
                        if (nginx_storm > 3000L) {
                            itemState.add(nginx_storm);
                        }


                        valueState.update(state);

                        //emit to the output log, adding 5 new fields

                        log.put("cunrrent_time", cunrrent_time);
                        log.put("nginx_storm_time", nginx_storm);

                        log.put("nginx_storm_avg", new_sum / new_count); //average nginx-to-storm latency

                        log.put("flink_count", new_count); //number of records
                        log.put("total_time", new_total_time); //accumulated time

//                        log.put("异常数据汇总", itemState.get());


                    }
                } catch (Exception ex) {
                    ex.printStackTrace();
                }


                return log;
            }
        }).setParallelism(8);

        lastLogDstream.writeAsText("C:\\Users\\Administrator\\Desktop\\topic.txt").setParallelism(1);
//        System.out.println(11111);

//        lastLogDstream.print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }


    //specify the Redis key and map the Flink records onto Redis data types
    public static class SinkToRedis extends RichSinkFunction<JSONObject> {
        private Jedis redisCon = null;
        private JedisPoolConfig config = null;
        private JedisPool pool = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);

            config = new JedisPoolConfig();

            config.setMaxTotal(500);

            config.setMaxIdle(5);

            config.setMaxWaitMillis(1000 * 3600);

            config.setTestOnBorrow(true);
            pool = new JedisPool(config, "172.10.4.144", 6379, 20000, "7yxNFH8pcrII");
            redisCon = pool.getResource();
        }

        @Override
        public void invoke(JSONObject json, Context context) throws Exception {
            if (json.getString("listA") != null) {
                this.redisCon.hset("aaa_test", "listA", json.getString("listA"));
            }

            if (json.getString("listB") != null) {
                this.redisCon.hset("aaa_test", "listB", json.getString("listB"));
            }

            if (json.getString("listC") != null) {
                this.redisCon.hset("aaa_test", "listC", json.getString("listC"));
            }

            if (json.getString("listD") != null) {
                this.redisCon.hset("aaa_test", "listD", json.getString("listD"));
            }

            if (json.getString("listE") != null) {
                this.redisCon.hset("aaa_test", "listE", json.getString("listE"));
            }
            //store the corresponding keys
            String aaaa = json.getString("异常数据汇总");
            if (aaaa != null) {
                this.redisCon.hset("aaa_test", "异常数据汇总", aaaa);
            }

            this.redisCon.hset("aaa_test", "总记录数", json.getString("总记录数"));
            this.redisCon.hset("aaa_test", "最小值", json.getString("最小值"));
            this.redisCon.hset("aaa_test", "最大值", json.getString("最大值"));
            this.redisCon.hset("aaa_test", "平均值", json.getString("平均值"));
        }

        @Override
        public void close() throws Exception {
            super.close();
            if (this.redisCon != null) {
                this.redisCon.close();
            }
        }


    }
}
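
SinkToRedis above is defined but never attached in main(). A minimal sketch of wiring it in (reusing the host, port and password already hard-coded in open(); parallelism 1 is only a guess, to keep a single Redis connection) would be to add, right after the keyed map:

        lastLogDstream.addSink(new SinkToRedis()).setParallelism(1);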

 
