package com.coder.flink.core.aaa_spark;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * Offline latency statistics for nginx -> flume1 -> flume2.
 */
public class FlumeTime {

    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // TODO: Kafka configuration properties
        args = new String[]{
                "--input-topic", "wxgz_dianyou_topic",
                "--bootstrap.servers", "node2.hadoop:9091,node3.hadoop:9091",
                "--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181",
                "--group.id", "cc1"};
        ParameterTool parameterTool = ParameterTool.fromArgs(args);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        Properties pros = parameterTool.getProperties();

        // TODO: consume from specific offsets
        Map<KafkaTopicPartition, Long> offsets = new HashMap<>();
        offsets.put(new KafkaTopicPartition("dianyou_wxgz2", 0), 17578573L);

        // TODO: use the Kafka topic as the input source
        DataStream<String> kafkaDstream = env.addSource(new FlinkKafkaConsumer010<>(
                // "wxgz_dianyou_topic",
                "dianyou_wxgz",
                // "dianyou_filter",
                new SimpleStringSchema(),
                // pros).setStartFromSpecificOffsets(offsets)
                // pros).setStartFromEarliest()
                pros).setStartFromLatest()
        ).setParallelism(6);

        // TODO: extract the fields used for the statistics
        DataStream<JSONObject> logDstream = kafkaDstream.map(new MapFunction<String, JSONObject>() {
            @Override
            public JSONObject map(String value) throws Exception {
                JSONObject logJson_old = JSON.parseObject(value);
                JSONObject logJson_next = new JSONObject();
                logJson_next.put("deviceId", logJson_old.getString("deviceId"));
                logJson_next.put("flume1Time", logJson_old.getLong("flume1Time"));
                logJson_next.put("flume2Time", logJson_old.getLong("flume2Time"));
                logJson_next.put("urlTimestamp",
                        Long.parseLong(logJson_old.getString("urlTimestamp").replace(".", "")));
                logJson_next.put("id", "aa");
                return logJson_next;
            }
        }).setParallelism(6);

        // TODO: compute the statistics
        DataStream<JSONObject> lastLogDstream = logDstream.keyBy(new KeySelector<JSONObject, String>() {
            @Override
            public String getKey(JSONObject value) throws Exception {
                // return value.getString("deviceId");
                return value.getString("id");
            }
        }).map(new RichMapFunction<JSONObject, JSONObject>() {

            private transient ValueState<JSONObject> valueState;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                valueState = getRuntimeContext()
                        .getState(new ValueStateDescriptor<>("valueState", JSONObject.class));
            }

            @Override
            public JSONObject map(JSONObject log) throws Exception {
                String deviceId = log.getString("deviceId");
                Long flume1Time = log.getLong("flume1Time");
                Long flume2Time = log.getLong("flume2Time");
                Long urlTimestamp = log.getLong("urlTimestamp");

                // running totals per key
                JSONObject state = valueState.value();
                try {
                    if (state == null) {
                        JSONObject countvalue = new JSONObject();
                        countvalue.put("flink_count", 1);
                        countvalue.put("total_flume1", flume1Time);
                        countvalue.put("total_flume2", flume2Time);
                        countvalue.put("total_urlTimestamp", urlTimestamp);
                        valueState.update(countvalue);

                        log.put("flink_count", 1);
                        log.put("nginx_flume1_avg", flume1Time - urlTimestamp);
                        log.put("flume1_flume2_avg", flume2Time - flume1Time);
                        log.put("nginx_flume1_time", flume1Time - urlTimestamp);
                        log.put("flume1_flume2_time", flume2Time - flume1Time);
                    } else {
                        // state exists: update the running totals
                        Long flink_count = state.getLong("flink_count");
                        Long total_flume1 = state.getLong("total_flume1");
                        Long total_flume2 = state.getLong("total_flume2");
                        Long total_urlTimestamp = state.getLong("total_urlTimestamp");

                        Long flink_count_new = flink_count + 1;
                        Long total_flume1_new = total_flume1 + flume1Time;
                        Long total_flume2_new = total_flume2 + flume2Time;
                        Long total_urlTimestamp_new = total_urlTimestamp + urlTimestamp;

                        state.put("flink_count", flink_count_new);
                        state.put("total_flume1", total_flume1_new);
                        state.put("total_flume2", total_flume2_new);
                        state.put("total_urlTimestamp", total_urlTimestamp_new);
                        valueState.update(state);

                        // enrich the outgoing log with 5 extra fields
                        log.put("flink_count", flink_count_new);
                        log.put("nginx_flume1_time", flume1Time - urlTimestamp);
                        log.put("nginx_flume1_avg", (total_flume1_new - total_urlTimestamp_new) / flink_count_new);
                        log.put("flume1_flume2_time", flume2Time - flume1Time);
                        log.put("flume1_flume2_avg", (total_flume2_new - total_flume1_new) / flink_count_new);
                    }
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
                return log;
            }
        }).setParallelism(8);

        // lastLogDstream.print();
        lastLogDstream.writeAsText("C:\\Users\\Administrator\\Desktop\\flume_result.txt").setParallelism(1);

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
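For reference, the map() above only relies on four fields of each Kafka record, and it flattens urlTimestamp from an nginx-style "seconds.milliseconds" string into epoch milliseconds by dropping the dot. A minimal standalone sketch of that assumption (the field names mirror the job, the concrete values are invented):

// Standalone sketch of the input assumed by FlumeTime.map() above.
// Field names mirror the job; the concrete values are invented for illustration.
public class FlumeTimeInputSample {
    public static void main(String[] args) {
        // urlTimestamp is assumed to be an nginx-style "seconds.milliseconds" string
        String urlTimestamp = "1566374401.100";
        long urlMillis = Long.parseLong(urlTimestamp.replace(".", "")); // 1566374401100
        long flume1Time = 1566374401200L;   // hypothetical flume1 arrival time (ms)
        long flume2Time = 1566374401450L;   // hypothetical flume2 arrival time (ms)
        System.out.println("nginx -> flume1 (ms): " + (flume1Time - urlMillis));
        System.out.println("flume1 -> flume2 (ms): " + (flume2Time - flume1Time));
    }
}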
package com.coder.flink.core.aaa_spark;

import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.configuration.Configuration;

import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Latency statistics from Nginx to the client.
 */
public class NginxToClient_time {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();

        DataSet<String> text = env.readTextFile("C:\\Users\\Administrator\\Desktop\\tuisong.log");
        DataSet<String> flink_result = env.readTextFile("C:\\Users\\Administrator\\Desktop\\topic.txt");

        DataSet<JSONObject> mapDataSet = text.map(new RichMapFunction<String, JSONObject>() {

            private SimpleDateFormat df;
            private SimpleDateFormat df2;
            private String year_mm_dd;
            private JSONObject json;
            private LongCounter numLines;
            private Long client_time_sum = 0L;
            private Long client_count = 0L;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                df = new SimpleDateFormat("yyyy-MM-dd");
                df2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                Date t = new Date();
                year_mm_dd = df.format(t);
                numLines = new LongCounter();
            }

            @Override
            public JSONObject map(String log) throws Exception {
                String[] split = log.split(" ");
                // hour:minute:second part of the log line
                String log_year_hh_mm = split[0].substring(0, split[0].length() - 7);
                // millisecond part
                String ms = split[0].substring(split[0].length() - 6, split[0].length() - 3);
                // String timeStr = year_mm_dd + " " + log_year_hh_mm;
                String timeStr = "2019-8-21" + " " + log_year_hh_mm;
                Date date = df2.parse(timeStr);
                String did = split[5].substring(7);
                long ts = date.getTime() + Long.parseLong(ms);

                json = new JSONObject();
                client_time_sum += ts;
                client_count += 1;
                json.put("id", "aaa");
                json.put("did", did);
                json.put("client_time", ts);
                json.put("client_count", client_count);
                json.put("client_time_sum", client_time_sum);
                return json;
            }
        }).setParallelism(1);

        // TODO: records pushed to the client
        DataSet<Tuple5<String, Long, Long, Long, Long>> clientDataSet =
                mapDataSet.map(new MapFunction<JSONObject, Tuple5<String, Long, Long, Long, Long>>() {
                    @Override
                    public Tuple5<String, Long, Long, Long, Long> map(JSONObject value) throws Exception {
                        String did = value.getString("did");
                        Long client_time = value.getLong("client_time");
                        Long client_count = value.getLong("client_count");
                        Long client_total_time = value.getLong("client_time_sum");
                        return new Tuple5<>(did, client_time, client_count, client_total_time, client_total_time);
                    }
                });
        // clientDataSet.print();

        // TODO: parse the Flink aggregation output
        DataSet<Tuple5<String, Long, Long, Long, Long>> flinkDataSet =
                flink_result.map(new RichMapFunction<String, Tuple5<String, Long, Long, Long, Long>>() {
                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                    }

                    @Override
                    public Tuple5<String, Long, Long, Long, Long> map(String value) {
                        JSONObject flinkJson = JSONObject.parseObject(value);
                        try {
                            String deviceId = flinkJson.getString("deviceId");
                            Long nginx_storm_time = flinkJson.getLong("nginx_storm_time");
                            Long cunrrent_time = flinkJson.getLong("cunrrent_time");
                            Long nginx_storm_avg = flinkJson.getLong("nginx_storm_avg");
                            Long flink_count = flinkJson.getLong("flink_count");
                            // Long total_time = flinkJson.getLong("total_time");
                            // TODO: tuple layout: [deviceId, log timestamp, nginx->storm latency, nginx->storm average latency, record count]
                            return new Tuple5<>(deviceId, cunrrent_time, nginx_storm_time, nginx_storm_avg, flink_count);
                        } catch (Exception ex) {
                            System.out.println("bad flinkJson = " + flinkJson);
                        }
                        return null;
                    }
                });
        // flinkDataSet.print();

        DataSet<Tuple5<String, Long, Long, Long, Long>> result = flinkDataSet.join(clientDataSet)
                .where(0).equalTo(0)
                .map(new MapFunction<Tuple2<Tuple5<String, Long, Long, Long, Long>, Tuple5<String, Long, Long, Long, Long>>,
                        Tuple5<String, Long, Long, Long, Long>>() {

                    private long client_total_time = 0L;
                    private long flink_total_time = 0L;
                    private long log_scount = 0L;

                    @Override
                    public Tuple5<String, Long, Long, Long, Long> map(
                            Tuple2<Tuple5<String, Long, Long, Long, Long>, Tuple5<String, Long, Long, Long, Long>> value) throws Exception {
                        // TODO: the Flink-side statistics
                        Tuple5<String, Long, Long, Long, Long> flink_tuple = value.f0;
                        String did = flink_tuple.f0;
                        long flink_cunrrent_time = flink_tuple.f1;
                        long nginx_storm_time = flink_tuple.f2;
                        long nginx_storm_avg = flink_tuple.f3;
                        long flink_count = flink_tuple.f4;
                        flink_total_time += flink_cunrrent_time;

                        // TODO: average latency from topic to client
                        Tuple5<String, Long, Long, Long, Long> client_tuple = value.f1;
                        long client_time = client_tuple.f1;
                        Long client_count = client_tuple.f2;
                        client_total_time += client_time;
                        log_scount++;

                        // per-record difference
                        long client_diff = client_time - flink_cunrrent_time;
                        // if (did.equals("132485184289142")) {
                        //     System.out.println("client_time:" + client_time + ",flink_cunrrent_time:" + flink_cunrrent_time + ",diff:" + client_diff);
                        // }

                        // running average
                        long client_avg = (client_total_time - flink_total_time) / log_scount;
                        // System.out.println("client_avg = " + client_avg);

                        // TODO: did, nginx->storm latency, its average, client-side latency, its average
                        return new Tuple5<>(did, nginx_storm_time, nginx_storm_avg, client_diff, client_avg);
                    }
                });
        // result.print();

        // TODO: overall aggregation
        // AggregateOperator<Tuple5<String, Long, Long, Long, Long>> aggregate = map.aggregate(Aggregations.SUM, 1);
        // aggregate.print();
        result.writeAsText("C:\\Users\\Administrator\\Desktop\\out3.txt").setParallelism(1);

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
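NginxToClient_time parses tuisong.log lines purely by fixed offsets (a clock time with sub-millisecond digits in split[0], a device id after a 7-character prefix in split[5]) and hard-codes the date 2019-8-21. A standalone sketch of that parsing on an invented line that happens to match those offsets; the real log layout may differ:

import java.text.SimpleDateFormat;
import java.util.Date;

// Sketch of the fixed-offset parsing used in NginxToClient_time.map().
// The sample line is invented; only the offsets come from the code above.
public class TuisongLineParseSample {
    public static void main(String[] args) throws Exception {
        String log = "18:20:33.123456 INFO push ok to device:132485184289142 payload=...";
        String[] split = log.split(" ");
        String hhmmss = split[0].substring(0, split[0].length() - 7);                   // "18:20:33"
        String ms = split[0].substring(split[0].length() - 6, split[0].length() - 3);   // "123"
        Date date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse("2019-8-21 " + hhmmss);
        long ts = date.getTime() + Long.parseLong(ms);   // client-side timestamp in ms
        String did = split[5].substring(7);              // "132485184289142"
        System.out.println(did + " -> " + ts);
    }
}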
package com.coder.flink.core.aaa_spark;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;

import java.util.Properties;

/**
 * Reads data from wxgz_dianyou_topic and writes it to a file.
 */
public class WriteTopicLogsToFile {

    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // TODO: Kafka configuration properties
        args = new String[]{
                "--input-topic", "wxgz_dianyou_topic",
                "--bootstrap.servers", "node2.hadoop:9091,node3.hadoop:9091",
                "--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181",
                "--group.id", "cc1"};
        ParameterTool parameterTool = ParameterTool.fromArgs(args);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        Properties pros = parameterTool.getProperties();

        // TODO: use the Kafka topic as the input source
        DataStream<String> kafkaDstream = env.addSource(new FlinkKafkaConsumer010<>(
                "wxgz_dianyou_topic",
                // "dianyou_filter",
                new SimpleStringSchema(),
                pros).setStartFromEarliest()
                // pros).setStartFromLatest()
        ).setParallelism(6);

        // TODO: extract the fields used for the statistics
        DataStream<JSONObject> logDstream = kafkaDstream.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) throws Exception {
                JSONObject logJson = JSON.parseObject(value);
                return logJson.containsKey("nginx_storm");
            }
        }).map(new MapFunction<String, JSONObject>() {
            @Override
            public JSONObject map(String value) throws Exception {
                JSONObject logJson_old = JSON.parseObject(value);
                JSONObject logJson_next = new JSONObject();
                logJson_next.put("nginx_storm", logJson_old.getLong("nginx_storm"));
                logJson_next.put("deviceId", logJson_old.getLong("deviceId"));
                logJson_next.put("cunrrent_time", logJson_old.getLong("cunrrent_time"));
                logJson_next.put("id", "aa");
                return logJson_next;
            }
        }).setParallelism(6);

        // TODO: compute the statistics
        DataStream<JSONObject> lastLogDstream = logDstream.keyBy(new KeySelector<JSONObject, String>() {
            @Override
            public String getKey(JSONObject value) throws Exception {
                // return value.getString("deviceId");
                return value.getString("id");
            }
        }).map(new RichMapFunction<JSONObject, JSONObject>() {

            private transient ValueState<JSONObject> valueState;
            private transient ListState<Long> itemState;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                valueState = getRuntimeContext()
                        .getState(new ValueStateDescriptor<>("valueState", JSONObject.class));
                ListStateDescriptor<Long> listStateDesc =
                        new ListStateDescriptor<>("errorValue", Long.class);
                itemState = getRuntimeContext().getListState(listStateDesc);
            }

            @Override
            public JSONObject map(JSONObject log) throws Exception {
                Long nginx_storm = log.getLong("nginx_storm");
                Long cunrrent_time = log.getLong("cunrrent_time");

                // running totals per key
                JSONObject state = valueState.value();
                try {
                    if (state == null) {
                        JSONObject countvalue = new JSONObject();
                        countvalue.put("nginx_storm_sum", nginx_storm);
                        countvalue.put("nginx_storm_count", 1);
                        countvalue.put("total_time", cunrrent_time);
                        valueState.update(countvalue);

                        log.put("flink_count", 1);
                        log.put("nginx_storm_avg", nginx_storm);
                        log.put("cunrrent_time", cunrrent_time);
                        log.put("total_time", cunrrent_time);
                        log.put("nginx_storm_time", nginx_storm);

                        // abnormal records
                        if (nginx_storm > 3000L) {
                            itemState.add(nginx_storm);
                            log.put("异常数据总数", itemState.get());
                        }
                    } else {
                        // state exists: update the running totals
                        Long nginx_storm_sum = state.getLong("nginx_storm_sum");
                        Long nginx_storm_count = state.getLong("nginx_storm_count");
                        Long total_time = state.getLong("total_time");

                        Long new_sum = nginx_storm_sum + nginx_storm;
                        Long new_count = nginx_storm_count + 1;
                        Long new_total_time = total_time + cunrrent_time;

                        state.put("nginx_storm_sum", new_sum);
                        state.put("nginx_storm_count", new_count);
                        state.put("total_time", new_total_time);

                        // abnormal records
                        if (nginx_storm > 3000L) {
                            itemState.add(nginx_storm);
                        }
                        valueState.update(state);

                        // enrich the outgoing log with 5 extra fields
                        log.put("cunrrent_time", cunrrent_time);
                        log.put("nginx_storm_time", nginx_storm);
                        log.put("nginx_storm_avg", new_sum / new_count);   // average nginx->storm latency
                        log.put("flink_count", new_count);                 // number of records
                        log.put("total_time", new_total_time);             // accumulated time
                        // log.put("异常数据汇总", itemState.get());
                    }
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
                return log;
            }
        }).setParallelism(8);

        lastLogDstream.writeAsText("C:\\Users\\Administrator\\Desktop\\topic.txt").setParallelism(1);
        // lastLogDstream.print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Maps the Flink record fields to Redis hash fields under a fixed key.
     */
    public static class SinkToRedis extends RichSinkFunction<JSONObject> {

        private Jedis redisCon = null;
        private JedisPoolConfig config = null;
        private JedisPool pool = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            config = new JedisPoolConfig();
            config.setMaxTotal(500);
            config.setMaxIdle(5);
            config.setMaxWaitMillis(1000 * 3600);
            config.setTestOnBorrow(true);
            pool = new JedisPool(config, "172.10.4.144", 6379, 20000, "7yxNFH8pcrII");
            redisCon = pool.getResource();
        }

        @Override
        public void invoke(JSONObject json, Context context) throws Exception {
            if (json.getString("listA") != null) {
                this.redisCon.hset("aaa_test", "listA", json.getString("listA"));
            }
            if (json.getString("listB") != null) {
                this.redisCon.hset("aaa_test", "listB", json.getString("listB"));
            }
            if (json.getString("listC") != null) {
                this.redisCon.hset("aaa_test", "listC", json.getString("listC"));
            }
            if (json.getString("listD") != null) {
                this.redisCon.hset("aaa_test", "listD", json.getString("listD"));
            }
            if (json.getString("listE") != null) {
                this.redisCon.hset("aaa_test", "listE", json.getString("listE"));
            }
            // store the summary fields; Chinese hash keys kept as-is
            // (异常数据汇总 = abnormal-data summary, 总记录数 = record count, 最小值 = min, 最大值 = max, 平均值 = average)
            String abnormalSummary = json.getString("异常数据汇总");
            if (abnormalSummary != null) {
                this.redisCon.hset("aaa_test", "异常数据汇总", abnormalSummary);
            }
            this.redisCon.hset("aaa_test", "总记录数", json.getString("总记录数"));
            this.redisCon.hset("aaa_test", "最小值", json.getString("最小值"));
            this.redisCon.hset("aaa_test", "最大值", json.getString("最大值"));
            this.redisCon.hset("aaa_test", "平均值", json.getString("平均值"));
        }

        @Override
        public void close() throws Exception {
            super.close();
            if (this.redisCon != null) {
                this.redisCon.close();
            }
        }
    }
}