Flink job log management

Flink job runtime setup

1. POM dependencies


<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.exam.iot</groupId>
    <artifactId>flink-job</artifactId>
    <version>0.0.1</version>
    <name>flink-job</name>
    <packaging>jar</packaging>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <logback.version>1.2.3</logback.version>
        <log4j-over-slf4j.version>1.7.30</log4j-over-slf4j.version>

        <java.version>1.8</java.version>
        <mysql.version>8.0.28</mysql.version>
        <jackson.version>2.12.5</jackson.version>
        <flink.version>1.9.3</flink.version>
        <dubbo.version>2.7.14</dubbo.version>
        <guava.version>31.1-jre</guava.version>
        <influxdb-client.version>2.22</influxdb-client.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-core</artifactId>
            <version>${logback.version}</version>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-access</artifactId>
            <version>${logback.version}</version>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
            <version>${logback.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>log4j-over-slf4j</artifactId>
            <version>${log4j-over-slf4j.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.12.0</version>
        </dependency>

        <dependency>
            <groupId>org.redisson</groupId>
            <artifactId>redisson</artifactId>
            <version>3.16.8</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.79</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>

        <dependency>
            <groupId>com.zaxxer</groupId>
            <artifactId>HikariCP</artifactId>
            <version>4.0.3</version>
        </dependency>

        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-avro</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>${guava.version}</version>
        </dependency>

        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.9.0</version>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.22</version>
            <optional>true</optional>
        </dependency>

        <dependency>
            <groupId>net.jodah</groupId>
            <artifactId>expiringmap</artifactId>
            <version>0.5.10</version>
        </dependency>

        <dependency>
            <groupId>org.influxdb</groupId>
            <artifactId>influxdb-java</artifactId>
            <version>${influxdb-client.version}</version>
        </dependency>

        <dependency>
            <groupId>com.taosdata.jdbc</groupId>
            <artifactId>taos-jdbcdriver</artifactId>
            <version>2.0.37</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-pool2</artifactId>
            <version>2.11.1</version>
        </dependency>

        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.7.22</version>
        </dependency>
    </dependencies>

    <build>
        <finalName>${project.name}</finalName>

        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.3.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.exam.job.JobApplication</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.4</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>

                            <transformers combine.children="append">
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.exam.iot.JobApplication</mainClass>
                                </transformer>

                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>

                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer">
                                </transformer>
                            </transformers>

                            <relocations>
                                <relocation>
                                    <pattern>org.codehaus.plexus.util</pattern>
                                    <shadedPattern>org.shaded.plexus.util</shadedPattern>
                                    <excludes>
                                        <exclude>org.codehaus.plexus.util.xml.Xpp3Dom</exclude>
                                        <exclude>org.codehaus.plexus.util.xml.pull.*</exclude>
                                    </excludes>
                                </relocation>
                            </relocations>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>

        <resources>
            <resource>
                <directory>src/main/java</directory>
                <includes>
                    <include>**/*.yml</include>
                    <include>**/*.properties</include>
                    <include>**/*.xml</include>
                </includes>
                <filtering>false</filtering>
            </resource>
            <resource>
                <directory>src/main/resources</directory>
                <includes>
                    <include>**/*.yml</include>
                    <include>**/*.properties</include>
                    <include>**/*.xml</include>
                </includes>
                <filtering>false</filtering>
            </resource>
        </resources>
    </build>
</project>

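With these dependencies, logback-classic is the active SLF4J binding and log4j-over-slf4j redirects calls made through the Log4j 1.x API to SLF4J, so everything ends up in logback. The shade configuration above excludes org.slf4j:* and log4j:* from the fat jar so the job does not clash with the logging jars already shipped in the Flink distribution. A minimal sketch to verify the routing (the class name is only for illustration):

import org.apache.log4j.Logger;          // this class comes from log4j-over-slf4j
import org.slf4j.LoggerFactory;

public class LoggingBridgeCheck {
    private static final Logger legacyLog = Logger.getLogger(LoggingBridgeCheck.class);
    private static final org.slf4j.Logger slf4jLog = LoggerFactory.getLogger(LoggingBridgeCheck.class);

    public static void main(String[] args) {
        // both calls are handled by the logback appenders configured below
        legacyLog.info("logged through the Log4j 1.x API");
        slf4jLog.info("logged through SLF4J directly");
    }
}
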
2. Create logback.xml under src/main/resources


<configuration>

    <property name="LOGS" value="./logs/" />

    <appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
        <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
            <!-- alternative colored pattern:
                 %black(%d{ISO8601}) %highlight(%-5level) [%blue(%t)] %yellow(%C{1.}): %msg%n%throwable -->
            <pattern>%date{yyyy-MM-dd HH:mm:ss} %highlight(%-5level) (%file:%line\)- %m%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOGS}/electric-job.log</file>
        <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
            <Pattern>%d %p %C{1.} [%t] %m%n</Pattern>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <!-- roll over daily, and additionally whenever the active file reaches 100 MB -->
            <fileNamePattern>${LOGS}/archived/job-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
                <maxFileSize>100MB</maxFileSize>
            </timeBasedFileNamingAndTriggeringPolicy>
        </rollingPolicy>
    </appender>

    <!-- alternative rolling appender that archives into one directory per day;
         it is not referenced by the loggers below -->
    <appender name="syslog" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <File>./logs/plug-flink-job.log</File>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <!-- the first %d (marked aux) only builds the directory name; the second one drives the daily rollover -->
            <fileNamePattern>./logs/%d{yyyy-MM-dd, aux}/plug-flink-job.%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <!-- keep at most 30 days of archives -->
            <maxHistory>30</maxHistory>
            <timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
                <!-- split archives once they reach 128 MB -->
                <maxFileSize>128MB</maxFileSize>
            </timeBasedFileNamingAndTriggeringPolicy>
        </rollingPolicy>
        <encoder>
            <pattern>%d %p (%file:%line\)- %m%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <root level="INFO">
        <appender-ref ref="RollingFile" />
        <appender-ref ref="Console" />
    </root>

    <logger name="com.exam.iot" level="INFO" additivity="false">
        <appender-ref ref="RollingFile" />
        <appender-ref ref="Console" />
    </logger>
</configuration>

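The log calls in the job code below go through SLF4J; since Lombok is on the classpath, the logger can be generated with @Slf4j. A small sketch, assuming the JobApplication main class named in the POM:

import lombok.extern.slf4j.Slf4j;

@Slf4j
public class JobApplication {
    public static void main(String[] args) {
        // written to the Console appender and to ./logs/electric-job.log via RollingFile
        log.info("flink job starting");
    }
}

Without Lombok, the equivalent declaration is private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(JobApplication.class);
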
3. Job example

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;

import java.util.Properties;

public class FlinkKafkaWordCount {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000);
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "10.170.224.246:9092,10.170.224.246:9093,10.170.224.246:9094");
        properties.setProperty("group.id", "word-count-group");

        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("wc_topic_post", new SimpleStringSchema(), properties);

        DataStream<WordWithCount> counts = env.addSource(consumer)
                .flatMap(new FlatMapFunction<String, WordWithCount>() {
                    @Override
                    public void flatMap(String value, Collector<WordWithCount> collector) throws Exception {
                        for (String word : value.split("\t")) {
                            collector.collect(new WordWithCount(word, 1L));
                        }
                    }
                })
                .keyBy("word")
                .timeWindow(Time.seconds(5))
                .reduce(new ReduceFunction<WordWithCount>() {
                    @Override
                    public WordWithCount reduce(WordWithCount a, WordWithCount b) throws Exception {
                        return new WordWithCount(a.word, a.count + b.count);
                    }
                });

        counts.print().setParallelism(1);
        env.execute("flink-kafka-wordcount");
    }

    public static class WordWithCount {
        private String word;
        private long count;

        public WordWithCount() {
        }

        public WordWithCount(String word, long count) {
            this.word = word;
            this.count = count;
        }

        public String getWord() {
            return word;
        }

        public void setWord(String word) {
            this.word = word;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }

        @Override
        public String toString() {
            return word + " : " + count;
        }
    }
}

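To exercise the job, publish tab-separated lines to the wc_topic_post topic. A minimal producer sketch (kafka-clients is available transitively through flink-connector-kafka; the broker address is the one used above):

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

public class WordCountFeeder {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "10.170.224.246:9092");
        props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // words are tab-separated to match value.split("\t") in the job
            producer.send(new ProducerRecord<>("wc_topic_post", "hello\tflink\thello"));
        }
    }
}

With the 5-second window in the job, this input shows up in the printed output as "hello : 2" and "flink : 1".
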
4. Checkpoint configuration

        // Parallelism precedence: operator > environment > client > flink-conf.yaml
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        env.enableCheckpointing(10 * 60 * 1000, CheckpointingMode.EXACTLY_ONCE);
        // checkpoint settings
        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
        checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConfig.setCheckpointTimeout(60 * 1000);
        checkpointConfig.setMinPauseBetweenCheckpoints(60 * 1000);
        checkpointConfig.setMaxConcurrentCheckpoints(1);
        checkpointConfig.setTolerableCheckpointFailureNumber(3);
        // checkpointConfig.setPreferCheckpointForRecovery(true);
        // checkpointConfig.enableUnalignedCheckpoints(); // unaligned checkpoints
        // Checkpoints can be retained in external storage: with RETAIN_ON_CANCELLATION they are not
        // cleaned up when the job is cancelled, so the job can later be restored from them.
        checkpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // restart strategy: at most 3 restart attempts, 60 s apart
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 60 * 1000));

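Retained (externalized) checkpoints only survive if they are written to durable storage, so they are usually combined with a filesystem state backend. A minimal sketch, assuming an HDFS path (the path itself is illustrative):

import org.apache.flink.runtime.state.filesystem.FsStateBackend;

        // store checkpoint data outside the JobManager so RETAIN_ON_CANCELLATION has something to keep
        env.setStateBackend(new FsStateBackend("hdfs://namenode:8020/flink/checkpoints"));

A retained checkpoint can then be used for recovery by submitting the job with flink run -s <checkpoint-path>.
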
5. Job setup

        try {
            env.addSource(AppConfig.getConsumer())
                    .setParallelism(8)
                    .map(new RichMapFunction<String, DeviceClientInfo>() {
                        @Override
                        public DeviceClientInfo map(String jsonLine) {
                            try {
                                log.info("jsonLine={}", jsonLine);
                                String line = jsonLine.replaceAll("socket_abb.", "");
                                return JSONObject.parseObject(line, DeviceClientInfo.class);
                            } catch (Exception e) {
                                log.error("job.line.map error: {}", e.getMessage());
                            }
                            return null;
                        }
                    })
                    .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<DeviceClientInfo>() {
                        // largest timestamp observed so far
                        private long currentMaxTs = Long.MIN_VALUE;

                        @Nullable
                        @Override
                        public Watermark getCurrentWatermark() {
                            // emit a watermark with a 30-second tolerance,
                            // i.e. the maximum allowed out-of-orderness of the data
                            long maxOutOfOrderness = 30 * 1000;
                            return new Watermark(currentMaxTs - maxOutOfOrderness);
                        }

                        @Override
                        public long extractTimestamp(DeviceClientInfo deviceClientInfo, long previousElementTimestamp) {
                            // timestamp of the current record
                            long currentTs = deviceClientInfo.getTimestamp();
                            // update the maximum timestamp observed so far
                            currentMaxTs = Math.max(currentMaxTs, currentTs);
                            // use the record's own timestamp
                            return currentTs;
                        }
                    })
                    .filter(new RichFilterFunction<DeviceClientInfo>() {
                        @Override
                        public boolean filter(DeviceClientInfo deviceClientInfo) {
                            return deviceClientInfo != null
                                    && StringUtils.isNotEmpty(deviceClientInfo.getDeviceId())
                                    && deviceClientInfo.getPayload() != null
                                    && deviceClientInfo.getPayload().getProperties() != null;
                        }
                    })
                    .map(new RichMapFunction<DeviceClientInfo, Electric>() {
                        final int recordFixedTime = 15;

                        @Override
                        public Electric map(DeviceClientInfo deviceClientInfo) {
                            try {
                                return parseElectric(recordFixedTime, deviceClientInfo);
                            } catch (Exception e) {
                                log.error("job.dci.map error: ", e);
                            }
                            return null;
                        }
                    })
                    .filter(new RichFilterFunction<Electric>() {
                        @Override
                        public boolean filter(Electric electric) {
                            return electric != null;
                        }
                    })
                    .keyBy(Electric::getDeviceId)
                    .timeWindow(Time.minutes(1))
                    .reduce(new ReduceFunction<Electric>() {
                        @Override
                        public Electric reduce(Electric preE, Electric curE) {
                            log.info("***** Electric curE={}", JSONObject.toJSONString(curE));
                            return curE;
                        }
                    })
                    .addSink(new SinkMysqlWithOne())
                    .name("mysqlSink");

        } catch (Exception e) {
            e.printStackTrace();
        }

        env.execute("degree-sink");

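The SinkMysqlWithOne class is not shown in this post. As a rough, hypothetical sketch of such a sink, a RichSinkFunction can write each reduced Electric record to MySQL through a HikariCP pool (both the driver and HikariCP are in the POM). The connection settings, table name, and the getDegree getter below are placeholders:

import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.PreparedStatement;

public class SinkMysqlWithOne extends RichSinkFunction<Electric> {
    private transient HikariDataSource dataSource;

    @Override
    public void open(Configuration parameters) {
        HikariConfig config = new HikariConfig();
        config.setJdbcUrl("jdbc:mysql://127.0.0.1:3306/iot?useSSL=false"); // assumed URL
        config.setUsername("root");                                        // assumed credentials
        config.setPassword("******");
        config.setMaximumPoolSize(4);
        dataSource = new HikariDataSource(config);
    }

    @Override
    public void invoke(Electric electric, Context context) throws Exception {
        try (Connection conn = dataSource.getConnection();
             PreparedStatement ps = conn.prepareStatement(
                     "INSERT INTO electric_degree (device_id, degree) VALUES (?, ?)")) { // assumed table
            ps.setString(1, electric.getDeviceId());
            ps.setDouble(2, electric.getDegree()); // assumed getter
            ps.executeUpdate();
        }
    }

    @Override
    public void close() {
        if (dataSource != null) {
            dataSource.close();
        }
    }
}
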
6. Create log4j.properties under src/main/resources

log4j.rootLogger=INFO,logfile,stdout

#log4j.logger.org.springframework.web.servlet=INFO,db
#log4j.logger.org.springframework.beans.factory.xml=INFO
#log4j.logger.com.exam.job=INFO,db
#log4j.appender.stdout=org.apache.log4j.ConsoleAppender
#log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
#log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH\:mm\:ss} %p [%c] %X{remoteAddr}  %X{remotePort}  %X{remoteHost}  %X{remoteUser} operator\:[name\:%X{userName} id\:%X{userId}] message\:<%m>%n

#write log into file
log4j.appender.logfile=org.apache.log4j.DailyRollingFileAppender
log4j.appender.logfile.Threshold=warn
log4j.appender.logfile.File=./logs/job.log
log4j.appender.logfile.DatePattern=.yyyy-MM-dd
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=[app]-[%d{yyyy-MM-dd HH:mm:ss}] %X{remoteAddr} %X{remotePort} %m %n

#display in console
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Threshold=info
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[app]-[%d{yyyy-MM-dd HH:mm:ss}] %X{remoteAddr} %X{remotePort} %m %n

7. Tumbling window example

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

public class WindowTest {
    public static void main(String[] args) {
        try {
            // create the stream execution environment
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

            // Source (a job may have several sources): read a text stream
            // from a socket listening on local port 8888
            DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);

            // Transformation(s): split each line into words, then group by word and aggregate
            DataStream<Tuple2<String, Integer>> windowCount = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                public void flatMap(String line, Collector<Tuple2<String, Integer>> collector) throws Exception {
                    String[] words = line.split(" ");
                    for (String word : words) {
                        // pair each word with 1 as a tuple
                        Tuple2<String, Integer> tp = Tuple2.of(word, 1);
                        // emit the tuple downstream
                        collector.collect(tp);
                    }
                }
            });

            // 1. Tumbling window example:
            // keyBy groups elements with the same key, then a 1-minute tumbling window
            // produces one count per word per minute
            DataStream<Tuple2<String, Integer>> windowStream = windowCount.keyBy(0)
                    .timeWindow(Time.minutes(1))
                    .sum(1);

            // Sink (a sink is required before execute is called)
            windowStream.print("windows: ").setParallelism(1);

            // start the job (it is usually better not to swallow this exception with try/catch:
            // Flink relies on it to apply the configured restart strategy)
            env.execute("StreamWordCount");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

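For local testing, start a text source first (for example nc -lk 8888) and type words separated by spaces. The same keyed stream can also use a sliding window instead of a tumbling one; a short sketch that evaluates a 1-minute window every 30 seconds:

            // 2. Sliding window variant: window size 1 minute, sliding every 30 seconds
            DataStream<Tuple2<String, Integer>> slidingStream = windowCount.keyBy(0)
                    .timeWindow(Time.minutes(1), Time.seconds(30))
                    .sum(1);
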
8. Kafka consumer configuration

  static FlinkKafkaConsumer<String> getConsumer() {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, KAFKA_GROUP_ID);
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true);
        props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "3000");
        // rebalance-related settings
        props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "10000");
        props.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, "3000");

        props.put(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG, "5000");
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
        // consumer ability
        props.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, "1000");
        props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "2000");
        // idle time
        props.put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG, "3000");
        props.put(ConsumerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG, "60000");

        props.put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, "52428800");
        props.put(ConsumerConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MAX_MS_CONFIG, "30000");

        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>(KAFKA_TOPIC, new SimpleStringSchema(), props);
        consumer.assignTimestampsAndWatermarks(new AscendingTimestampExtractor<String>() {
            @Override
            public long extractAscendingTimestamp(String s) {
                return System.currentTimeMillis();
            }
        });
        return consumer;
    }

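Besides auto.offset.reset, the start position can be set on the FlinkKafkaConsumer itself. Note that when the job is restored from a checkpoint or savepoint, the offsets stored in that checkpoint take precedence over these settings:

        // where to start reading when there are no checkpointed offsets
        consumer.setStartFromGroupOffsets();   // default: resume from the committed group offsets
        // consumer.setStartFromLatest();      // alternatively: start from the newest records
        // consumer.setStartFromEarliest();    // or: start from the earliest available records
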