Flink job runtime setup
1. pom.xml dependencies
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.exam.iot</groupId>
    <artifactId>flink-job</artifactId>
    <version>0.0.1</version>
    <name>flink-job</name>
    <packaging>jar</packaging>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <logback.version>1.2.3</logback.version>
        <log4j-over-slf4j.version>1.7.30</log4j-over-slf4j.version>
        <java.version>1.8</java.version>
        <mysql.version>8.0.28</mysql.version>
        <jackson.version>2.12.5</jackson.version>
        <flink.version>1.9.3</flink.version>
        <dubbo.version>2.7.14</dubbo.version>
        <guava.version>31.1-jre</guava.version>
        <influxdb-client.version>2.22</influxdb-client.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-core</artifactId>
            <version>${logback.version}</version>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-access</artifactId>
            <version>${logback.version}</version>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
            <version>${logback.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>log4j-over-slf4j</artifactId>
            <version>${log4j-over-slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.12.0</version>
        </dependency>
        <dependency>
            <groupId>org.redisson</groupId>
            <artifactId>redisson</artifactId>
            <version>3.16.8</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.79</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>
        <dependency>
            <groupId>com.zaxxer</groupId>
            <artifactId>HikariCP</artifactId>
            <version>4.0.3</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-avro</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>${guava.version}</version>
        </dependency>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.22</version>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>net.jodah</groupId>
            <artifactId>expiringmap</artifactId>
            <version>0.5.10</version>
        </dependency>
        <dependency>
            <groupId>org.influxdb</groupId>
            <artifactId>influxdb-java</artifactId>
            <version>${influxdb-client.version}</version>
        </dependency>
        <dependency>
            <groupId>com.taosdata.jdbc</groupId>
            <artifactId>taos-jdbcdriver</artifactId>
            <version>2.0.37</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-pool2</artifactId>
            <version>2.11.1</version>
        </dependency>
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.7.22</version>
        </dependency>
    </dependencies>

    <build>
        <finalName>${project.name}</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.3.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.exam.job.JobApplication</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.4</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers combine.children="append">
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.exam.iot.JobApplication</mainClass>
                                </transformer>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer">
                                </transformer>
                            </transformers>
                            <relocations>
                                <relocation>
                                    <pattern>org.codehaus.plexus.util</pattern>
                                    <shadedPattern>org.shaded.plexus.util</shadedPattern>
                                    <excludes>
                                        <exclude>org.codehaus.plexus.util.xml.Xpp3Dom</exclude>
                                        <exclude>org.codehaus.plexus.util.xml.pull.*</exclude>
                                    </excludes>
                                </relocation>
                            </relocations>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <resources>
            <resource>
                <directory>src/main/java</directory>
                <includes>
                    <include>**/*.yml</include>
                    <include>**/*.properties</include>
                    <include>**/*.xml</include>
                </includes>
                <filtering>false</filtering>
            </resource>
            <resource>
                <directory>src/main/resources</directory>
                <includes>
                    <include>**/*.yml</include>
                    <include>**/*.properties</include>
                    <include>**/*.xml</include>
                </includes>
                <filtering>false</filtering>
            </resource>
        </resources>
    </build>
</project>
2. Create a logback.xml file under src/main/resources
<configuration>
    <property name="LOGS" value="./logs/" />

    <appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
        <layout class="ch.qos.logback.classic.PatternLayout">
            <pattern>%black(%d{ISO8601}) %highlight(%-5level) [%blue(%t)] %yellow(%C{1.}): %msg%n%throwable</pattern>
            <pattern>%date{yyyy-MM-dd HH:mm:ss} %highlight(%-5level) (%file:%line\)- %m%n</pattern>
            <charset>UTF-8</charset>
        </layout>
    </appender>

    <appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOGS}/electric-job.log</file>
        <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
            <Pattern>%d %p %C{1.} [%t] %m%n</Pattern>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>${LOGS}/archived/job-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
                <maxFileSize>100MB</maxFileSize>
            </timeBasedFileNamingAndTriggeringPolicy>
        </rollingPolicy>
    </appender>

    <appender name="syslog" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <File>./logs/%d/plug-flink-job.log</File>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>./logs/%d/plug-flink-job.%d.%i.log</fileNamePattern>
            <maxHistory>30</maxHistory>
            <timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
                <maxFileSize>128MB</maxFileSize>
            </timeBasedFileNamingAndTriggeringPolicy>
        </rollingPolicy>
        <encoder>
            <pattern>%d %p (%file:%line\)- %m%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <root level="INFO">
        <appender-ref ref="RollingFile" />
        <appender-ref ref="Console" />
    </root>

    <logger name="com.exam.iot" level="INFO" additivity="false">
        <appender-ref ref="RollingFile" />
        <appender-ref ref="Console" />
    </logger>
</configuration>
3. Job example: Kafka word count
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import java.util.Properties;
public class FlinkKafkaWordCount {
public static void main(String[] args) throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.enableCheckpointing(5000);
env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "10.170.224.246:9092,10.170.224.246:9093,10.170.224.246:9094");
properties.setProperty("group.id", "word-count-group");
FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("wc_topic_post", new SimpleStringSchema(), properties);
DataStream<WordWithCount> counts = env.addSource(consumer)
.flatMap(new FlatMapFunction<String, WordWithCount>() {
@Override
public void flatMap(String value, Collector<WordWithCount> collector) throws Exception {
for (String word : value.split("\t")) {
collector.collect(new WordWithCount(word, 1L));
}
}
})
.keyBy("word")
.timeWindow(Time.seconds(5))
.reduce(new ReduceFunction<WordWithCount>() {
@Override
public WordWithCount reduce(WordWithCount a, WordWithCount b) throws Exception {
return new WordWithCount(a.word, a.count + b.count);
}
});
counts.print().setParallelism(1);
env.execute("flink-kafka-wordcount");
}
public static class WordWithCount {
private String word;
private long count;
public WordWithCount() {
}
public WordWithCount(String word, long count) {
this.word = word;
this.count = count;
}
public String getWord() {
return word;
}
public void setWord(String word) {
this.word = word;
}
public long getCount() {
return count;
}
public void setCount(long count) {
this.count = count;
}
@Override
public String toString() {
return word + " : " + count;
}
}
}
4. Checkpoint settings
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// take an exactly-once checkpoint every 10 minutes
env.enableCheckpointing(10 * 60 * 1000, CheckpointingMode.EXACTLY_ONCE);
CheckpointConfig checkpointConfig = env.getCheckpointConfig();
checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
// a checkpoint that has not completed within 1 minute is discarded
checkpointConfig.setCheckpointTimeout(60 * 1000);
// leave at least 1 minute between the end of one checkpoint and the start of the next
checkpointConfig.setMinPauseBetweenCheckpoints(60 * 1000);
checkpointConfig.setMaxConcurrentCheckpoints(1);
// tolerate up to 3 checkpoint failures before failing the job
checkpointConfig.setTolerableCheckpointFailureNumber(3);
// keep externalized checkpoint data when the job is cancelled, so it can be restored later
checkpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
// on failure, restart the job up to 3 times with a 1-minute delay between attempts
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 60 * 1000));
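RETAIN_ON_CANCELLATION only pays off if checkpoints are written to durable storage; with the default memory backend, checkpoint data lives on the JobManager heap. A minimal sketch under that assumption, using the FsStateBackend shipped with Flink 1.9 (the checkpoint URI below is hypothetical and should point at your own HDFS/NFS/S3 path):

import org.apache.flink.runtime.state.filesystem.FsStateBackend;

// Keep working state on the TaskManager heap and write checkpoint snapshots
// to a shared, durable directory (hypothetical path -- adjust to your cluster).
env.setStateBackend(new FsStateBackend("hdfs://namenode:8020/flink/checkpoints/flink-job"));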
5. Job pipeline setup
try {
env.addSource(AppConfig.getConsumer())
.setParallelism(8)
.map(new RichMapFunction<String, DeviceClientInfo>() {
@Override
public DeviceClientInfo map(String jsonLine) {
try {
log.info("jsonLine={}", jsonLine);
String line = jsonLine.replaceAll("socket_abb.", "");
return JSONObject.parseObject(line, DeviceClientInfo.class);
} catch (Exception e) {
log.error("job.line.map报错: {}", e.getMessage());
}
return null;
}
})
.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<DeviceClientInfo>() {
private long currentMaxTs = Long.MIN_VALUE;
@Nullable
@Override
public Watermark getCurrentWatermark() {
long maxOutOfOrderness = 30 * 1000;
return new Watermark(currentMaxTs - maxOutOfOrderness);
}
@Override
public long extractTimestamp(DeviceClientInfo deviceClientInfo, long previousElementTimestamp) {
long currentTs = deviceClientInfo.getTimestamp();
currentMaxTs = Math.max(currentMaxTs, currentTs);
return currentTs;
}
})
.filter(new RichFilterFunction<DeviceClientInfo>() {
@Override
public boolean filter(DeviceClientInfo deviceClientInfo) {
return deviceClientInfo != null
&& StringUtils.isNotEmpty(deviceClientInfo.getDeviceId())
&& deviceClientInfo.getPayload() != null
&& deviceClientInfo.getPayload().getProperties() != null;
}
})
.map(new RichMapFunction<DeviceClientInfo, Electric>() {
final int recordFixedTime = 15;
@Override
public Electric map(DeviceClientInfo deviceClientInfo) {
try {
return parseElectric(recordFixedTime, deviceClientInfo);
} catch (Exception e) {
log.error("job.dci.map报错: ", e);
}
return null;
}
}).filter(new RichFilterFunction<Electric>() {
@Override
public boolean filter(Electric electric) {
return electric != null;
}
})
.keyBy(Electric::getDeviceId)
.timeWindow(Time.minutes(1))
.reduce(new ReduceFunction<Electric>() {
@Override
public Electric reduce(Electric preE, Electric curE) {
log.info("***** Electric curE={}", JSONObject.toJSONString(curE));
return curE;
}
})
.addSink(new SinkMysqlWithOne())
.name("mysqlSink");
} catch (Exception e) {
e.printStackTrace();
}
env.execute("degree-sink");
6. Create a log4j.properties file under src/main/resources
log4j.rootLogger=INFO,logfile,stdout
#log4j.logger.org.springframework.web.servlet=INFO,db
#log4j.logger.org.springframework.beans.factory.xml=INFO
#log4j.logger.com.exam.job=INFO,db
#log4j.appender.stdout=org.apache.log4j.ConsoleAppender
#log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
#log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH\:mm\:ss} %p [%c] %X{remoteAddr} %X{remotePort} %X{remoteHost} %X{remoteUser} operator\:[\u59D3\u540D\:%X{userName} \u5DE5\u53F7\:%X{userId}] message\:<%m>%n
#write log into file
log4j.appender.logfile=org.apache.log4j.DailyRollingFileAppender
log4j.appender.logfile.Threshold=warn
log4j.appender.logfile.File=./logs/job.log
log4j.appender.logfile.DatePattern=.yyyy-MM-dd
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=[app]-[%d{yyyy-MM-dd HH:mm:ss}] %X{remoteAddr} %X{remotePort} %m %n
#display in console
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Threshold=info
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[app]-[%d{yyyy-MM-dd HH:mm:ss}] %X{remoteAddr} %X{remotePort} %m %n
7. Tumbling window example
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
public class WindowTest {
public static void main(String[] args) {
try {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
DataStream<Tuple2<String, Integer>> windowCount = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
public void flatMap(String line, Collector<Tuple2<String, Integer>> collector) throws Exception {
String[] words = line.split(" ");
for (String word : words) {
Tuple2<String, Integer> tp = Tuple2.of(word, 1);
collector.collect(tp);
}
}
});
DataStream<Tuple2<String, Integer>> windowStream = windowCount.keyBy(0)
.timeWindow(Time.minutes(1))
.sum(1);
windowStream.print("windows: ").setParallelism(1);
env.execute("StreamWordCount");
}catch (Exception e) {
e.printStackTrace();
}
}
}
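timeWindow with a single argument, as above, creates tumbling (non-overlapping) windows. Passing a size and a slide turns the same keyed stream into a sliding window; a small sketch reusing the windowCount stream from the example:

// Sliding variant: a 1-minute window re-evaluated every 10 seconds, so each
// element is counted in several overlapping windows.
DataStream<Tuple2<String, Integer>> slidingStream = windowCount.keyBy(0)
        .timeWindow(Time.minutes(1), Time.seconds(10))
        .sum(1);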
8. Kafka consumer configuration
static FlinkKafkaConsumer<String> getConsumer() {
Properties props = new Properties();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
props.put(ConsumerConfig.GROUP_ID_CONFIG, KAFKA_GROUP_ID);
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true);
props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "3000");
props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "10000");
props.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, "3000");
props.put(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG, "5000");
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
props.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, "1000");
props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "2000");
props.put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG, "3000");
props.put(ConsumerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG, "60000");
props.put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, "52428800");
props.put(ConsumerConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MAX_MS_CONFIG, "30000");
FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>(KAFKA_TOPIC, new SimpleStringSchema(), props);
consumer.assignTimestampsAndWatermarks(new AscendingTimestampExtractor<String>() {
@Override
public long extractAscendingTimestamp(String s) {
return System.currentTimeMillis();
}
});
return consumer;
}
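One more knob worth setting explicitly: when Flink checkpointing is enabled, the Kafka connector commits offsets on checkpoint completion and the enable.auto.commit property above is effectively ignored, so the start position is usually pinned on the consumer itself. A short sketch of the existing FlinkKafkaConsumer options (pick one before returning the consumer):

// Start from the consumer group's committed offsets (the default); if the group has
// no committed offset yet, auto.offset.reset ("latest" above) decides where to start.
consumer.setStartFromGroupOffsets();
// Alternatives:
// consumer.setStartFromEarliest();    // ignore committed offsets, read from the beginning
// consumer.setStartFromLatest();      // ignore committed offsets, read only new records
// consumer.setStartFromTimestamp(System.currentTimeMillis() - 60 * 60 * 1000L); // e.g. last hour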