<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-core</artifactId>
    <version>1.8.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>1.8.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.11</artifactId>
    <version>1.8.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>1.8.1</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-log4j12</artifactId>
    <version>1.7.26</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.9.2</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.9.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.8.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-json</artifactId>
    <version>1.8.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-filesystem_2.11</artifactId>
    <version>1.8.1</version>
</dependency>
<dependency>
    <groupId>org.apache.bahir</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.0</version>
</dependency>
</dependencies>
<build>
    <plugins>
        <plugin>
            <groupId>net.alchim31.maven</groupId>
            <artifactId>scala-maven-plugin</artifactId>
            <version>4.0.1</version>
            <executions>
                <execution>
                    <id>scala-compile-first</id>
                    <phase>process-resources</phase>
                    <goals>
                        <goal>add-source</goal>
                        <goal>compile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
package com.baizhi.demo04

import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _}

object Flink {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Enable checkpointing: trigger a checkpoint every 7s with exactly-once guarantees
    env.enableCheckpointing(7000, CheckpointingMode.EXACTLY_ONCE)
    // A checkpoint must complete within 4s, otherwise it is aborted
    env.getCheckpointConfig.setCheckpointTimeout(4000)
    // Wait at least 5s after the previous checkpoint completes before starting the next one
    env.getCheckpointConfig.setMinPauseBetweenCheckpoints(5000)
    env.getCheckpointConfig.setMaxConcurrentCheckpoints(1) // allow only one checkpoint in flight at a time
    // Keep the externalized checkpoint data when the job is cancelled instead of deleting it
    env.getCheckpointConfig.enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)
    // Fail the job if a checkpoint cannot be completed
    env.getCheckpointConfig.setFailOnCheckpointingErrors(true)

    val data: DataStream[String] = env.socketTextStream("Flink", 9999)
    data.flatMap(line => line.split("\\s+"))
      .map((_, 1))
      .keyBy(0)
      .map(new CountMapFunction)
      .print()

    env.execute()
  }
}
package com.baizhi.demo04

import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.state.{StateTtlConfig, ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.time.Time
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._

class CountMapFunction extends RichMapFunction[(String, Int), (String, Int)] {
  var state: ValueState[Int] = _

  override def map(value: (String, Int)): (String, Int) = {
    var history = state.value()
    if (history == null) {
      history = 0
    }
    state.update(history + value._2)
    (value._1, history + value._2)
  }

  override def open(parameters: Configuration): Unit = {
    val dec = new ValueStateDescriptor[Int]("count", createTypeInformation[Int])
    // 1. Build the TTL config
    val ttlConfig = StateTtlConfig
      .newBuilder(Time.seconds(80)) // the state lives for 80s
      .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite) // refresh the TTL on state creation and on every write
      .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired) // never return expired state
      //.cleanupInRocksdbCompactFilter(1000) // trigger the RocksDB compaction filter once every 1000 state queries
      .build
    // 2. Enable TTL on the state descriptor
    dec.enableTimeToLive(ttlConfig)
    val context = getRuntimeContext
    state = context.getState(dec)
  }
}
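As a worked example: if the words hello hello world arrive on the socket, the job prints (hello,1), (hello,2), (world,1). Because the update type is OnCreateAndWrite, every write refreshes the 80-second timer, so a key's state only expires after 80 seconds without new records for that key; NeverReturnExpired then guarantees the expired value is no longer returned, and the count for that key restarts from 1.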
#==============================================================================
# Fault tolerance and checkpointing
#==============================================================================
# The backend that will be used to store operator state checkpoints if
# checkpointing is enabled.
#
# Supported backends are 'jobmanager', 'filesystem', 'rocksdb', or the
# <class-name-of-factory>.
#
state.backend: rocksdb
# Directory for checkpoints filesystem, when using any of the default bundled
# state backends.
#
state.checkpoints.dir: hdfs:///flink-checkpoints
# Default target directory for savepoints, optional.
#
state.savepoints.dir: hdfs:///flink-savepoints
# Flag to enable/disable incremental checkpoints for backends that
# support incremental checkpoints (like the RocksDB state backend).
#
state.backend.incremental: true
state.backend.rocksdb.ttl.compaction.filter.enabled: true
#==============================================================================
# HistoryServer
#==============================================================================
# The HistoryServer is started and stopped via bin/historyserver.sh (start|stop)
# Directory to upload completed jobs to. Add this directory to the list of
# monitored directories of the HistoryServer as well (see below).
jobmanager.archive.fs.dir: hdfs:///completed-jobs/
# The address under which the web-based HistoryServer listens.
historyserver.web.address: CentOS
# The port under which the web-based HistoryServer listens.
historyserver.web.port: 8082
# Comma separated list of directories to monitor for completed jobs.
historyserver.archive.fs.dir: hdfs:///completed-jobs/
# Interval in milliseconds for refreshing the monitored directories.
historyserver.archive.fs.refresh-interval: 10000
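The state.backend, state.checkpoints.dir and state.backend.incremental entries above can also be set per job in code. A minimal sketch, assuming the flink-statebackend-rocksdb_2.11 (1.8.1) dependency has been added to the pom (it is not in the dependency list shown earlier):

package com.baizhi.demo04

import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object RocksDBBackendSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Per-job equivalent of state.backend: rocksdb, state.checkpoints.dir and
    // state.backend.incremental: true; a backend set on the job overrides the
    // cluster-wide default from flink-conf.yaml.
    env.setStateBackend(new RocksDBStateBackend("hdfs:///flink-checkpoints", true))
    env.enableCheckpointing(7000)
    env.socketTextStream("Flink", 9999).print()
    env.execute("rocksdb backend sketch")
  }
}

After the job is cancelled, the checkpoint retained under hdfs:///flink-checkpoints can be passed back with the CLI's -s option (flink run -s <checkpoint-path> ...) so the word counts are restored instead of starting from scratch.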
export HADOOP_CLASSPATH=`hadoop classpath`