前言
基于Flink 1.14写了一个quickstart旨在能够让大家更快上手,感受flink带来的优秀体验
注意:Flink 1.13 及以下与 1.14 及以上版本的 API 有很多不同之处
依赖
使用的是scala2.12
8
8
UTF-8
8
8
1.14.5
2.12
1.8
2.12.15
1.7.36
2.7.5
1.10.3
5.8.12
5.1.47
1.18.20
org.apache.flink
flink-clients_${scala.binary.version}
${flink.version}
provided
org.apache.flink
flink-runtime-web_${scala.binary.version}
${flink.version}
provided
cn.hutool
hutool-all
${hutool.version}
org.apache.flink
flink-connector-elasticsearch7_${scala.binary.version}
${flink.version}
org.apache.flink
flink-connector-kafka_${scala.binary.version}
${flink.version}
org.apache.flink
flink-jdbc_${scala.binary.version}
${flink.jdbc.version}
mysql
mysql-connector-java
${mysql.connector.version}
org.projectlombok
lombok
${lombok.version}
org.slf4j
slf4j-api
${slf4j.version}
org.slf4j
slf4j-log4j12
${slf4j.version}
启动模板
package flink.model;
import org.apache.flink.api.java.utils.ParameterTool;
import java.io.IOException;
import java.io.InputStream;
/**
* @author zhangxuecheng4441
* @date 2022/2/24/024 17:31
*/
public interface FlinkModel {
    /** CLI argument key selecting the runtime environment (e.g. -env dev). */
    String ENV_PARAM = "env";
    /** Default environment name when no -env argument is given. */
    String DEV_ENV = "dev";
    /** CLI argument key that switches on the local web-UI environment. */
    String LOCAL_ENV_PARAM = "local";

    /**
     * Loads the environment-specific properties file from the classpath,
     * e.g. {@code /application-dev.properties} for envConf "dev".
     *
     * @param envConf environment name used to resolve the properties file
     * @return ParameterTool holding the key/value pairs of that file
     * @throws IllegalStateException if the properties file is not on the classpath
     * @throws RuntimeException      if the file exists but cannot be read/parsed
     */
    static ParameterTool getInitConfig(String envConf) {
        String resource = String.format("/application-%s.properties", envConf);
        InputStream resourceAsStream = FlinkStreamModel.class.getResourceAsStream(resource);
        // Fail fast with a clear message instead of an opaque NPE inside ParameterTool
        if (resourceAsStream == null) {
            throw new IllegalStateException("Missing config file on classpath: " + resource);
        }
        try {
            return ParameterTool.fromPropertiesFile(resourceAsStream);
        } catch (IOException e) {
            // Preserve the cause and say which file failed
            throw new RuntimeException("Failed to load config file: " + resource, e);
        }
    }
}
package flink.model;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.utils.MultipleParameterTool;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.client.program.StreamContextEnvironment;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* flink 流任务模板
*
* @author zhangxuecheng4441
* @date 2022/10/31/031 19:36
*/
@Slf4j
public class FlinkStreamModel implements FlinkModel {
    /**
     * Shared stream execution environment, initialised by {@link #initEnv(String[])}.
     */
    public static StreamExecutionEnvironment env;
    /**
     * Parsed command-line arguments.
     */
    public static MultipleParameterTool param;
    /**
     * Key/value configuration loaded from application-{env}.properties.
     */
    public static Map<String, String> config;

    /**
     * Builds the execution environment from CLI args: checkpointing, restart
     * strategy, parallelism, optional local web-UI mode, and makes the
     * environment-specific properties available as global job parameters.
     *
     * @param args CLI args, e.g. -parallelism 2 -env dev [-local local]
     * @return the initialised StreamExecutionEnvironment (also stored in {@link #env})
     */
    public static StreamExecutionEnvironment initEnv(String[] args) {
        // Parse CLI parameters
        val params = MultipleParameterTool.fromArgs(args);
        param = params;
        long checkpointInterval = params.getLong("checkpointInterval", 120 * 1000);
        int parallelism = params.getInt("parallelism", 2);

        // Set up the execution environment with checkpoint/restart defaults
        env = StreamEnvBuilder.builder()
                .setCheckpointInterval(checkpointInterval)
                .setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
                .setCheckpointTimeout(600 * 1000L)
                .setMinPauseBetweenCheckpoints(5 * 1000)
                .setTolerableCheckpointFailureNumber(2)
                .setMaxConcurrentCheckpoints(1)
                .setDefaultRestartStrategy(3, Time.of(3, TimeUnit.MINUTES), Time.of(2, TimeUnit.MINUTES))
                .setParallelism(parallelism)
                .build();
        // Netty buffer flush timeout (ms) — trades latency for throughput
        env.setBufferTimeout(1000);

        // todo debug: add "-local local" to the program arguments to run inside the
        //  IDE with the web UI at http://localhost:8081/
        // NOTE(review): this replaces the env built above, so the checkpoint/restart
        //  settings from StreamEnvBuilder do not apply in local mode — confirm intended.
        if (params.has(LOCAL_ENV_PARAM)) {
            env = StreamContextEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
        }
        // Run in streaming (not batch) execution mode
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);

        // Load the per-environment properties file
        String envConf = params.get(ENV_PARAM, DEV_ENV);
        ParameterTool parameterTool = FlinkModel.getInitConfig(envConf);
        config = parameterTool.toMap();
        // Make parameters available in the web interface
        env.getConfig().setGlobalJobParameters(parameterTool);
        return env;
    }
}
数据源
//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by FernFlower decompiler)
//
package org.apache.flink.streaming.api.functions.source;
import java.io.Serializable;
import org.apache.flink.annotation.Public;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.functions.Function;
import org.apache.flink.streaming.api.watermark.Watermark;
@Public
public interface SourceFunction<T> extends Function, Serializable {

    /**
     * Starts emitting elements; called once. Implementations loop here and
     * emit records through the given context until {@link #cancel()} is called.
     *
     * @param var1 context used to emit elements and watermarks
     * @throws Exception any failure aborts the task
     */
    void run(SourceFunction.SourceContext<T> var1) throws Exception;

    /** Requests the running source to stop; must cause {@code run} to return. */
    void cancel();

    @Public
    public interface SourceContext<T> {
        /** Emits one element without an attached timestamp. */
        void collect(T var1);

        @PublicEvolving
        void collectWithTimestamp(T var1, long var2);

        @PublicEvolving
        void emitWatermark(Watermark var1);

        @PublicEvolving
        void markAsTemporarilyIdle();

        /** Lock to hold when emitting from a checkpointed source. */
        Object getCheckpointLock();

        void close();
    }
}
quick-start
package flink.launch;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.thread.ThreadUtil;
import flink.model.FlinkStreamModel;
import flink.sink.GenericSink;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.util.List;
/**
* flink stream demo
* todo IDEA run main方法时 need [add IDEA provided选项 ] AND [add -local local to program argument]
*
* @author zhangxuecheng4441
* @date 2022/2/22/022 11:43
*/
@Slf4j
public class FlinkStreamDemoApp extends FlinkStreamModel {
    /**
     * Minimal streaming demo: a single-parallelism source emits a timestamped
     * string roughly every 1.2 s; one branch prints it, the other batches
     * 5 elements at a time through a {@link GenericSink}.
     * <p>
     * IDEA launch: enable [add dependencies with "provided"] and add
     * "-local local" to the program arguments for the local web UI.
     *
     * @param args CLI args forwarded to {@link FlinkStreamModel#initEnv(String[])}
     * @throws Exception on job failure
     */
    public static void main(String[] args) throws Exception {
        initEnv(args);
        // Source: emit one string per ~1.2 s until cancelled
        val source = env.addSource(new SourceFunction<String>() {
            boolean out = true;

            @Override
            public void run(SourceContext<String> sourceContext) throws Exception {
                while (out) {
                    ThreadUtil.sleep(1.2 * 1000);
                    val str = "print-time:" + DateUtil.now();
                    log.warn("add string:{}", str);
                    sourceContext.collect(str);
                }
            }

            @Override
            public void cancel() {
                // Flip the flag so the emit loop exits
                out = false;
            }
        }).setParallelism(1).name("string-source");

        // Branch 1: print every element
        source.print().setParallelism(2).name("print-time");

        // Branch 2: flush a batch of 5 elements at a time
        source.addSink(new GenericSink<String>(5) {
            @Override
            public void flush(List<String> elements) {
                log.error("output str:{}", elements);
            }
        }).setParallelism(4).name("branch-sink");

        // todo debug: add "-local local" to program arguments for http://localhost:8081/
        env.execute("DemoStreamApp");
    }
}
能够通过flink-runtime-web包获取调试的web页面
算子
除了常用算子,process应该是灵活度非常高的算子了
package flink.launch.stream;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.thread.ThreadUtil;
import cn.hutool.core.util.RandomUtil;
import flink.function.check.JsonStrCheckFunc;
import flink.model.FlinkStreamModel;
import flink.pojo.AccountUploadPojo;
import flink.sink.GenericSink;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.util.OutputTag;
import java.util.List;
/**
* 一堆数据 需要将其中数据分成多个类别 进行再处理使用旁路
* 用例: 读取数据找到正确的json数据打印 不正确的输出
* todo IDEA run main方法时 need [add IDEA provided选项 ] AND [add -local local to program argument]
*
* @author zhangxuecheng4441
* @date 2022/3/13/013 15:21
*/
@Slf4j
public class ProcessApp extends FlinkStreamModel {
    /**
     * Side-output demo: a source emits a mix of JSON and non-JSON strings;
     * a process function parses valid JSON into {@code AccountUploadPojo} on
     * the main stream and routes invalid strings to the "error" side output.
     *
     * @param args CLI args forwarded to {@link FlinkStreamModel#initEnv(String[])}
     * @throws Exception on job failure
     */
    public static void main(String[] args) throws Exception {
        initEnv(args);
        val errorTag = "error";

        // Fake data: randomly one of a valid JSON string and two plain numbers
        val source = env.addSource(new SourceFunction<String>() {
            boolean out = true;

            @Override
            public void run(SourceContext<String> sourceContext) throws Exception {
                val strList = ListUtil.toList("{\"uid\":1}", "1", "2");
                while (out) {
                    ThreadUtil.sleep(1.2 * 1000);
                    val str = strList.get(RandomUtil.randomInt(0, 3));
                    log.warn("add string:{}", str);
                    sourceContext.collect(str);
                }
            }

            @Override
            public void cancel() {
                out = false;
            }
        }).setParallelism(1).name("string-source");

        // Split: valid JSON continues on the main stream, the rest goes to the errorTag side output
        val process = source.process(new JsonStrCheckFunc<>(AccountUploadPojo.class, errorTag))
                .returns(TypeInformation.of(AccountUploadPojo.class))
                .setParallelism(2).name("json-parse");

        // Main stream: successfully parsed records
        process.map(String::valueOf)
                .addSink(new GenericSink<String>(1) {
                    @Override
                    public void flush(List<String> elements) {
                        log.warn(">>>>>> right str:{}", elements);
                    }
                }).setParallelism(1).name("right-sink");

        // Side output: strings that failed JSON parsing
        process.getSideOutput(new OutputTag<>(errorTag, TypeInformation.of(String.class)))
                .addSink(new GenericSink<String>(1) {
                    @Override
                    public void flush(List<String> elements) {
                        log.error(">>>>>> error str:{}", elements);
                    }
                }).setParallelism(1).name("error-sink");

        env.execute("ProcessApp");
    }
}
输出流
实现 SinkFunction 接口(或继承 RichSinkFunction)即可
package flink.sink;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.util.ArrayList;
import java.util.List;
/**
* @author zhangxuecheng4441
* @date 2022/10/14/014 15:25
*/
/**
 * Batching sink base class: buffers incoming records and calls
 * {@link #flush(List)} once {@code batchInterval} records have accumulated,
 * and again with any remainder when the sink is closed.
 *
 * @param <T> type of the records this sink receives
 */
public abstract class GenericSink<T> extends RichSinkFunction<T> {
    private static final long serialVersionUID = -7594648194757224332L;

    /** Records buffered since the last flush. */
    public final List<T> dataCache;
    /** Number of records per batch before a flush is triggered. */
    private final int batchInterval;
    /** Count of buffered records; reset to 0 after each flush. */
    private int batchCount = 0;

    public GenericSink(int batchInterval) {
        this.batchInterval = batchInterval;
        dataCache = new ArrayList<>();
    }

    @Override
    public void invoke(T record, Context context) {
        // Null records are silently dropped
        if (record != null) {
            ++this.batchCount;
            this.addBatch(record);
            if (this.batchCount >= this.batchInterval) {
                this.flush(dataCache);
                dataCache.clear();
                this.batchCount = 0;
            }
        }
    }

    /**
     * Adds one record to the batch buffer.
     *
     * @param element element to buffer
     */
    public void addBatch(T element) {
        dataCache.add(element);
    }

    /**
     * Writes a full (or final partial) batch to the target system.
     *
     * @param elements buffered elements; the caller clears the list afterwards
     */
    public abstract void flush(List<T> elements);

    @Override
    public void close() throws Exception {
        // Flush any remaining partial batch on shutdown
        if (!this.dataCache.isEmpty()) {
            try {
                this.flush(dataCache);
            } finally {
                this.dataCache.clear();
            }
        }
        super.close();
    }
}
仓库地址
https://github.com/opop32165455/flink-java-quickstart-1.14.git