restart-strategy: fixed-delay
restart-strategy.fixed-delay.attempts: 3
restart-strategy.fixed-delay.delay: 30s
execution.checkpointing.interval: 1min
execution.checkpointing.externalized-checkpoint-retention: RETAIN_ON_CANCELLATION
state.checkpoints.dir: hdfs:///flink/checkpoints
state.backend: filesystem
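These flink-conf.yaml settings can also be applied programmatically inside the job. A minimal sketch, assuming the Flink 1.14 DataStream API (the class name is illustrative; the HDFS path mirrors the configuration above):

import java.util.concurrent.TimeUnit;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CheckpointConfigExample {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // restart-strategy: fixed-delay, 3 attempts, 30s delay
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(30, TimeUnit.SECONDS)));

        // execution.checkpointing.interval: 1min
        env.enableCheckpointing(60_000L, CheckpointingMode.EXACTLY_ONCE);

        // keep externalized checkpoints when the job is cancelled
        env.getCheckpointConfig()
                .enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // state.backend: filesystem -> heap state with filesystem checkpoint storage
        env.setStateBackend(new HashMapStateBackend());
        env.getCheckpointConfig().setCheckpointStorage("hdfs:///flink/checkpoints");
    }
}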
package com.dx;

import java.util.Random;

public class DataMaking {
    public static void main(String[] args) throws InterruptedException {
        int len = 100000;
        int sleepMillis = 1000;
        // args: <upper bound of generated ids> <pause per record in milliseconds>
        if (args.length == 1) {
            len = Integer.valueOf(args[0]);
        }
        if (args.length == 2) {
            len = Integer.valueOf(args[0]);
            sleepMillis = Integer.valueOf(args[1]);
        }
        Random random = new Random();
        for (int i = 0; i < 10000000; i++) {
            System.out.println(i + "," + random.nextInt(len));
            Thread.sleep(sleepMillis);
        }
    }
}
#!/bin/bash
#for i in {1..100000};do echo $i,$RANDOM >> /opt/module/logs/mockLog.log; sleep 1s ;done;
nohup java -jar /opt/module/applog/DataMaking-1.0-SNAPSHOT.jar > /opt/module/applog/log/mockLog.log 2>&1 &
# Name the components of this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe and configure the source component: r1
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /opt/data/mockLog.log
# Describe and configure the sink component: k1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = jsonTopic
a1.sinks.k1.kafka.bootstrap.servers = leidi:9092
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1
a1.sinks.k1.kafka.producer.linger.ms = 1
a1.sinks.k1.kafka.producer.compression.type = snappy
# Describe and configure the channel component; a memory channel is used here
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Wire the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
(1) Create the Kafka topic
bin/kafka-topics.sh --create --topic jsonTopic --replication-factor 1 --partitions 1 --zookeeper localhost:2181
(2) Start a Kafka console producer and consumer for testing
bin/kafka-console-producer.sh --topic jsonTopic --broker-list localhost:6667
bin/kafka-console-consumer.sh --bootstrap-server leidi01:6667 --topic jsonTopic --from-beginning
(3) Start the Flume agent
bin/flume-ng agent -c conf -f conf/flume-taildir-kafka.conf -n a1 -Dflume.root.logger=INFO,console
https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka_2.11/1.14.5
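Besides the SQL connector used below, this artifact also provides the DataStream KafkaSource. A minimal sketch for checking that the topic is readable from a Java job, assuming Flink 1.14 (host, topic, and group id follow the configuration above; the class name is illustrative):

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KafkaReadCheck {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Read the topic written by the Flume agent above as plain strings.
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers("leidi01:6667")
                .setTopics("jsonTopic")
                .setGroupId("testGroup")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();

        // Print each record; enough to verify the Flume -> Kafka leg of the pipeline.
        env.fromSource(source, WatermarkStrategy.noWatermarks(), "kafka_behavior_log").print();

        env.execute("kafka-read-check");
    }
}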
--Start the Flink SQL client with Hive and Kafka support
./sql-client.sh embedded -s yarn-session
--Enable checkpointing
SET execution.checkpointing.interval = 3s;
--Display query results in the traditional tableau (table) format
SET execution.result-mode=tableau;
use catalog default_catalog;
use default_database;
create table `default_catalog`.`default_database`.`kafka_behavior_log`
(
log STRING
) WITH (
'connector' = 'kafka',
'topic' = 'jsonTopic',
'properties.bootstrap.servers' = 'leidi01:6667',
'properties.group.id' = 'testGroup',
'scan.startup.mode' = 'earliest-offset',
'format' = 'raw'
);
select * from `default_catalog`.`default_database`.`kafka_behavior_log`;
insert into `default_catalog`.`default_database`.`kafka_behavior_log` values ('1,hadoop');
Flink SQL> select * from kafka_behavior_log;
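The same table can also be declared and queried from the Table API instead of the SQL client. A minimal sketch, assuming Flink 1.14 with flink-table and the Kafka connector on the classpath (the class name is illustrative; the DDL is the one shown above):

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class KafkaBehaviorLogTable {
    public static void main(String[] args) {
        TableEnvironment tableEnv = TableEnvironment.create(
                EnvironmentSettings.newInstance().inStreamingMode().build());

        // Same checkpoint setting as in the SQL client; needed for the Kafka source to commit offsets.
        tableEnv.getConfig().getConfiguration()
                .setString("execution.checkpointing.interval", "3s");

        // Register the Kafka-backed table in the session's default in-memory catalog.
        tableEnv.executeSql(
                "CREATE TABLE kafka_behavior_log (log STRING) WITH ("
                        + " 'connector' = 'kafka',"
                        + " 'topic' = 'jsonTopic',"
                        + " 'properties.bootstrap.servers' = 'leidi01:6667',"
                        + " 'properties.group.id' = 'testGroup',"
                        + " 'scan.startup.mode' = 'earliest-offset',"
                        + " 'format' = 'raw')");

        // Stream the query result to stdout.
        tableEnv.executeSql("SELECT * FROM kafka_behavior_log").print();
    }
}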
--Create the Hive table
create table `behavior_log`(
log STRING
) STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler';
insert into `behavior_log` select log from `default_catalog`.`default_database`.`kafka_behavior_log`;
no main manifest attribute, in original-DataMaking-1.0-SNAPSHOT.jar
Cause: the main class cannot be found when the jar is run (the manifest has no Main-Class entry).
Solution: add the following plugin configuration to the pom.xml:
<build>
    <plugins>
        <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.8.0</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>3.2.1</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <transformers>
                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                <mainClass>com.dx.DataMaking</mainClass>
                            </transformer>
                        </transformers>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
[2022-10-11 17:16:33,513] WARN [Consumer clientId=consumer-1, groupId=console-consumer-33906] Connection to node -1 could not be established. Broker may not be available. (org.apache.kafka.clients.NetworkClient)
Symptom: Flink SQL cannot read data from the external source.
Cause: no checkpoint interval has been set in Flink SQL.
Solution:
SET 'execution.checkpointing.interval' = '3s';
Caused by: java.lang.ClassNotFoundException: org.apache.kafka.clients.consumer.ConsumerRecord