This article is an overview of how to use Flink's Java client and Flink SQL. For detailed usage, consult the documentation pages:
Apache Flink Documentation | Apache Flink
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>flink_test</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.15.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>1.15.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka</artifactId>
            <version>1.15.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.json</groupId>
            <artifactId>json</artifactId>
            <version>20210307</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>1.15.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc</artifactId>
            <version>1.15.4</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.25</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.3.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.KafkaDataProcessor</mainClass>
                        </manifest>
                    </archive>
                    <encoding>UTF-8</encoding>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <compilerArgs>
                        <arg>-Xlint:unchecked</arg>
                        <arg>-Xlint:deprecation</arg>
                    </compilerArgs>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
KafkaFlinkExample
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import java.util.Properties;

public class KafkaFlinkExample {
    public static void main(String[] args) throws Exception {
        // Set up the Flink execution environment.
        // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        // Kafka consumer configuration.
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "192.168.10.153:9092");
        props.setProperty("group.id", "test");
        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("input-topic", new SimpleStringSchema(), props);
        // Read from Kafka, prefix each record, and print it.
        env.addSource(consumer)
                .map(data -> "Received: " + data)
                .print();
        env.execute("Kafka Flink Example");
    }
}
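To feed the example, the input topic can be created and test lines produced with the standard Kafka CLI tools. The broker address matches the code above; the partition and replication settings are assumptions for a single-node test setup:

# Create the topic read by the job.
./bin/kafka-topics.sh --create --bootstrap-server 192.168.10.153:9092 \
  --topic input-topic --partitions 1 --replication-factor 1
# Type lines here; the job prints each one as "Received: <line>".
./bin/kafka-console-producer.sh --bootstrap-server 192.168.10.153:9092 --topic input-topic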
KafkaDataProcessor
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.json.JSONObject;
import java.util.Properties;

public class KafkaDataProcessor {
    public static void main(String[] args) throws Exception {
        // Set up the Flink execution environment.
        // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Create a local stream execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        // Kafka connection settings.
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "192.168.10.153:9092");
        properties.setProperty("group.id", "flink-consumer-group");
        // Create a Kafka consumer and read from the given topic.
        FlinkKafkaConsumer<String> kafkaConsumer = new FlinkKafkaConsumer<>("input-topic", new SimpleStringSchema(), properties);
        DataStream<String> kafkaDataStream = env.addSource(kafkaConsumer);
        // Parse each JSON record and add a gender field.
        DataStream<String> processedDataStream = kafkaDataStream.map(new MapFunction<String, String>() {
            @Override
            public String map(String value) throws Exception {
                // Parse the incoming JSON record.
                JSONObject jsonObject = new JSONObject(value);
                String name = jsonObject.getString("name");
                int id = jsonObject.getInt("id");
                int age = jsonObject.getInt("age");
                // Derive gender from the name.
                String gender;
                if (name.equals("jack")) {
                    gender = "male_xxx";
                } else {
                    gender = "female_xxx";
                }
                // Build the enriched JSON record.
                JSONObject newJsonObject = new JSONObject();
                newJsonObject.put("name", name);
                newJsonObject.put("id", id);
                newJsonObject.put("age", age);
                newJsonObject.put("gender", gender);
                return newJsonObject.toString();
            }
        });
        // Create a Kafka producer and write the enriched records to the output topic.
        FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>("output-topic", new SimpleStringSchema(), properties);
        processedDataStream.addSink(kafkaProducer);
        // Run the job.
        env.execute("Kafka Data Processor");
    }
}
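For illustration, a record produced to input-topic is enriched and forwarded to output-topic roughly like this (sample data; the JSON field order in the actual output may differ):

Input  (input-topic):  {"name":"jack","id":1,"age":20}
Output (output-topic): {"name":"jack","id":1,"age":20,"gender":"male_xxx"}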
LocalWebUI
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import java.util.Properties;

public class LocalWebUI {
    public static void main(String[] args) throws Exception {
        // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        Configuration configuration = new Configuration();
        // Create a local execution environment with the web UI enabled.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
        int parallelism = env.getParallelism();
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "192.168.10.153:9092");
        props.setProperty("group.id", "test");
        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("input-topic", new SimpleStringSchema(), props);
        System.out.println("Parallelism of the execution environment: " + parallelism);
        // DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
        DataStreamSource<String> lines = env.addSource(consumer);
        int parallelism1 = lines.getParallelism();
        System.out.println("Parallelism of the DataStreamSource created by addSource: " + parallelism1);
        SingleOutputStreamOperator<String> uppered = lines.map(line -> line.toUpperCase());
        int parallelism2 = uppered.getParallelism();
        System.out.println("Parallelism of the DataStream returned by map: " + parallelism2);
        DataStreamSink<String> print = uppered.print();
        int parallelism3 = print.getTransformation().getParallelism();
        System.out.println("Parallelism of the DataStreamSink returned by print: " + parallelism3);
        env.execute();
    }
}
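Note that createLocalEnvironmentWithWebUI only serves the web UI (by default at http://localhost:8081) when flink-runtime-web is on the classpath; that dependency is not in the pom above and would need to be added. A sketch, assuming the same Flink version:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-runtime-web</artifactId>
    <version>1.15.4</version>
</dependency>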
Like Spark, Flink jobs can be run ad hoc on the local machine during development. Two things are required:
1. The flink-clients dependency must be on the classpath, otherwise the job fails with "No ExecutorFactory found to execute the application":
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients</artifactId>
    <version>1.15.4</version>
</dependency>
2. Create a local Flink execution environment:
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
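One caveat: createLocalEnvironment always executes in the local JVM, even if the jar is later submitted to a cluster. For a jar that should run both locally and on a cluster, getExecutionEnvironment is the usual choice, since it returns the local environment inside the IDE and the cluster environment when submitted with flink run:

// Picks the environment from the execution context:
// local in the IDE, cluster-provided when submitted via `flink run`.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();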
Build the jar
mvn clean package
Submit the job
flink run -c com.KafkaDataProcessor /root/flink_test-1.0-SNAPSHOT-jar-with-dependencies.jar
Check the job status in the web UI
Job---->>Running Jobs
Cancel the job
Job---->>Running Jobs--->>click the job---->>Cancel Job
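The same can be done from the command line with the standard Flink CLI (the job ID comes from flink list or the web UI):

# List running jobs with their job IDs.
flink list
# Cancel a job by ID.
flink cancel <jobId>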
https://nightlies.apache.org/flink/flink-docs-release-1.14/zh/docs/dev/table/common/
Aggregate Kafka data with Flink SQL and save the result into MySQL.
The MySQL version here is 8.0.25 and the Flink version is 1.15.4; the connector versions must match the Flink version exactly. Copy the jars below into the lib directory of every cluster node (see the sketch after the list), keep the lib directories identical across nodes, and restart the cluster.
The dependencies can be downloaded from the Central Repository:
mysql-connector-java-8.0.25.jar
flink-connector-jdbc-1.15.4.jar
kafka-clients-2.8.1.jar
flink-connector-kafka-1.15.4.jar
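A sketch of distributing them, assuming a standalone cluster installed at /root/flink-1.15.4 on every node and the jars downloaded to the current directory:

# Copy the connector jars into Flink's lib directory (repeat on every node).
cp mysql-connector-java-8.0.25.jar flink-connector-jdbc-1.15.4.jar \
   kafka-clients-2.8.1.jar flink-connector-kafka-1.15.4.jar /root/flink-1.15.4/lib/
# Restart the cluster so the new jars are picked up.
/root/flink-1.15.4/bin/stop-cluster.sh
/root/flink-1.15.4/bin/start-cluster.sh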
Start the Flink SQL client:
cd /root/flink-1.15.4/bin
./sql-client.sh
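Optionally, switch the result display to tableau mode so query results print inline in the terminal (a built-in SQL client option):

SET 'sql-client.execution.result-mode' = 'tableau';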
First create the result table in MySQL (this DDL runs in MySQL itself, not in the Flink SQL client):
CREATE TABLE sync_test_1 (
  `day_time` varchar(64) NOT NULL,
  `total_gmv` bigint(11) DEFAULT NULL,
  PRIMARY KEY (`day_time`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8mb4;
In the Flink SQL client, create the Kafka source table:
create table flink_test_1 (
  id BIGINT,
  day_time VARCHAR,
  amount BIGINT,
  proctime AS PROCTIME ()
)
with (
  'connector' = 'kafka',
  'topic' = 'flink_test',
  'properties.bootstrap.servers' = '192.168.10.153:9092',
  'properties.group.id' = 'flink_gp_test1',
  'scan.startup.mode' = 'earliest-offset',
  'format' = 'json',
  'json.fail-on-missing-field' = 'false',
  'json.ignore-parse-errors' = 'true',
  'properties.zookeeper.connect' = '192.168.10.153:2181/kafka'
);
Create the JDBC sink table that maps to the MySQL table created above:
CREATE TABLE sync_test_1 (
  day_time string,
  total_gmv bigint,
  PRIMARY KEY (day_time) NOT ENFORCED
) WITH (
  'connector' = 'jdbc',
  'url' = 'jdbc:mysql://192.168.10.151:3306/flink_web?serverTimezone=UTC&useUnicode=true&characterEncoding=utf-8&useSSL=false',
  'table-name' = 'sync_test_1',
  'username' = 'root',
  'password' = '123456'
);
Submit the continuously running aggregation job; because the sink declares a primary key, results are upserted into MySQL:
INSERT INTO sync_test_1
SELECT day_time, SUM(amount) AS total_gmv
FROM flink_test_1
GROUP BY day_time;
Produce test data to the topic:
./bin/kafka-console-producer.sh --bootstrap-server 192.168.10.153:9092 --topic flink_test
{"day_time": "20201009","id": 7,"amount":20}
Check the result in MySQL.
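For example, after also sending a second record for the same day (hypothetical sample data), the row in MySQL should hold the running sum, because the primary key makes the JDBC sink upsert:

-- Records produced to flink_test:
--   {"day_time": "20201009","id": 7,"amount":20}
--   {"day_time": "20201009","id": 8,"amount":30}
-- Query in MySQL:
SELECT * FROM sync_test_1;
-- Expected: day_time = 20201009, total_gmv = 50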
Sources:
docs/sql_demo/demo_1.md · 无情(朱慧培)/flink-streaming-platform-web - Gitee.com
flink-sql大量使用案例_flink sql使用_第一片心意的博客-CSDN博客