This section demonstrates the basic usage of the Table API and SQL through examples, using both to analyze data in batch processing and in stream processing.
First, let's look at the steps for building an application with the Flink Table API and SQL.
Step 1: Add dependencies
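For a Maven project, the demos below need the Table/SQL bridge and planner artifacts on top of the usual Flink dependencies. A minimal sketch, assuming Flink 1.10.0 with Scala 2.11 binaries (adjust coordinates and versions to your own build):

<!-- Java bridge between DataSet/DataStream programs and the Table API -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-api-java-bridge_2.11</artifactId>
    <version>1.10.0</version>
</dependency>
<!-- Legacy (pre-Blink) planner, matching the older API used in this course -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner_2.11</artifactId>
    <version>1.10.0</version>
</dependency>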
Step 2: Use the provided APIs
In the current release (Flink 1.10 is used in this course), the Table and SQL APIs are still evolving and not yet fully stable, so the examples here follow the older, legacy API as documented in earlier versions.
1) Obtain the environments
- Batch: ExecutionEnvironment paired with BatchTableEnvironment
- Streaming: StreamExecutionEnvironment paired with StreamTableEnvironment
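Both pairings in a minimal sketch (these are exactly the create() calls used in the demos below):

// Batch: pair the DataSet environment with the batch Table environment
ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment batchTableEnv = BatchTableEnvironment.create(batchEnv);

// Streaming: pair the DataStream environment with the stream Table environment
StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment streamTableEnv = StreamTableEnvironment.create(streamEnv);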
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.BatchTableEnvironment;

/**
 * @author liu a fu
 * @version 1.0
 * @date 2021/3/11
 * @desc Flink SQL API: word-frequency WordCount over a batch dataset, SQL style
 */
public class BatchWordCountSQLDemo {
    public static void main(String[] args) throws Exception {
        // 1. Execution environment - env
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // TODO: create the Table execution environment
        BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

        // 2. Data source - source
        // Mock test dataset
        DataSource<WordCount> inputDataSet = env.fromElements(
                new WordCount("flink", 1L), new WordCount("flink", 1L),
                new WordCount("spark", 1L), new WordCount("spark", 1L),
                new WordCount("flink", 1L), new WordCount("hive", 1L),
                new WordCount("flink", 1L), new WordCount("spark", 1L)
        );
        // TODO: register the DataSet as a temporary view
        tableEnv.createTemporaryView("word_count", inputDataSet, "word, counts");

        // 3. Data transformation - transformation
        // TODO: analyze the data with SQL
        Table wcTable = tableEnv.sqlQuery(
                "SELECT word, SUM(counts) AS counts FROM word_count GROUP BY word ORDER BY counts DESC");

        // TODO: convert the result Table back to a DataSet
        DataSet<WordCount> resultDataSet = tableEnv.toDataSet(wcTable, WordCount.class);

        // 4. Data sink - sink
        resultDataSet.printToErr();

        // 5. Trigger execution: printToErr() already triggers the DataSet job,
        // so an explicit env.execute() is unnecessary here
        //env.execute(BatchWordCountSQLDemo.class.getSimpleName());
    }
}
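The WordCount POJO referenced by both batch demos is not shown in the source. Below is a minimal sketch consistent with the constructor calls and the "word, counts" field mapping above (field names and types are inferred, so treat this as an assumption):

public class WordCount {
    // public fields plus a no-arg constructor satisfy Flink's POJO rules
    public String word;   // the word
    public Long counts;   // its occurrence count

    public WordCount() {
    }

    public WordCount(String word, Long counts) {
        this.word = word;
        this.counts = counts;
    }

    @Override
    public String toString() {
        return word + "," + counts;
    }
}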
Table API DSL analysis:
// Imports are the same as in BatchWordCountSQLDemo above
/**
 * @author liu a fu
 * @version 1.0
 * @date 2021/3/11
 * @desc Flink Table API: word-frequency WordCount over a batch dataset, DSL style
 */
public class BatchWordCountTableDemo {
    public static void main(String[] args) throws Exception {
        // 1. Execution environment - env
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // TODO: create the Table execution environment
        BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

        // 2. Data source - source
        // Mock test dataset
        DataSource<WordCount> inputDataSet = env.fromElements(
                new WordCount("flink", 1L), new WordCount("flink", 1L),
                new WordCount("spark", 1L), new WordCount("spark", 1L),
                new WordCount("flink", 1L), new WordCount("hive", 1L),
                new WordCount("flink", 1L), new WordCount("spark", 1L)
        );
        // TODO: convert the DataSet to a Table
        Table table = tableEnv.fromDataSet(inputDataSet);

        // 3. Data transformation - transformation
        // TODO: analyze the data with the Table API DSL
        /*
            Using the Flink Table API means decomposing a SQL statement into the
            corresponding method calls:
            SELECT word, SUM(counts) AS counts FROM word_count GROUP BY word ORDER BY counts DESC
         */
        Table resultTable = table
                .groupBy("word")                         // what to group by
                .select("word, SUM(counts) AS counts")   // which fields to select/aggregate
                .orderBy("counts.desc");

        // TODO: convert the result Table back to a DataSet
        DataSet<WordCount> resultDataSet = tableEnv.toDataSet(resultTable, WordCount.class);

        // 4. Data sink - sink
        resultDataSet.printToErr();

        // 5. Trigger execution: printToErr() already triggers the DataSet job
        //env.execute(BatchWordCountTableDemo.class.getSimpleName());
    }
}
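With the mock dataset above, both batch programs produce the same result, ordered by count descending: flink,4 then spark,3 then hive,1.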
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;

import java.util.Arrays;

/**
 * Flink SQL streaming example, adapted from the official examples.
 */
public class StreamSQLDemo {

    @Data
    @NoArgsConstructor
    @AllArgsConstructor
    public static class Order {
        public Long user;
        public String product;
        public Integer amount;
    }

    public static void main(String[] args) throws Exception {
        // 1. Execution environment - env
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // TODO: create the Stream Table execution environment
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 2. Data source - source
        // Mock datasets
        DataStream<Order> orderA = env.fromCollection(Arrays.asList(
                new Order(1001L, "beer", 3),
                new Order(1001L, "diaper", 4),
                new Order(1003L, "rubber", 2)
        ));
        DataStream<Order> orderB = env.fromCollection(Arrays.asList(
                new Order(1002L, "pen", 3),
                new Order(1002L, "rubber", 3),
                new Order(1004L, "beer", 1)
        ));
        // TODO: convert the DataStreams to tables: one as a Table object, one as a temporary view
        Table tableA = tableEnv.fromDataStream(orderA, "user, product, amount");
        tableEnv.createTemporaryView("orderB", orderB, "user, product, amount");

        // 3. Data transformation - transformation
        // Query both tables with SQL and union the results
        Table resultTable = tableEnv.sqlQuery(
                "SELECT * FROM " + tableA + " WHERE amount > 2 " +
                "UNION ALL " +
                "SELECT * FROM orderB WHERE amount > 2"
        );
        // Convert the Table to a DataStream
        DataStream<Order> resultDataStream = tableEnv.toAppendStream(resultTable, Order.class);

        // 4. Data sink - sink
        resultDataStream.printToErr();

        // 5. Trigger execution - execute
        env.execute(StreamSQLDemo.class.getSimpleName());
    }
}
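Only rows with amount > 2 survive the WHERE clauses, so the job prints four orders, in some interleaving of the two sources: (1001, beer, 3), (1001, diaper, 4), (1002, pen, 3), and (1002, rubber, 3).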
Table API DSL analysis:
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import java.text.ParseException;

/**
 * Flink Table API: event-time tumbling-window aggregation over a socket stream.
 */
public class StreamWindowTableDemo {
    public static void main(String[] args) throws Exception {
        // 1. Execution environment - env
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // step 1. set the time characteristic to event time
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // TODO: create the Stream Table execution environment
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 2. Data source - source
        DataStreamSource<String> inputStream = env.socketTextStream("node1.itcast.cn", 9999);

        // 3. Data transformation - transformation
        /* Sample input lines:
            1,beer,3,2020-12-12 00:00:01
            1,diaper,4,2020-12-12 00:00:02
            2,pen,3,2020-12-12 00:00:04
            2,rubber,3,2020-12-12 00:00:06
            3,rubber,2,2020-12-12 00:00:05
            4,beer,1,2020-12-12 00:00:08
            2,rubber,3,2020-12-12 00:00:10
            3,rubber,2,2020-12-12 00:00:10
         */
        // step 2. assign the event-time field; it must be a Long timestamp, and
        // out-of-order data is handled via watermarks (0 seconds allowed lateness here)
        FastDateFormat format = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss");
        SingleOutputStreamOperator<String> timeStream = inputStream
                .filter(line -> null != line && line.trim().split(",").length == 4)
                .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(0)) {
                    @Override
                    public long extractTimestamp(String line) {
                        String orderTime = line.trim().split(",")[3];
                        try {
                            return format.parse(orderTime).getTime();
                        } catch (ParseException e) {
                            e.printStackTrace();
                            return System.currentTimeMillis();
                        }
                    }
                });
        // Extract the fields into Row records
        SingleOutputStreamOperator<Row> orderStream = timeStream.map(new MapFunction<String, Row>() {
            @Override
            public Row map(String line) throws Exception {
                // data format: 2,rubber,3,2020-12-12 00:00:10
                String[] split = line.trim().split(",");
                String userId = split[0];
                String productName = split[1];
                Integer amount = Integer.parseInt(split[2]);
                Long orderTime = format.parse(split[3]).getTime();
                // return a Row record
                return Row.of(orderTime, userId, productName, amount);
            }
        }).returns(Types.ROW(Types.LONG, Types.STRING, Types.STRING, Types.INT));

        // TODO: register the DataStream as a temporary view, declaring a rowtime attribute
        tableEnv.createTemporaryView(
                "t_orders",
                orderStream,
                "order_time, user_id, product_name, amount, event_time.rowtime"
        );
        Table resultTable = tableEnv
                .from("t_orders")
                // tumbling event-time window of 5 seconds
                .window(Tumble.over("5.seconds").on("event_time").as("win"))
                // group by the window first, then by user within each window
                .groupBy("win, user_id")
                // aggregate
                .select("win.start, win.end, user_id, amount.sum as total");

        // TODO: convert the result Table back to a DataStream
        DataStream<Row> resultStream = tableEnv.toAppendStream(resultTable, Row.class);

        // 4. Data sink - sink
        resultStream.printToErr();

        // 5. Trigger execution - execute
        env.execute(StreamWindowTableDemo.class.getSimpleName());
    }
}
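To try it out, start a socket server on the source host before launching the job (for example nc -lk 9999 on node1.itcast.cn, assuming netcat is available) and paste the sample lines above. With zero allowed out-of-orderness, each 5-second window result is emitted as soon as a line whose event time reaches or passes the window end arrives.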