Flink version: 1.13.1
Scala version: 2.12
<properties>
    <flink.version>1.13.1</flink.version>
    <scala.version>2.12</scala.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_${scala.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_${scala.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- To implement a custom data format for serialization, add the dependency below -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-common</artifactId>
        <version>${flink.version}</version>
    </dependency>
</dependencies>
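The examples below import com.flink.dto.Event and com.flink.source.ClickSource, which the project provides but this post does not show. A minimal sketch of what they might look like, as two separate classes (field names inferred from element.getTimestamp() and the printed output; the real classes may differ):

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.sql.Timestamp;
import java.util.Random;

// Hypothetical Event POJO: public no-arg constructor and public fields, so Flink treats it as a POJO
public class Event {
    public String user;
    public String url;
    public Long timestamp;

    public Event() {
    }

    public Event(String user, String url, Long timestamp) {
        this.user = user;
        this.url = url;
        this.timestamp = timestamp;
    }

    public Long getTimestamp() {
        return timestamp;
    }

    @Override
    public String toString() {
        // The sample output suggests the epoch millis are rendered as a date string
        return "Event{user='" + user + "', url='" + url + "', timestamp=" + new Timestamp(timestamp) + "}";
    }
}

// Hypothetical ClickSource: emits one synthetic click per second
public class ClickSource implements SourceFunction<Event> {
    private volatile boolean running = true;

    @Override
    public void run(SourceContext<Event> ctx) throws Exception {
        Random random = new Random();
        while (running) {
            int id = random.nextInt(5); // user_N clicks /product?id=N, matching the sample output below
            ctx.collect(new Event("user_" + id, "/product?id=" + id, System.currentTimeMillis()));
            Thread.sleep(1000L);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}

First up, a scalar function: one value in, one value out.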
import com.flink.dto.Event;
import com.flink.source.ClickSource;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.functions.ScalarFunction;
import java.time.Duration;
import static org.apache.flink.table.api.Expressions.$;
import static org.apache.flink.table.api.Expressions.call;
public class SqlUdfTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        SingleOutputStreamOperator<Event> dataStream = env.addSource(new ClickSource())
                .assignTimestampsAndWatermarks(WatermarkStrategy.<Event>forBoundedOutOfOrderness(Duration.ZERO)
                        .withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
                            @Override
                            public long extractTimestamp(Event element, long recordTimestamp) {
                                return element.getTimestamp();
                            }
                        }));
        tableEnv.createTemporaryView("table_click", dataStream); // view for SQL
        Table tableClick = tableEnv.fromDataStream(dataStream);  // Table handle for the Table API
        // 1. Register the scalar function
        tableEnv.createTemporaryFunction("HashFunction", HashFunction.class);
        // 2. Invoke the scalar function in SQL (recommended); `user` is a reserved keyword, so it is escaped
        Table sqlResult = tableEnv.sqlQuery("select `user`, HashFunction(url) from table_click");
        // 3. Invoke the scalar function via the Table API (not recommended)
        Table tableResult = tableClick.select($("user"), call("HashFunction", $("user")));
        // 4. Print the result
        //tableEnv.toDataStream(tableResult).print("tableResult");
        tableEnv.toDataStream(sqlResult).print("sqlResult");
        env.execute();
    }

    /**
     * Custom UDF scalar function.
     */
    public static class HashFunction extends ScalarFunction {
        // The evaluation method must be named eval; it is not declared in the base class, so you write it yourself
        public String eval(String val) {
            return val + "_" + val.hashCode();
        }
    }
}
Output:
sqlResult> +I[user_3, /product?id=3_79635664]
sqlResult> +I[user_3, /product?id=3_79635664]
sqlResult> +I[user_4, /product?id=4_79635665]
sqlResult> +I[user_1, /product?id=1_79635662]
sqlResult> +I[user_1, /product?id=1_79635662]
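Registering by name is required for SQL, but if a function is only used from the Table API, Flink 1.13 also lets you call it inline via its class, without registering it first. A minimal sketch using the same HashFunction:

// Inline call via the function class, no prior registration needed (Table API only)
Table inlineResult = tableClick.select($("user"), call(HashFunction.class, $("url")));

The next example is a table function: one row in, potentially many rows out.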
import com.flink.dto.Event;
import com.flink.source.ClickSource;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.functions.TableFunction;
import java.time.Duration;
public class Udf_tableFunction {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        SingleOutputStreamOperator<Event> dataStream = env.addSource(new ClickSource())
                .assignTimestampsAndWatermarks(WatermarkStrategy.<Event>forBoundedOutOfOrderness(Duration.ZERO)
                        .withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
                            @Override
                            public long extractTimestamp(Event element, long recordTimestamp) {
                                return element.getTimestamp();
                            }
                        }));
        tableEnv.createTemporaryView("table_click", dataStream);
        Table tableClick = tableEnv.fromDataStream(dataStream);
        // 1. Register the table function
        tableEnv.createTemporaryFunction("Split", SplitFunction.class);
        // 2. Query: LATERAL TABLE cross-joins each input row with the rows produced by the function
        Table sqlResult = tableEnv.sqlQuery("select `user`, url, word, length " +
                "from table_click, LATERAL TABLE(Split(url)) AS T(word, length)");
        dataStream.print("source");
        tableEnv.toDataStream(sqlResult).print("sqlResult");
        env.execute();
    }

    /**
     * Custom table function.
     */
    public static class SplitFunction extends TableFunction<Tuple2<String, Integer>> {
        // The evaluation method must be named eval; emit one output row per collect(T) call
        public void eval(String val) {
            String[] splits = val.split("\\?");
            for (String field : splits) {
                collect(Tuple2.of(field, field.length()));
            }
        }
    }
}
Output:
source> Event{user='user_1', url='/product?id=1', timestamp=2022-4-15 16:09:14}
sqlResult> +I[user_1, /product?id=1, /product, 8]
sqlResult> +I[user_1, /product?id=1, id=1, 4]
source> Event{user='user_0', url='/product?id=0', timestamp=2022-4-15 16:09:15}
sqlResult> +I[user_0, /product?id=0, /product, 8]
sqlResult> +I[user_0, /product?id=0, id=0, 4]
source> Event{user='user_0', url='/product?id=0', timestamp=2022-4-15 16:09:16}
sqlResult> +I[user_0, /product?id=0, /product, 8]
sqlResult> +I[user_0, /product?id=0, id=0, 4]
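Note that the comma-plus-LATERAL-TABLE syntax above behaves like a cross join: input rows for which the function emits no rows are dropped. Flink SQL also supports a LEFT JOIN LATERAL form that keeps such rows, padding the function's columns with NULL; a sketch against the same view and function:

// LEFT JOIN LATERAL keeps input rows even when Split(url) emits nothing
Table leftJoinResult = tableEnv.sqlQuery("select `user`, url, word, length " +
        "from table_click LEFT JOIN LATERAL TABLE(Split(url)) AS T(word, length) ON TRUE");

The last example is an aggregate function: many rows in, one value out.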
import com.flink.dto.Event;
import com.flink.source.ClickSource;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.functions.AggregateFunction;
import java.time.Duration;
public class UDF_AggFunction {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        SingleOutputStreamOperator<Event> dataStream = env.addSource(new ClickSource())
                .assignTimestampsAndWatermarks(WatermarkStrategy.<Event>forBoundedOutOfOrderness(Duration.ZERO)
                        .withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
                            @Override
                            public long extractTimestamp(Event element, long recordTimestamp) {
                                return element.getTimestamp();
                            }
                        }));
        tableEnv.createTemporaryView("table_click", dataStream);
        Table tableClick = tableEnv.fromDataStream(dataStream);
        // 1. Register the aggregate function
        tableEnv.createTemporaryFunction("WeightAgg", WeightAggFunction.class);
        // 2. Aggregate per user; a non-windowed group by produces an updating table,
        //    hence toChangelogStream (toDataStream would fail here)
        Table sqlResult = tableEnv.sqlQuery("select `user`, WeightAgg(`user`, 2) " +
                "from table_click group by `user`");
        dataStream.print("source");
        tableEnv.toChangelogStream(sqlResult).print("sqlResult");
        env.execute();
    }

    public static class WeightAggFunction extends AggregateFunction<String, WeightedAvgAccumulator> {
        // Returns the current aggregate value whenever the result is read
        @Override
        public String getValue(WeightedAvgAccumulator acc) {
            return String.valueOf(acc.sum);
        }

        @Override
        public WeightedAvgAccumulator createAccumulator() {
            return new WeightedAvgAccumulator();
        }

        // accumulate is called once per input row.
        // The first parameter is the accumulator; the SQL arguments of
        // WeightAgg(`user`, 2) map to the second and third parameters (iValue, iWeight).
        public void accumulate(WeightedAvgAccumulator acc, String iValue, Integer iWeight) {
            String[] s = iValue.split("_");
            acc.sum += Integer.valueOf(s[1]) * iWeight;
        }
    }

    // Accumulator type definition
    public static class WeightedAvgAccumulator {
        public long sum = 0; // weighted sum
    }
}
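Despite the accumulator's name, WeightAggFunction above returns a weighted sum, not an average. A true weighted average would also track the total weight and divide in getValue; a hedged sketch following the pattern from the Flink documentation (class and field names here are my own, not from the code above):

// Hypothetical weighted-average variant of the aggregate function above
public static class WeightedAvgFunction extends AggregateFunction<Double, WeightedAvgFunction.Acc> {

    // Accumulator: weighted sum of values plus total weight
    public static class Acc {
        public long sum = 0;
        public int weight = 0;
    }

    @Override
    public Acc createAccumulator() {
        return new Acc();
    }

    @Override
    public Double getValue(Acc acc) {
        // No rows accumulated yet: avoid division by zero
        return acc.weight == 0 ? null : (double) acc.sum / acc.weight;
    }

    // Called once per input row
    public void accumulate(Acc acc, Long iValue, Integer iWeight) {
        acc.sum += iValue * iWeight;
        acc.weight += iWeight;
    }

    // Needed when the function runs in updating contexts, e.g. OVER aggregations
    public void retract(Acc acc, Long iValue, Integer iWeight) {
        acc.sum -= iValue * iWeight;
        acc.weight -= iWeight;
    }
}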