# Reference: https://ci.apache.org/projects/flink/flink-docs-release-1.11/dev/table/python/python_udfs.html
# (Fix: this URL line was a bare statement missing the leading '#', which made
# the whole file a SyntaxError.)
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import StreamTableEnvironment, DataTypes
from pyflink.table.descriptors import Schema, OldCsv, FileSystem, Kafka, Json
from pyflink.table.udf import udf, TableFunction, ScalarFunction

# Single-parallelism streaming environment wrapped in a Table API environment.
env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)
t_env = StreamTableEnvironment.create(env)
# Let Flink use managed memory for the Python UDF worker process
# (configuration recommended by the Flink 1.11 Python UDF docs).
t_env.get_config().get_configuration().set_string("python.fn-execution.memory.managed", 'true')
# Option 1: subclass ScalarFunction and pass an instance to udf().
# class Add(ScalarFunction):
# def eval(self, i, j):
# return i + j
# add = udf(Add(), [DataTypes.BIGINT(), DataTypes.BIGINT()], DataTypes.BIGINT())
# t_env.register_function("add", add)
# Option 2 (the one actually used below): wrap a lambda with udf().
add = udf(lambda i, j: i + j, [DataTypes.BIGINT(), DataTypes.BIGINT()], DataTypes.BIGINT())
t_env.register_function("add", add)
# Option 3: decorate a plain Python function with @udf.
# @udf(input_types=[DataTypes.BIGINT(), DataTypes.BIGINT()], result_type=DataTypes.BIGINT())
# def add(i, j):
# return i + j
# t_env.register_function("add", add)
# Option 4: any callable object (one that defines __call__) works too.
# class CallableAdd(object):
# def __call__(self, i, j):
# return i + j
# add = udf(CallableAdd(), [DataTypes.BIGINT(), DataTypes.BIGINT()], DataTypes.BIGINT())
# t_env.register_function("add", add)
# Option 5: a functools.partial with a pre-bound argument (NOTE: would need
# 'import functools', which this file does not currently have).
# def partial_add(i, j, k):
# return i + j + k
# add = udf(functools.partial(partial_add, k=1), [DataTypes.BIGINT(), DataTypes.BIGINT()],
# DataTypes.BIGINT())
# t_env.register_function("add", add)
# Source table: JSON records with fields a, b (both bigint) consumed from the
# Kafka topic 'mytesttopic', starting at the latest offset.
t_env.sql_update("""
CREATE TABLE mySource (
a bigint,
b bigint
) WITH (
'connector' = 'kafka',
'topic' = 'mytesttopic',
'properties.bootstrap.servers' = '172.17.0.2:9092',
'properties.group.id' = 'flink-test-cxy',
'scan.startup.mode' = 'latest-offset',
'format' = 'json'
)
""")
# Sink table: the 'print' connector writes each result row to stdout.
t_env.sql_update("""
CREATE TABLE mySink (
a bigint,
b bigint
) WITH (
'connector' = 'print'
)
""")
# For every incoming row emit (a, add(a, b)); 'add' is the Python UDF
# registered earlier in this script.
t_env.sql_update("insert into mySink select a, add(a,b) from mySource")
# sql_update() only buffers the statements; execute() submits the actual job.
t_env.execute("job")