Examples of using Python UDFs in PyFlink 1.11.0, implemented in several ways

Corresponding page in the official documentation: https://ci.apache.org/projects/flink/flink-docs-release-1.11/dev/table/python/python_udfs.html

The script below registers a scalar UDF named add and applies it in SQL to a Kafka source, writing the results to a print sink. The five numbered options are interchangeable ways to define the same UDF; only option 2 is left active.

import functools  # needed for option 5 below

from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import StreamTableEnvironment, DataTypes
from pyflink.table.udf import udf, ScalarFunction

env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)
t_env = StreamTableEnvironment.create(env)
# In 1.11 the Python worker should use managed memory; without this setting the
# job may fail at startup asking for task off-heap memory to be configured.
t_env.get_config().get_configuration().set_string("python.fn-execution.memory.managed", 'true')
# Option 1: extend the base class ScalarFunction
# class Add(ScalarFunction):
#     def eval(self, i, j):
#         return i + j
# add = udf(Add(), [DataTypes.BIGINT(), DataTypes.BIGINT()], DataTypes.BIGINT())
# t_env.register_function("add", add)

# Option 2: a plain lambda (the variant that is active here)
add = udf(lambda i, j: i + j, [DataTypes.BIGINT(), DataTypes.BIGINT()], DataTypes.BIGINT())
t_env.register_function("add", add)

# Option 3: the @udf decorator (the decorated function is already a UDF and is
# registered the same way)
# @udf(input_types=[DataTypes.BIGINT(), DataTypes.BIGINT()], result_type=DataTypes.BIGINT())
# def add(i, j):
#     return i + j
# t_env.register_function("add", add)

# Option 4: a callable class
# class CallableAdd(object):
#     def __call__(self, i, j):
#         return i + j
# add = udf(CallableAdd(), [DataTypes.BIGINT(), DataTypes.BIGINT()], DataTypes.BIGINT())
# t_env.register_function("add", add)

# Option 5: a partial function (fixes k=1, so this add(i, j) returns i + j + 1;
# requires the functools import above)
# def partial_add(i, j, k):
#     return i + j + k
# add = udf(functools.partial(partial_add, k=1), [DataTypes.BIGINT(), DataTypes.BIGINT()],
#           DataTypes.BIGINT())
# t_env.register_function("add", add)

# Kafka source table: JSON records with two BIGINT fields.
# (sql_update is deprecated in 1.11 in favor of execute_sql, but still works.)
t_env.sql_update("""
    CREATE TABLE mySource (
        a BIGINT,
        b BIGINT
    ) WITH (
        'connector' = 'kafka',
        'topic' = 'mytesttopic',
        'properties.bootstrap.servers' = '172.17.0.2:9092',
        'properties.group.id' = 'flink-test-cxy',
        'scan.startup.mode' = 'latest-offset',
        'format' = 'json'
    )
""")
# Print sink table: writes every result row to stdout.
t_env.sql_update("""
    CREATE TABLE mySink (
        a BIGINT,
        b BIGINT
    ) WITH (
        'connector' = 'print'
    )
""")
# Apply the UDF in SQL and submit the pipeline.
t_env.sql_update("insert into mySink select a, add(a, b) from mySource")
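# The registered UDF can equally be called from the Table API instead of SQL.
# A minimal sketch, assuming the 1.11 string-expression API (equivalent to the
# INSERT statement above):
# t_env.from_path("mySource").select("a, add(a, b)").insert_into("mySink")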
t_env.execute("job")
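
To push test data into the running job, one option is to publish JSON records matching the source schema from a separate process. A minimal sketch, assuming the kafka-python package is installed and using the broker address and topic from the DDL above:

import json
from kafka import KafkaProducer

# Serialize dicts as JSON so they match the source table's 'format' = 'json'.
producer = KafkaProducer(
    bootstrap_servers='172.17.0.2:9092',
    value_serializer=lambda v: json.dumps(v).encode('utf-8'))
producer.send('mytesttopic', {'a': 1, 'b': 2})
producer.flush()

For each record, the print sink should then log something like +I(1,3), i.e. the columns a and add(a, b).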

 
