下载:git clone https://github.com/apache/flink
set MAVEN_OPTS="-Xmx4G"
mvn clean install package -Dmaven.test.skip=true
set MAVEN_OPTS="-Xmx4G"
mvn clean install package '-Dmaven.test.skip=true'
cd E:\app-installtools\flink\flink-dist\target\flink-1.9-SNAPSHOT-bin\flink-1.9-SNAPSHOT\bin
Mode LastWriteTime Length Name
---- ------------- ------ ----
-a---- 2019/4/24 11:50 29647 config.sh
-a---- 2019/4/24 11:50 2279 flink
-a---- 2019/4/24 11:50 2847 flink-console.sh
-a---- 2019/4/24 11:50 6545 flink-daemon.sh
-a---- 2019/4/24 11:50 1271 flink.bat
-a---- 2019/4/24 11:50 1603 historyserver.sh
-a---- 2019/4/24 11:50 2967 jobmanager.sh
-a---- 2019/4/24 11:50 1849 mesos-appmaster-job.sh
-a---- 2019/4/24 11:50 1883 mesos-appmaster.sh
-a---- 2019/4/24 11:50 1935 mesos-taskmanager.sh
-a---- 2019/4/24 11:50 1207 pyflink-stream.sh
-a---- 2019/4/24 11:50 1166 pyflink.bat
-a---- 2019/4/24 11:50 1132 pyflink.sh
-a---- 2019/4/24 11:50 3517 sql-client.sh
-a---- 2019/4/24 11:50 2597 standalone-job.sh
-a---- 2019/4/24 11:50 3364 start-cluster.bat
-a---- 2019/4/24 11:50 1889 start-cluster.sh
-a---- 2019/4/24 11:50 3538 start-scala-shell.sh
-a---- 2019/4/24 11:50 1900 start-zookeeper-quorum.sh
-a---- 2019/4/24 11:50 1663 stop-cluster.sh
-a---- 2019/4/24 11:50 1891 stop-zookeeper-quorum.sh
-a---- 2019/4/24 11:50 3941 taskmanager.sh
-a---- 2019/4/24 11:50 1714 yarn-session.sh
-a---- 2019/4/24 11:50 2346 zookeeper.sh
E:\app-installtools\flink\flink-dist\target\flink-1.9-SNAPSHOT-bin\flink-1.9-SNAPSHOT\bin\start-cluster.bat
The web interface is available by default at http://localhost:8081/.
from flink.plan.Environment import get_environment
from flink.functions.GroupReduceFunction import GroupReduceFunction
class Adder(GroupReduceFunction):
    """Reducer that sums the counts of the (count, word) pairs it receives."""

    def reduce(self, iterator, collector):
        """Emit a single (total_count, word) tuple.

        Args:
            iterator: Yields tuples whose index 0 is a count; the word is
                taken from the first tuple.
            collector: Receives the result through ``collect``.
        """
        # Pull the first pair explicitly so the word is known, then add the
        # counts of every remaining pair.
        count, word = iterator.next()
        count += sum(x[0] for x in iterator)
        collector.collect((count, word))
# Word count over two in-memory sentences, sent to output().
env = get_environment()
data = env.from_elements(
    "Who's there?",
    "I think I hear them. Stand, ho! Who's there?",
)

# Split each sentence into lowercase (1, word) pairs, group on field 1 (the
# word), sum the counts with Adder, then format every (count, word) result.
(
    data
    .flat_map(lambda x, c: [(1, word) for word in x.lower().split()])
    .group_by(1)
    .reduce_group(Adder(), combinable=True)
    .map(lambda y: 'Count: %s Word: %s' % (y[0], y[1]))
    .output()
)

# Out[6]:
env.execute(local=True)
Count: 2 Word: i
Count: 1 Word: ho!
Count: 1 Word: hear
Count: 1 Word: them.
Count: 1 Word: think
Count: 2 Word: who's
Count: 1 Word: stand,
Count: 2 Word: there?
输入一个元素,输出一个元素
# Apply x * 2 to every element.
data.map(lambda x: x * 2)
输入一个元素,输出0个,1个,或多个元素
# Emit a (1, word) pair for every lowercased, whitespace-separated word.
# The loop over the lines has to come first in the comprehension so that
# `line` is bound before it is split; the reversed order raises NameError.
# NOTE(review): assumes each element x is an iterable of lines - confirm.
data.flat_map(
    lambda x, c: [(1, word) for line in x for word in line.lower().split()])
# Iterate over x and return a list with every value doubled.
# NOTE(review): presumably x yields the values of one partition - confirm.
data.map_partition(lambda x,c: [value * 2 for value in x])
对每一个元素,计算一个布尔表达式的值,保留函数计算结果为true的元素。
# Keep only the elements for which x > 1000 is true.
data.filter(lambda x: x > 1000)
# Combine two elements at a time by adding them.
data.reduce(lambda x,y : x + y)
class Adder(GroupReduceFunction):
    """Reducer that sums the counts of the (count, word) pairs it receives."""

    def reduce(self, iterator, collector):
        """Emit a single (total_count, word) tuple.

        Args:
            iterator: Yields tuples whose index 0 is a count; the word is
                taken from the first tuple.
            collector: Receives the result through ``collect``.
        """
        # The first pair supplies the word; the remaining pairs only
        # contribute their counts.
        count, word = iterator.next()
        count += sum(x[0] for x in iterator)
        collector.collect((count, word))
# Run the Adder reducer defined above via reduce_group.
data.reduce_group(Adder())
from flink.plan.Environment import get_environment
from flink.plan.Constants import INT, STRING, WriteMode
from flink.functions.GroupReduceFunction import GroupReduceFunction
class Adder(GroupReduceFunction):
    """Reducer that sums the counts of the (count, word) pairs it receives."""

    def reduce(self, iterator, collector):
        """Emit a single (total_count, word) tuple.

        Args:
            iterator: Yields tuples whose index 0 is a count; the word is
                taken from the first tuple.
            collector: Receives the result through ``collect``.
        """
        # Read the first pair to learn the word, then fold in the counts of
        # whatever the iterator still holds.
        count, word = iterator.next()
        count += sum(x[0] for x in iterator)
        collector.collect((count, word))
# Word count over two in-memory sentences, written out as text.
env = get_environment()
data = env.from_elements(
    "Who's there?",
    "I think I hear them. Stand, ho! Who's there?",
)

# NOTE(review): output_file is assigned but never used; the pipeline below
# writes to the literal 'out.txt' - confirm which path is intended.
output_file = 'file:///../examples/out.txt'

# Split each sentence into lowercase (1, word) pairs, group on field 1 (the
# word), sum the counts with Adder, format every result and write it with
# WriteMode.OVERWRITE.
(
    data
    .flat_map(lambda x, c: [(1, word) for word in x.lower().split()])
    .group_by(1)
    .reduce_group(Adder(), combinable=True)
    .map(lambda y: 'Count: %s Word: %s' % (y[0], y[1]))
    .write_text('out.txt', write_mode=WriteMode.OVERWRITE)
)

# Out[6]:
env.execute(local=True)