> [zhangflink@9wmwtivvjuibcd2e package]$ tar -zxvf flink-1.16.0-bin-scala_2.12.tgz -C ../software/
[zhangflink@9wmwtivvjuibcd2e software]$ vim flink/conf/flink-conf.yaml
#设置jobmanager的机器地址
jobmanager.rpc.address: flinkv2
#设置jobmanager进程绑定(监听)的本机网卡地址,并非访问白名单;0.0.0.0表示监听本机所有网卡,这样其他机器才能连接到jobmanager
jobmanager.bind-host: 0.0.0.0
#设置taskmanager进程绑定(监听)的本机网卡地址,并非访问白名单;0.0.0.0表示监听本机所有网卡,这样其他机器才能连接到taskmanager
taskmanager.bind-host: 0.0.0.0
#设置taskmanager对外暴露的地址:集群模式下,每台taskmanager机器都要设置为本机的主机名(例如在flinkv1上设置为flinkv1);只运行jobmanager的那台机器无需修改,保持localhost即可。
taskmanager.host: flinkv1
#设置webui(REST服务)绑定的地址;如果没有设置或者IP地址错误,会造成集群即使启动成功,页面也无法访问。
rest.bind-address: 0.0.0.0
[zhangflink@9wmwtivvjuibcd2e software]$ vim flink/conf/workers
#设置taskmanager地址
flinkv1
flinkv3
[zhangflink@9wmwtivvjuibcd2e software]$ xsync flink/conf/
注意:同步后所有机器的taskmanager.host都是同一个值,需要到每台taskmanager机器上把它改为本机的主机名(如flinkv3上改为flinkv3)。
在jobmanager所在的那台机器上启动集群
[zhangflink@9wmwtivvjuibcd2e-0001 flink]$ bin/start-cluster.sh
访问页面 http://localhost:8081/#/overview ,页面上slots卡槽数正确展示说明启动成功
org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException: Connecting to remote task manager 'localhost/127.0.0.1:42231' has failed. This might indicate that the remote task manager has been lost.
at org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connect(PartitionRequestClientFactory.java:169) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connectWithRetries(PartitionRequestClientFactory.java:135) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.createPartitionRequestClient(PartitionRequestClientFactory.java:96) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.io.network.netty.NettyConnectionManager.createPartitionRequestClient(NettyConnectionManager.java:95) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:186) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.internalRequestPartitions(SingleInputGate.java:342) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.requestPartitions(SingleInputGate.java:312) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.taskmanager.InputGateWithMetrics.requestPartitions(InputGateWithMetrics.java:115) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.runThrowing(StreamTaskActionExecutor.java:50) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.streaming.runtime.tasks.mailbox.Mail.run(Mail.java:90) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMail(MailboxProcessor.java:398) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMailsNonBlocking(MailboxProcessor.java:383) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMail(MailboxProcessor.java:345) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:229) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:831) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:780) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:935) [flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:914) [flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:728) [flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:550) [flink-dist-1.16.0.jar:1.16.0]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_212]
Caused by: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: finishConnect(..) failed: 拒绝连接: localhost/127.0.0.1:42231
Caused by: java.net.ConnectException: finishConnect(..) failed: 拒绝连接
at org.apache.flink.shaded.netty4.io.netty.channel.unix.Errors.newConnectException0(Errors.java:155) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.shaded.netty4.io.netty.channel.unix.Errors.handleConnectErrno(Errors.java:128) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.shaded.netty4.io.netty.channel.unix.Socket.finishConnect(Socket.java:320) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.shaded.netty4.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.doFinishConnect(AbstractEpollChannel.java:710) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.shaded.netty4.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.finishConnect(AbstractEpollChannel.java:687) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.shaded.netty4.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.epollOutReady(AbstractEpollChannel.java:567) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:470) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:378) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986) ~[flink-dist-1.16.0.jar:1.16.0]
at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) ~[flink-dist-1.16.0.jar:1.16.0]
... 1 more
2023-07-16 22:28:54,030 WARN org.apache.flink.runtime.taskmanager.Task [] - Sink: Print to Std. Out (2/2)#0 (40e492979c92fe282bc6ccbae6837fc3_0a448493b4782967b150582570326227_1_0) switched from RUNNING to FAILED with failure cause: org.apache.flink.runtime.io.network.partition.consumer.PartitionConnectionException: Connection for partition 76fdc87b7372a6e8895421b938589124#0@40e492979c92fe282bc6ccbae6837fc3_bc764cd8ddf7a0cff126f51c16239658_0_0 not reachable.
at org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:190)
at org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.internalRequestPartitions(SingleInputGate.java:342)
at org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.requestPartitions(SingleInputGate.java:312)
at org.apache.flink.runtime.taskmanager.InputGateWithMetrics.requestPartitions(InputGateWithMetrics.java:115)
at org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.runThrowing(StreamTaskActionExecutor.java:50)
at org.apache.flink.streaming.runtime.tasks.mailbox.Mail.run(Mail.java:90)
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMail(MailboxProcessor.java:398)
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMailsNonBlocking(MailboxProcessor.java:383)
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.processMail(MailboxProcessor.java:345)
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:229)
at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:831)
at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:780)
at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:935)
at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:914)
at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:728)
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:550)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException: Connecting to remote task manager 'localhost/127.0.0.1:42231' has failed. This might indicate that the remote task manager has been lost.
at org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connect(PartitionRequestClientFactory.java:169)
at org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.connectWithRetries(PartitionRequestClientFactory.java:135)
at org.apache.flink.runtime.io.network.netty.PartitionRequestClientFactory.createPartitionRequestClient(PartitionRequestClientFactory.java:96)
at org.apache.flink.runtime.io.network.netty.NettyConnectionManager.createPartitionRequestClient(NettyConnectionManager.java:95)
at org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.requestSubpartition(RemoteInputChannel.java:186)
... 16 more
Caused by: org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannel$AnnotatedConnectException: finishConnect(..) failed: 拒绝连接: localhost/127.0.0.1:42231
Caused by: java.net.ConnectException: finishConnect(..) failed: 拒绝连接
at org.apache.flink.shaded.netty4.io.netty.channel.unix.Errors.newConnectException0(Errors.java:155)
at org.apache.flink.shaded.netty4.io.netty.channel.unix.Errors.handleConnectErrno(Errors.java:128)
at org.apache.flink.shaded.netty4.io.netty.channel.unix.Socket.finishConnect(Socket.java:320)
at org.apache.flink.shaded.netty4.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.doFinishConnect(AbstractEpollChannel.java:710)
at org.apache.flink.shaded.netty4.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.finishConnect(AbstractEpollChannel.java:687)
at org.apache.flink.shaded.netty4.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.epollOutReady(AbstractEpollChannel.java:567)
at org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:470)
at org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:378)
at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986)
at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(Thread.java:748)
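解决方法:上面的报错是因为taskmanager.host仍是localhost,其他节点按localhost/127.0.0.1去连接该taskmanager,结果连到了自己本机而被拒绝。请检查每台taskmanager机器上的以下配置是否已设置为本机的主机名。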
#设置taskmanager对外暴露的地址:集群模式下,每台taskmanager机器都要设置为本机的主机名(例如在flinkv1上设置为flinkv1);只运行jobmanager的那台机器无需修改,保持localhost即可。
taskmanager.host: flinkv1
Could not connect to rpc endpoint under address akka.tcp://flink@flinkv2:6123/user/rpc/resourcemanager_*.
解决方法:设置taskmanager和jobmanager允许所有机器连接。
#设置jobmanager进程绑定(监听)的本机网卡地址,并非访问白名单;0.0.0.0表示监听本机所有网卡,这样其他机器才能连接到jobmanager
jobmanager.bind-host: 0.0.0.0
#设置taskmanager进程绑定(监听)的本机网卡地址,并非访问白名单;0.0.0.0表示监听本机所有网卡,这样其他机器才能连接到taskmanager
taskmanager.bind-host: 0.0.0.0