Set up a Zookeeper cluster;
Download and install the Python dependency;
Download and extract a Storm release;
Edit the storm.yaml configuration file;
Start the Storm daemon processes.
Note: JDK 7 must already be installed before any of this.
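A rough shell sketch of steps 2 and 3, assuming Storm 0.9.4 (the version used in the examples below) and an install location of /opt/storm; the mirror URL is an assumption, substitute your own:
# Check that a Python interpreter is available (used by Storm's CLI and multilang scripts)
python --version
# Download and extract a Storm release; version and download URL are assumptions
mkdir -p /opt/storm
wget https://archive.apache.org/dist/storm/apache-storm-0.9.4/apache-storm-0.9.4.tar.gz
tar -zxvf apache-storm-0.9.4.tar.gz -C /opt/storm
cd /opt/storm/apache-storm-0.9.4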
storm.yaml reference:
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

########### These all have default values as shown
########### Additional configuration goes into storm.yaml

java.library.path: "/usr/local/lib:/opt/local/lib:/usr/lib"

### storm.* configs are general configurations
# the local dir is where jars are kept
storm.local.dir: "storm-local"
storm.zookeeper.servers:
    - "127.0.0.1"
storm.zookeeper.port: 2181
storm.zookeeper.root: "/storm"
storm.zookeeper.session.timeout: 20000
storm.zookeeper.connection.timeout: 15000
storm.zookeeper.retry.times: 5
storm.zookeeper.retry.interval: 1000
storm.zookeeper.retry.intervalceiling.millis: 30000
storm.zookeeper.auth.user: null
storm.zookeeper.auth.password: null
storm.cluster.mode: "distributed" # can be distributed or local
storm.local.mode.zmq: false
storm.thrift.transport: "backtype.storm.security.auth.SimpleTransportPlugin"
storm.principal.tolocal: "backtype.storm.security.auth.DefaultPrincipalToLocal"
storm.group.mapping.service: "backtype.storm.security.auth.ShellBasedGroupsMapping"
storm.messaging.transport: "backtype.storm.messaging.netty.Context"
storm.nimbus.retry.times: 5
storm.nimbus.retry.interval.millis: 2000
storm.nimbus.retry.intervalceiling.millis: 60000
storm.auth.simple-white-list.users: []
storm.auth.simple-acl.users: []
storm.auth.simple-acl.users.commands: []
storm.auth.simple-acl.admins: []

### nimbus.* configs are for the master
nimbus.host: "192.168.1.57"
nimbus.thrift.port: 6627
nimbus.thrift.threads: 64
nimbus.thrift.max_buffer_size: 1048576
nimbus.childopts: "-Xmx1024m"
nimbus.task.timeout.secs: 30
nimbus.supervisor.timeout.secs: 60
nimbus.monitor.freq.secs: 10
nimbus.cleanup.inbox.freq.secs: 600
nimbus.inbox.jar.expiration.secs: 3600
nimbus.task.launch.secs: 120
nimbus.reassign: true
nimbus.file.copy.expiration.secs: 600
nimbus.topology.validator: "backtype.storm.nimbus.DefaultTopologyValidator"
nimbus.credential.renewers.freq.secs: 600

### ui.* configs are for the master
ui.host: 0.0.0.0
ui.port: 8080
ui.childopts: "-Xmx768m"
ui.actions.enabled: true
ui.filter: null
ui.filter.params: null
ui.users: null
ui.header.buffer.bytes: 4096
ui.http.creds.plugin: backtype.storm.security.auth.DefaultHttpCredentialsPlugin

logviewer.port: 8000
logviewer.childopts: "-Xmx128m"
logviewer.cleanup.age.mins: 10080
logviewer.appender.name: "A1"
logs.users: null

transactional.zookeeper.root: "/transactional"
transactional.zookeeper.servers: null
transactional.zookeeper.port: null

### supervisor.* configs are for node supervisors
# Define the amount of workers that can be run on this machine. Each worker is assigned a port to use for communication
supervisor.slots.ports:
    - 6700
    - 6701
    - 6702
    - 6703
supervisor.childopts: "-Xmx256m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -verbose:gc -Xloggc:D:\\developers\\tools\\apache-storm-0.9.4\\logs\\gc-storm-supervisor.log"
supervisor.run.worker.as.user: false
#how long supervisor will wait to ensure that a worker process is started
supervisor.worker.start.timeout.secs: 120
#how long between heartbeats until supervisor considers that worker dead and tries to restart it
supervisor.worker.timeout.secs: 30
#how many seconds to sleep for before shutting down threads on worker
supervisor.worker.shutdown.sleep.secs: 1
#how frequently the supervisor checks on the status of the processes it's monitoring and restarts if necessary
supervisor.monitor.frequency.secs: 3
#how frequently the supervisor heartbeats to the cluster state (for nimbus)
supervisor.heartbeat.frequency.secs: 5
supervisor.enable: true
supervisor.supervisors: []
supervisor.supervisors.commands: []

### worker.* configs are for task workers
worker.childopts: "-Xmx768m -verbose:gc -Xloggc:D:\\developers\\tools\\apache-storm-0.9.4\\logs\\gc-storm-worker-%ID%.log -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=D:\\developers\\tools\\apache-storm-0.9.4\\logs\\dump"
worker.gc.childopts: "-XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75"
worker.heartbeat.frequency.secs: 1

# control how many worker receiver threads we need per worker
topology.worker.receiver.thread.count: 1

task.heartbeat.frequency.secs: 3
task.refresh.poll.secs: 10
task.credentials.poll.secs: 30

zmq.threads: 1
zmq.linger.millis: 5000
zmq.hwm: 0

storm.messaging.netty.server_worker_threads: 1
storm.messaging.netty.client_worker_threads: 1
storm.messaging.netty.buffer_size: 5242880 #5MB buffer
# Since nimbus.task.launch.secs and supervisor.worker.start.timeout.secs are 120, other workers should also wait at least that long before giving up on connecting to the other worker. The reconnection period need also be bigger than storm.zookeeper.session.timeout(default is 20s), so that we can abort the reconnection when the target worker is dead.
storm.messaging.netty.max_retries: 300
storm.messaging.netty.max_wait_ms: 1000
storm.messaging.netty.min_wait_ms: 100

# If the Netty messaging layer is busy(netty internal buffer not writable), the Netty client will try to batch message as more as possible up to the size of storm.messaging.netty.transfer.batch.size bytes, otherwise it will try to flush message as soon as possible to reduce latency.
storm.messaging.netty.transfer.batch.size: 262144
# Sets the backlog value to specify when the channel binds to a local address
storm.messaging.netty.socket.backlog: 500

# We check with this interval that whether the Netty channel is writable and try to write pending messages if it is.
storm.messaging.netty.flush.check.interval.ms: 10

# By default, the Netty SASL authentication is set to false. Users can override and set it true for a specific topology.
storm.messaging.netty.authentication: false

# default number of seconds group mapping service will cache user group
storm.group.mapping.service.cache.duration.secs: 120

### topology.* configs are for specific executing storms
topology.enable.message.timeouts: true
topology.debug: false
topology.workers: 1
topology.acker.executors: null
topology.tasks: null
# maximum amount of time a message has to complete before it's considered failed
topology.message.timeout.secs: 30
topology.multilang.serializer: "backtype.storm.multilang.JsonSerializer"
topology.skip.missing.kryo.registrations: false
topology.max.task.parallelism: null
topology.max.spout.pending: null
topology.state.synchronization.timeout.secs: 60
topology.stats.sample.rate: 0.05
topology.builtin.metrics.bucket.size.secs: 60
topology.fall.back.on.java.serialization: true
topology.worker.childopts: null
topology.executor.receive.buffer.size: 1024 #batched
topology.executor.send.buffer.size: 1024 #individual messages
topology.receiver.buffer.size: 8 # setting it too high causes a lot of problems (heartbeat thread gets starved, throughput plummets)
topology.transfer.buffer.size: 1024 # batched
topology.tick.tuple.freq.secs: null
topology.worker.shared.thread.pool.size: 4
topology.disruptor.wait.strategy: "com.lmax.disruptor.BlockingWaitStrategy"
topology.spout.wait.strategy: "backtype.storm.spout.SleepSpoutWaitStrategy"
topology.sleep.spout.wait.strategy.time.ms: 1
topology.error.throttle.interval.secs: 10
topology.max.error.report.per.interval: 5
topology.kryo.factory: "backtype.storm.serialization.DefaultKryoFactory"
topology.tuple.serializer: "backtype.storm.serialization.types.ListDelegateSerializer"
topology.trident.batch.emit.interval.millis: 500
topology.testing.always.try.serialize: false
topology.classpath: null
topology.environment: null
topology.bolts.outgoing.overflow.buffer.enable: false

dev.zookeeper.path: "/tmp/dev-storm-zookeeper"
1) storm.zookeeper.servers: the addresses of the Zookeeper ensemble used by the Storm cluster, in the following format:
storm.zookeeper.servers:
    - "192.168.1.225"
#   - "server2"
2) storm.local.dir: the local disk directory where Nimbus and the Supervisors keep a small amount of state such as jars and confs. Create this directory ahead of time and give it sufficient access permissions, then point to it in storm.yaml, e.g.:
storm.local.dir: "/opt/storm/apache-storm-0.9.4/data"
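A minimal sketch for creating that directory; the dedicated "storm" user/group is an assumption, use whatever account actually runs the daemons:
mkdir -p /opt/storm/apache-storm-0.9.4/data
chown -R storm:storm /opt/storm/apache-storm-0.9.4/data   # account that starts nimbus/supervisor (assumption)
chmod -R 750 /opt/storm/apache-storm-0.9.4/data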
3) java.library.path: the load path for the native libraries (ZMQ and JZMQ) that Storm uses. The default is "/usr/local/lib:/opt/local/lib:/usr/lib"; since ZMQ and JZMQ are normally installed under /usr/local/lib, this usually needs no change.
4) nimbus.host: the address of the Nimbus machine in the Storm cluster. Every Supervisor worker node needs to know which machine is Nimbus so that it can download Topology jars, confs and so on, e.g.:
nimbus.host: "192.168.1.225"
5) supervisor.slots.ports: for each Supervisor worker node, this defines how many workers may run on that node. Each worker uses one dedicated port to receive messages, and this option lists the ports workers are allowed to use. By default each node can run 4 workers, on ports 6700, 6701, 6702 and 6703, e.g.:
supervisor.slots.ports:
    - 6700
    - 6701
    - 6702
    - 6703
Note: every line in storm.yaml must begin with a single ASCII space.
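To check that a value in storm.yaml is actually being picked up (including after fixing the leading-space issue above), the storm CLI's localconfvalue subcommand prints a key as resolved from the local configuration; the keys below are just examples:
bin/storm localconfvalue nimbus.host
bin/storm localconfvalue storm.zookeeper.servers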
6) JVM parameters:
nimbus.childopts: "-Xmx1024m"
nimbus.childopts configures the JVM options for the Nimbus daemon.
supervisor.childopts: "-Xmx256m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -verbose:gc -Xloggc:D:\\developers\\tools\\apache-storm-0.9.4\\logs\\gc-storm-supervisor.log"
supervisor.childopts configures the JVM options for the Supervisor daemon.
logviewer.childopts: "-Xmx128m"
logviewer.childopts configures the JVM options for the logviewer daemon.
worker.childopts: "-Xmx768m -verbose:gc -Xloggc:D:\\developers\\tools\\apache-storm-0.9.4\\logs\\gc-storm-worker-%ID%.log -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=D:\\developers\\tools\\apache-storm-0.9.4\\logs\\dump"
worker.childopts configures the JVM options for worker processes.
worker.gc.childopts: "-XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75"
worker.gc.childopts configures the GC-related JVM options for worker processes.
The final step is to start all of Storm's daemon processes. Like Zookeeper, Storm is a fail-fast system, which allows it to be stopped at any moment and to recover correctly once its processes are restarted. This is also why Storm keeps no state inside its processes: even if Nimbus or the Supervisors are restarted, running Topologies are unaffected.
The Storm daemons are started as follows:
Nimbus: run the following command on the Storm master node to start the Nimbus daemon in the background:
bin/storm nimbus >/dev/null 2>&1 &
Supervisor: run the following command on each Storm worker node to start the Supervisor daemon in the background:
bin/storm supervisor >/dev/null 2>&1 &
UI: run the following command on the Storm master node to start the UI daemon in the background:
bin/storm ui >/dev/null 2>&1 &
Once they are up, visit http://{nimbus host}:8080 to watch the cluster's worker resource usage, the state of running Topologies, and so on.
Notes:
After the Storm daemons start, each process writes its log file under the logs/ subdirectory of the Storm installation directory.
In our tests, the Storm UI had to be deployed on the same machine as Nimbus, otherwise the UI would not work properly, because the UI process checks for a local Nimbus connection.
For convenience, add bin/storm to the system PATH (a sketch follows these notes).
It is recommended to deploy Nimbus and the Supervisors on separate machines.
Watch the Storm UI access control: by default the UI can kill a Topology directly.
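A minimal sketch for the PATH note above, assuming a bash shell and the /opt/storm/apache-storm-0.9.4 install location used earlier:
# Expose the storm CLI to the current user's shell (install path is the one assumed above)
echo 'export STORM_HOME=/opt/storm/apache-storm-0.9.4' >> ~/.bashrc
echo 'export PATH=$PATH:$STORM_HOME/bin' >> ~/.bashrc
source ~/.bashrc
storm help   # sanity check: prints the list of storm subcommands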
1) Start a Storm Topology:
storm jar allmycode.jar org.me.MyTopology arg1 arg2 arg3
Here allmycode.jar is the jar containing the Topology implementation, the main method of org.me.MyTopology is the Topology's entry point, and arg1, arg2 and arg3 are the arguments passed to org.me.MyTopology when it runs.
Note: each time you submit a jar to the cluster, do not reuse the same file name at the same location; one way to handle this is sketched below.
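A hedged sketch of that note: give every submitted jar a unique file name before handing it to storm jar. The paths, jar name and class name are the illustrative ones from the command above, not fixed values:
# Copy the build artifact to a uniquely named file so the same name is never re-submitted
JAR=/tmp/allmycode-$(date +%Y%m%d%H%M%S).jar
cp target/allmycode.jar "$JAR"
storm jar "$JAR" org.me.MyTopology arg1 arg2 arg3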
2) Stop a Storm Topology:
storm kill {toponame}
Here {toponame} is the Topology name that was given when the Topology was submitted to the Storm cluster.
To update a running topology, the only option for now is to kill it and resubmit a new one. A planned command is a storm swap that would swap in a new topology at runtime, guaranteeing that the old and new topologies never run at the same time and that the "downtime" caused by the replacement is minimal.
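A sketch of that kill-and-resubmit cycle; mytopology, the jar and the class are the illustrative names used in this article, and -w is storm kill's optional wait time:
# Kill the topology; -w sets how many seconds Storm waits after deactivating the
# spouts before tearing the workers down (default: the topology's message timeout)
storm kill mytopology -w 30
# Wait until the Storm UI (or storm list) no longer shows "mytopology", then resubmit
storm jar allmycode.jar org.me.MyTopology arg1 arg2 arg3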
Monitoring a topology
The best way to monitor a topology is to use the Storm UI. It shows the errors occurring in tasks as well as throughput and performance statistics for every component of the topology. You can also look at the logs on the worker machines in the cluster.
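Two command-line complements to the Storm UI, as a sketch; the worker log file name assumes the default slot port 6700 and the logs/ directory mentioned in the notes above:
# List running topologies with their status, task/worker counts and uptime
storm list
# On a supervisor node, follow the log of the worker bound to port 6700 (file name is an assumption)
tail -f /opt/storm/apache-storm-0.9.4/logs/worker-6700.log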
Storm supports changing (increasing or decreasing) the number of worker processes and the number of executors dynamically, without restarting the topology; this is called rebalancing.
There are two main ways to rebalance a topology:
Use the Storm web UI to rebalance the topology.
Use the CLI tool to rebalance the topology, for example:
# Reconfigure the topology "mytopology" to use 5 worker processes,
# the spout "blue-spout" to use 3 executors and
# the bolt "yellow-bolt" to use 10 executors.
storm rebalance mytopology -n 5 -e blue-spout=3 -e yellow-bolt=10
Note:
"-e yellow-bolt=3" can be used to lower yellow-bolt's parallelism, but it cannot raise it.
In other words, if yellow-bolt's parallelism is 5 by default (set when the topology was created), we can use "-e yellow-bolt=4" to reduce it to 4, but we cannot use "-e yellow-bolt=6" to raise it to 6.
What "-e yellow-bolt=6" actually does: if yellow-bolt's current parallelism is 5, nothing happens; if its current parallelism is below 5, it is brought back up to 5.
Shut down the Nimbus-related processes:
kill `ps aux | egrep '(daemon\.nimbus)|(storm\.ui\.core)' | fgrep -v egrep | awk '{print $2}'`
Shut down the Supervisor-related processes:
kill `ps aux | fgrep storm | fgrep -v 'fgrep' | awk '{print $2}'`