==========================================================================================
一、服务器分布
==========================================================================================
1、相关说明
IP地址        | 主机名             | 角色
192.168.1.100 | storm_zk1         | leader
192.168.1.101 | storm_zk2         | follower
192.168.1.102 | storm_zk3         | follower
192.168.1.103 | storm_nimbus      | Nimbus
192.168.1.104 | storm_supervisor1 | Supervisor
192.168.1.105 | storm_supervisor2 | Supervisor
2、hosts信息添加
# vim /etc/hosts
192.168.1.100 storm_zk1 storm_zk1
192.168.1.101 storm_zk2 storm_zk2
192.168.1.102 storm_zk3 storm_zk3
192.168.1.103 storm_nimbus storm_nimbus
192.168.1.104 storm_supervisor1 storm_supervisor1
192.168.1.105 storm_supervisor2 storm_supervisor2
3、总体架构
==========================================================================================
二、ZooKeeper集群部署
==========================================================================================
1、JDK安装
http://download.oracle.com/otn-pub/java/jdk/7u65-b17/jdk-7u65-linux-x64.tar.gz
# tar xvzf jdk-7u65-linux-x64.tar.gz -C /usr/local
# cd /usr/local && ln -s jdk1.7.0_65 jdk
# vim /etc/profile
export JAVA_HOME=/usr/local/jdk
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin
# source /etc/profile
# cd /usr/local/bin && ln -s /usr/local/jdk/bin/java
2、ZooKeeper安装
http://mirror.bit.edu.cn/apache/zookeeper/zookeeper-3.4.5/zookeeper-3.4.5.tar.gz
# tar xvzf zookeeper-3.4.5.tar.gz -C /usr/local
# cd /usr/local && ln -s zookeeper-3.4.5 zookeeper
3、ZooKeeper配置
# mkdir -p /data/zookeeper/{data,logs}
# vim /usr/local/zookeeper/conf/zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/data/zookeeper/data
dataLogDir=/data/zookeeper/logs
clientPort=2181
maxClientCnxns=3000
server.1=192.168.1.100:2888:3888
server.2=192.168.1.101:2888:3888
server.3=192.168.1.102:2888:3888
autopurge.snapRetainCount=6
autopurge.purgeInterval=2
4、日志级别调整
# vim /usr/local/zookeeper/conf/log4j.properties
zookeeper.root.logger=ERROR,CONSOLE
zookeeper.console.threshold=ERROR
zookeeper.log.threshold=ERROR
5、新增myid文件
在 192.168.1.100 上执行:
# echo 1 > /data/zookeeper/data/myid
在 192.168.1.101 上执行:
# echo 2 > /data/zookeeper/data/myid
在 192.168.1.102 上执行:
# echo 3 > /data/zookeeper/data/myid
6、ZooKeeper集群启动
启动zookeeper集群
# /usr/local/zookeeper/bin/zkServer.sh start
测试zookeeper集群是否建立成功,如无报错表示集群创建成功
# /usr/local/zookeeper/bin/zkCli.sh -server localhost:2181
查看运行状态
# /usr/local/zookeeper/bin/zkServer.sh status
7、服务健康监控
# touch /var/run/check_zookeeper.lock
# echo 'touch /var/run/check_zookeeper.lock' >> /etc/rc.d/rc.local
# vim /usr/local/zookeeper/bin/check_service.sh
#!/bin/sh PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin SRV_PORT="2181" ## 端口号 SRV_PROT="TCP" ## 协议类型 ## 是否已正确扫描 SCAN_FLAG=0 ETH1_ADDR=`/sbin/ifconfig eth1 | awk -F ':' '/inet addr/{print $2}' | sed 's/[a-zA-Z ]//g'` TMP_SRV_PROT=`echo ${SRV_PROT} | tr '[A-Z]' '[a-z]'` if [[ "${TMP_SRV_PROT}" == "tcp" ]]; then PROT_OPT="S" elif [[ "${TMP_SRV_PROT}" == "udp" ]]; then PROT_OPT="U" else echo "未知的协议类型!" && exit 1 fi for ((i=0; i<3; i++)); do RETVAL=`/usr/bin/nmap -n -s${PROT_OPT} -p ${SRV_PORT} ${ETH1_ADDR} | grep open` [[ -n "${RETVAL}" ]] && SCAN_FLAG=1;break || sleep 10 done if [[ ${SCAN_FLAG} -ne 1 ]]; then cd /usr/local/zookeeper/bin; ./zkServer.sh restart >/dev/null 2>&1 fi
需要添加的crontab项:
*/2 * * * * (flock --timeout=0 /var/run/check_zookeeper.lock /usr/local/zookeeper/bin/check_service.sh >/dev/null 2>&1)
==========================================================================================
三、Storm集群部署
==========================================================================================
1、Storm安装
http://mirrors.cnnic.cn/apache/incubator/storm/apache-storm-0.9.1-incubating/apache-storm-0.9.1-incubating.tar.gz
# tar xvzf apache-storm-0.9.1-incubating.tar.gz -C /usr/local
# rm -f apache-storm-0.9.1-incubating.tar.gz
# cd /usr/local
# mv apache-storm-0.9.1-incubating storm-0.9.1
# rm -f storm && ln -s storm-0.9.1 storm
# vim /etc/profile
export STORM_HOME=/usr/local/storm
export PATH=$PATH:$STORM_HOME/bin
# source /etc/profile
2、Storm配置
# mkdir -p /data/storm/{data,logs}
(1)、日志路径修改
# sed -i 's#${storm.home}#/data/storm#g' /usr/local/storm/logback/cluster.xml
(2)、主配置
# vim /usr/local/storm/conf/storm.yaml
drpc.servers:
- "storm_nimbus"
- "storm_supervisor1"
- "storm_supervisor2"
storm.zookeeper.servers:
- "storm_zk1"
- "storm_zk2"
- "storm_zk3"
storm.local.dir: "/data/storm/data"
nimbus.host: "storm_nimbus"
nimbus.thrift.port: 6627
nimbus.thrift.max_buffer_size: 1048576
nimbus.childopts: "-Xmx1024m"
nimbus.task.timeout.secs: 30
nimbus.supervisor.timeout.secs: 60
nimbus.monitor.freq.secs: 10
nimbus.cleanup.inbox.freq.secs: 600
nimbus.inbox.jar.expiration.secs: 3600
nimbus.task.launch.secs: 240
nimbus.reassign: true
nimbus.file.copy.expiration.secs: 600
nimbus.topology.validator: "backtype.storm.nimbus.DefaultTopologyValidator"
storm.zookeeper.port: 2181
storm.zookeeper.root: "/data/storm/zkinfo"
storm.cluster.mode: "distributed"
storm.local.mode.zmq: false
ui.port: 8080
ui.childopts: "-Xmx768m"
logviewer.port: 8000
logviewer.childopts: "-Xmx128m"
logviewer.appender.name: "A1"
supervisor.slots.ports:
  - 6700
  - 6701
  - 6702
  - 6703
supervisor.childopts: "-Xmx1024m"
supervisor.worker.start.timeout.secs: 240
supervisor.worker.timeout.secs: 30
supervisor.monitor.frequency.secs: 3
supervisor.heartbeat.frequency.secs: 5
supervisor.enable: true
worker.childopts: "-Xmx2048m"
topology.max.spout.pending: 5000
storm.zookeeper.session.timeout: 20
storm.zookeeper.connection.timeout: 10
storm.zookeeper.retry.times: 10
storm.zookeeper.retry.interval: 30
storm.zookeeper.retry.intervalceiling.millis: 30000
storm.thrift.transport: "backtype.storm.security.auth.SimpleTransportPlugin"
storm.messaging.transport: "backtype.storm.messaging.netty.Context"
storm.messaging.netty.server_worker_threads: 1
storm.messaging.netty.client_worker_threads: 1
storm.messaging.netty.buffer_size: 5242880
storm.messaging.netty.max_retries: 100
storm.messaging.netty.max_wait_ms: 1000
storm.messaging.netty.min_wait_ms: 100
storm.messaging.netty.transfer.batch.size: 262144
storm.messaging.netty.flush.check.interval.ms: 10
【注意】
(1)、supervisor.slots.ports 中端口的数量,决定每台机器可以运行的worker数量,需要根据实际的内存容量进行设置;
(2)、storm.messaging.transport 设置为 backtype.storm.messaging.netty.Context,表示将storm的消息传送机制更改为netty。
3、服务启动
(1)、Nimbus节点
# nohup /usr/local/storm/bin/storm nimbus >/dev/null 2>&1 &
# nohup /usr/local/storm/bin/storm ui >/dev/null 2>&1 &
(2)、Supervisor节点
# nohup /usr/local/storm/bin/storm supervisor >/dev/null 2>&1 &
# nohup /usr/local/storm/bin/storm logviewer >/dev/null 2>&1 &
【注意】
如果启动不了,需要查看一下在“/etc/hosts”里,是否设置了主机名
4、健康监控
(1)、Nimbus节点设置
# touch /var/run/check_storm.lock
# echo 'touch /var/run/check_storm.lock' >> /etc/rc.d/rc.local
# vim /data/scripts/monitor_nimbus_for_storm.sh
#!/bin/sh PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin ## 监控页面地址参数 MON_SRV_IPADDR=`/sbin/ifconfig eth1 | awk -F ':' '/inet addr/{print $2}' | sed 's/[a-zA-Z ]//g'` MON_SRV_PORT="8080" for ((i=0; i<3; i++)); do RETVAL=`/usr/bin/nmap -n -sS -p ${MON_SRV_PORT} ${MON_SRV_IPADDR} | grep open` [[ -n "${RETVAL}" ]] && SCAN_FLAG=1;break || sleep 10 done if [[ ${SCAN_FLAG} -ne 1 ]]; then nohup /usr/local/storm/bin/storm nimbus >/dev/null 2>&1 & nohup /usr/local/storm/bin/storm ui >/dev/null 2>&1 & fi
需要添加的crontab项:
*/2 * * * * (flock --timeout=0 /var/run/check_storm.lock /data/scripts/monitor_nimbus_for_storm.sh>/dev/null 2>&1)
(2)、Supervisor节点设置
# touch /var/run/check_storm.lock
# echo 'touch /var/run/check_storm.lock' >> /etc/rc.d/rc.local
# vim /data/scripts/monitor_supervisor_for_storm.sh
#!/bin/sh PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin ## 监控页面地址参数 MON_SRV_IPADDR="192.168.1.103" MON_SRV_PORT="8080" RETVAL=$(curl -s http://${MON_SRV_IPADDR}:${MON_SRV_PORT}/ | sed 's/<td>/<td>\n/g'| awk -F '<' '/storm_supervisor/{print $1}' | grep `hostname`) [[-n "${RETVAL}" ]] && exit 0 [[ -n `ps aux | grep "storm" | grep -v grep` ]] && kill -9 `ps aux | grep "storm" | grep -v grep | awk '{print $2}'` [[ -d /data/storm/data ]] && rm -rf /data/storm/data/* nohup /usr/local/storm/bin/storm supervisor >/dev/null 2>&1 & nohup /usr/local/storm/bin/storm logviewer >/dev/null 2>&1 &
需要添加的crontab项:
*/2 * * * * (flock --timeout=0 /var/run/check_storm.lock /data/scripts/monitor_supervisor_for_storm.sh >/dev/null 2>&1)