Shell脚本监控、拉起Nimbus和Supervisor进程

Nimbus和Supervisor都是快速失败,无状态的进程,Nimbus的单点问题一直没有很好的解决办法,所以我们可以对相关进程进行监控,在其挂掉时尝试重启。

在之前的项目里,比较常用的方式是通过monit对相关进程进行监控。使用monit需要在每台机器上分别进行配置,可以借助Fabric(http://www.fabfile.org)统一下发配置。这里不详细介绍monit的监控方式。

通过Shell脚本可以在Nimbus节点上监控整个集群,前提是Nimbus节点与各Supervisor节点之间已经建立SSH无密码访问。具体内容如下:

main.sh

#!/bin/bash
# Watchdog loop for the Storm master node.
#
# Every 20 seconds it:
#   1. looks for a nimbus JVM in `jps -l` and launches `storm nimbus` if absent,
#   2. looks for the Storm UI JVM and launches `storm ui` if absent,
#   3. runs storm_manager.sh (located next to this script) with the argument
#      "start".
# Output of the launched daemons goes to nimbus.log / ui.log in the current
# working directory. The loop never exits on its own.

# Directory containing this script, used to locate storm_manager.sh.
dir=$(dirname "$0")

while true
do
        echo "==========  $(date)    ==============="

        # First column of `jps -l` is the JVM pid; empty means no match.
        nimbus_pid=$(jps -l | grep 'nimbus' | awk '{print $1}')
        if [ -z "$nimbus_pid" ]; then
                echo  'storm nimbus is dead!'
                echo  'trying to start nimbus...'
                nohup storm nimbus >nimbus.log &
                echo 'finish starting!'
        else
                echo "storm nimbus id: $nimbus_pid"
        fi

        ui_pid=$(jps -l | grep 'backtype.storm.ui.core' | awk '{print $1}')
        # Test the UI pid itself. Checking the nimbus pid here would leave a
        # dead UI unnoticed for as long as nimbus is alive.
        if [ -z "$ui_pid" ]; then
                echo  'storm ui process is dead!'
                echo  'trying to start storm ui'
                nohup storm ui >ui.log &
                echo 'finish starting storm ui!'
        else
                echo "storm ui id: $ui_pid"
        fi

        sh "$dir/storm_manager.sh" start

        echo "sleeping 20s..."
        sleep 20
done
storm_manager.sh
 
  
#!/bin/bash
# Cluster-wide settings for the supervisor management functions in this script.

# Hosts to manage, space-separated; the functions iterate over this list
# relying on word-splitting, so host names must not contain whitespace.
slaves="cdn36 cdn37 cdn39 cdn21 cdn22 cdn23"
# NOTE(review): not referenced in the visible part of this script; presumably
# Storm's working directory on each host — confirm before relying on it.
storm_dir='/data/tmp/storm'

# Check every host in $slaves for a running Storm supervisor and start one
# where none is found.
# Globals: slaves (read) - space-separated list of hosts reachable via
#          password-less SSH.
# Outputs: the remote script's messages, followed by a blank line per host.
#
# NOTE(review): the heredoc opener and the head of the remote `if` were lost
# from the published listing (text between '<' and '>' was stripped). The
# detection line and the "dead" message below are reconstructed to mirror the
# nimbus check in main.sh — confirm against the original script.
check_supervisors(){
    for node in $slaves
    do
        # Unquoted heredoc: $node is expanded locally before sending, while
        # every \$-escaped expansion is evaluated by the remote shell.
        ssh "$node" <<END
            sid=\$(jps -l | grep 'supervisor' | awk '{print \$1}')
            if [ -z "\$sid" ]; then
                echo "supervisor is dead!"
                # stderr is redirected as well so the background daemon does
                # not keep the SSH channel open after the remote shell exits.
                nohup storm supervisor >supervisor.log 2>&1 &
                echo "finishing starting $node's supervisor"
            else
                echo "supervisor process id: \$sid"
            fi

END
                echo

    done

}

# Sync the configuration file: copy the local storm.yaml to the same conf
# directory on every host in $slaves (as root, via scp).
# Globals: slaves (read) - space-separated list of target hosts.
# Outputs: one "finishing sync" line per successful host on stdout;
#          one failure line per failed host on stderr.
# Returns: 0 if every copy succeeded, 1 if at least one failed.
# `local` is deliberately not used: main.sh runs this script with `sh`.
sync_config(){
    sync_conf_dir=/opt/package/apache-storm-0.9.2-incubating/conf
    sync_failed=0
    for node in $slaves
    do
        # Only claim success when scp actually succeeded.
        if scp "${sync_conf_dir}/storm.yaml" "root@${node}:${sync_conf_dir}/"; then
            echo "finishing sync $node config!"
        else
            echo "failed to sync $node config!" >&2
            sync_failed=1
        fi
    done
    return "$sync_failed"
}


mytest(){
        for node in $slaves
        do
                ssh $node <


你可能感兴趣的:(storm,storm,linux,shell)