服务器监控软件monit部署实践

monit 是一款功能非常强大的服务器监控软件,以下是我在实际部署中总结的经验。
1, 下载软件,并安装
# Download the monit 5.2.5 source tarball, unpack it, then build and install.
wget http://mmonit.com/monit/dist/monit-5.2.5.tar.gz
tar zxvf monit-5.2.5.tar.gz
# configure/make have to run inside the source tree that tar just extracted.
cd monit-5.2.5
./configure
make
make install

2, 查看Monit的程序路径
which  monit
monit

3, 写配置文件
vim /etc/monitrc
参考模板如下,监控了 nginx、juggernaut、redis、leshi-daemon;delayed_job 的配置还有些问题,在模板中暂时注释掉了。

# Global monit settings: poll interval, log file, alert mail and the web UI.
# Run as a daemon and check all services every 20 seconds (one "cycle").
set daemon  20
set logfile /var/log/monit.log
# NOTE(review): "[email protected]" below looks like an e-mail address that was
# masked when the page was published -- substitute real addresses before use.
set mail-format { from: [email protected] }
# SMTP server used to deliver alerts; "xxxxxx" is a placeholder password.
set mailserver smtp.gmail.com port 587
    username "[email protected]" password "xxxxxx"
    using tlsv1
    with timeout 30 seconds
# Recipient of alert mails.
set alert [email protected]
# Built-in HTTP interface on port 2812; "username:password" is a placeholder
# for the credentials allowed to log in.
set httpd port 2812 and
    allow username:password
# Watch nginx through its pidfile and restart it when the HTTP probe fails.
check process nginx with pidfile /opt/nginx/logs/nginx.pid
    start program = "/opt/nginx/sbin/nginx"
    stop program  = "/usr/bin/pkill -f '/opt/nginx/sbin/nginx'"
    # HTTP probe against paii.cn on port 80.
    if failed host paii.cn port 80 protocol http then restart
    # Give up (stop monitoring the service) after 3 restarts within 5 cycles,
    # so a permanently broken nginx is not restarted forever.
    if 3 restarts within 5 cycles then timeout

# Watch the juggernaut node server. This is a host check (no pidfile): the
# service counts as failed when GET /application.js on 127.0.0.1:8080 does not
# succeed within 10 seconds, and it is then restarted with the programs below.
check host juggernaut with address 127.0.0.1
    start program = "/usr/bin/nohup /usr/local/bin/node /root/node_modules/.bin/juggernaut"
    stop program  = "/usr/bin/pkill -f 'node /root/node_modules/.bin/juggernaut'"
    if failed port 8080 protocol HTTP
        request /application.js
        with timeout 10 seconds
 then restart

# Watch redis through its pidfile; restart it when a connection to
# 127.0.0.1:6379 fails.
check process redis with pidfile /var/run/redis.pid
   start program = "/root/redis-2.2.5/src/redis-server /etc/redis.conf"
   stop program = "/usr/bin/pkill -f '/root/redis-2.2.5/src/redis-server /etc/redis.conf'"
   if failed host 127.0.0.1 port 6379  then restart

# Watch leshi-daemon through its pidfile; restart it when GET /live.xml on
# 127.0.0.1:30002 does not succeed within 10 seconds.
# NOTE(review): "cmd...." is a placeholder -- fill in the real start command.
# The stop command kills every process whose command line contains the
# (masked) URL 'http://xxxxx:80/api'.
check process leshi-daemon with pidfile /var/run/leshi.pid
   start program = "cmd...."
   stop program = "/usr/bin/pkill -f 'http://xxxxx:80/api'"
   if failed host 127.0.0.1 port 30002 protocol HTTP
        request /live.xml
        with timeout 10 seconds
  then restart

#check process delayed_job with pidfile /var/www/web/tmp/pids/delayed_job.pid
#   start program = "/usr/bin/env  RAILS_ENV=production /usr/local/bin/ruby  /var/www/web/script/delayed_job start"
#   stop program = "/usr/bin/env  RAILS_ENV=production /usr/local/bin/ruby /var/www/web/script/delayed_job  stop"



################################################################################################

faye和 resque的监控补充

参考文件:

https://gist.github.com/3140446
https://github.com/defunkt/resque/blob/master/examples/monit/resque.monit

##################wowza监控 
run.sh

#!/bin/bash
#
# run.sh - start or stop Wowza Media Server and, on startup, record the PID of
# the running java process in a pidfile.
#
# Usage: run.sh [startup|shutdown]
#
# put in /usr/local/WowzaMediaServer/bin

readonly WOWZA_HOME=/usr/local/WowzaMediaServer
readonly PID_FILE="${WOWZA_HOME}/tmp/wowza.pid"

# Accept only the two documented actions. Anything else (including a missing
# argument) prints the usage and fails, so an arbitrary "./<arg>.sh" is never
# executed.
case "${1:-}" in
  startup | shutdown) ;;
  *)
    printf '%s\n' "Usage: run.sh [startup|shutdown]" >&2
    exit 1
    ;;
esac
readonly action=$1

mkdir -p "${WOWZA_HOME}/tmp" || exit 1
# Abort instead of running "./<action>.sh" from whatever directory we are in.
cd "${WOWZA_HOME}/bin" || exit 1

# The Wowza script is started in the background; the fixed sleep gives the
# java process time to appear before its PID is looked up.
"./${action}.sh" &
sleep 5

if [[ "${action}" == "startup" ]]; then
  # pgrep replaces the "ps | grep java | grep -v grep | awk" pipeline and
  # never matches itself.
  # NOTE(review): like the original pipeline, this matches every process with
  # "java" in its command line; several PIDs end up space-separated on one
  # line. Narrow the pattern if other JVMs run on this host.
  pids=$(pgrep -d ' ' -f java) || {
    # Do not write an empty pidfile when nothing was started.
    printf '%s\n' "run.sh: no java process found after startup" >&2
    exit 1
  }
  printf '%s\n' "${pids}" > "${PID_FILE}"
fi

# /etc/monit/conf.d/wowza.conf
# Watch Wowza Media Server through the pidfile written by run.sh; run.sh is
# also used to start (allowing up to 60 seconds) and stop the server.
check process wowza with pidfile /usr/local/WowzaMediaServer/tmp/wowza.pid
    start program = "/usr/local/WowzaMediaServer/bin/run.sh startup" with timeout 60 seconds
    stop program = "/usr/local/WowzaMediaServer/bin/run.sh shutdown"
    # Give up (stop monitoring the service) after 2 restarts within 3 cycles.
    if 2 restarts within 3 cycles then timeout
    # if totalmem > 100 Mb then alert # don't do this one, wowza needs lots of memory (?)
    # Stop the server when it has had more than 255 child processes for 5 cycles.
    if children > 255 for 5 cycles then stop
    # Restart on sustained CPU saturation or when the HTTP probe on port 1935 fails.
    if cpu usage > 95% for 3 cycles then restart
    if failed port 1935 protocol http then restart


你可能感兴趣的:(服务器监控软件monit部署实践)