Zabbix监控zookeeper

近期有需求要把 zookeeper 的监控接入 zabbix。经过一番搜索,确定采用在 agentd 端运行脚本的方式采集数据,再传送给 zabbix-server。


Zabbix_sender方式发送

将这些监控数据一次性使用 zabbix_sender 全部发送给 zabbix:先将监控项目汇集成一个字典,然后遍历这个字典,把字典中的每个 key:value 对通过 zabbix_sender 的 -k(key)和 -o(value)参数发送出去。


感谢博文:https://blog.csdn.net/reblue520/article/details/52352689


基本上参考了这篇博文的方法及python 脚本 模版文档.



仔细查看该博文中的 xml 模版文件后发现:server 通过调用 zookeeper.status[alive] 这个 key 来获取 pid,同时触发 sender 把数据发送给 server,无需配合计划任务。可能是我初学 python,没有理解好原脚本的用途,总之按原脚本没有成功;但经过修改后,可以正确获取数据。

下面为修改后的 python代码

#!/usr/bin/python
"""{'zk_followers': 0, 
'zk_outstanding_requests': 0, 
'zk_approximate_data_size': 890971, 
'zk_packets_sent': 5818488, 
'zk_pending_syncs': 0, 
'zk_avg_latency': 0, 
'zk_version': '3.4.6-1569965, built on 02/20/2014 09:09 GMT', 
'zk_watch_count': 1364, 
'zk_packets_received': 5797681, 
'zk_open_file_descriptor_count': 46, 
'zk_server_ruok': 'imok', 
'zk_server_state': 'follower', 
'zk_synced_followers': 0, 
'zk_max_latency': 400, 
'zk_num_alive_connections': 18, 
'zk_min_latency': 0, 
'zk_ephemerals_count': 1112, 
'zk_znode_count': 2207, 
'zk_max_file_descriptor_count': 4096} 
"""

import sys
import socket
import re
import subprocess
from StringIO import StringIO
import os
 
 
# Path of the zabbix_sender binary invoked by ZooKeeperServer.send_to_zabbix()
zabbix_sender = '/usr/local/zabbix/bin/zabbix_sender'
# Agent configuration file handed to zabbix_sender through its -c option
zabbix_conf = '/usr/local/zabbix/conf/zabbix_agentd.conf'
# When > 0 the metrics are really sent; otherwise send_to_zabbix() only prints
# the command line it would have executed (simulation mode)
send_to_zabbix = 1


# get zookeeper server status
class ZooKeeperServer(object):
    """Collect ZooKeeper status with four-letter-word commands and push it to Zabbix.

    The instance talks to one ZooKeeper server over a plain TCP socket,
    parses the replies of the 'mntr' and 'ruok' commands into a dict and can
    forward every entry of that dict to Zabbix through zabbix_sender.

    :param host: host name or address of the ZooKeeper server
    :param port: client port (string or int)
    :param timeout: socket timeout in seconds
    """

    # Metrics that 'mntr' only reports on a leader; other roles get 0 so the
    # corresponding Zabbix items always receive a value.
    _LEADER_ONLY_DEFAULTS = {
        'zk_followers': 0,
        'zk_synced_followers': 0,
        'zk_pending_syncs': 0,
    }

    def __init__(self, host='localhost', port='2181', timeout=1):
        self._address = (host, int(port))
        self._timeout = timeout
        self._result = {}

    def _create_socket(self):
        """Return a new, unconnected TCP socket."""
        return socket.socket()

    def _send_cmd(self, cmd):
        """Send a four-letter-word command and return the complete reply as text.

        :param cmd: command such as 'mntr' or 'ruok'
        :return: the reply as a native string ('' when nothing was received)
        :raises socket.error: when connecting, sending or the first read fails
        """
        s = self._create_socket()
        chunks = []
        try:
            s.settimeout(self._timeout)
            s.connect(self._address)
            # Sockets want bytes; on Python 2 a str already is bytes.
            if not isinstance(cmd, bytes):
                cmd = cmd.encode('ascii')
            s.sendall(cmd)
            # A single recv() may return only part of the 'mntr' output, so
            # keep reading until the peer closes the connection (the server is
            # expected to do so after replying -- see the ZooKeeper admin guide).
            while True:
                try:
                    chunk = s.recv(4096)
                except socket.timeout:
                    if chunks:
                        # Peer kept the connection open; use what we have.
                        break
                    raise
                if not chunk:
                    break
                chunks.append(chunk)
        finally:
            # Always release the socket, even when connect/send/recv failed.
            s.close()

        data = b''.join(chunks)
        if not isinstance(data, str):
            data = data.decode('utf-8', 'replace')
        return data

    def get_stats(self):
        """Query 'mntr' and 'ruok' and return the merged metrics as a dict.

        Example of the 'mntr' payload (tab separated)::

            zk_version      3.4.6-1569965, built on 02/20/2014 09:09 GMT
            zk_avg_latency  0
            zk_server_state follower
            zk_znode_count  1684

        An empty reply to either command simply contributes no entries.
        Leader-only metrics that are absent are filled in with 0. The result
        is also stored in ``self._result``.
        """
        result = {}

        data_mntr = self._send_cmd('mntr')
        if data_mntr:
            result.update(self._parse(data_mntr))

        data_ruok = self._send_cmd('ruok')
        if data_ruok:
            # e.g. {'zk_server_ruok': 'imok'}
            result.update(self._parse_ruok(data_ruok))

        for key, default in self._LEADER_ONLY_DEFAULTS.items():
            result.setdefault(key, default)

        self._result = result
        return self._result

    def _parse(self, data):
        """Parse the output of the 'mntr' command.

        :param data: text made of "key<TAB>value" lines
        :return: dict such as {'zk_outstanding_requests': 0, 'zk_server_state': 'follower'}
        """
        result = {}
        for line in data.split('\n'):
            try:
                key, value = self._parse_line(line)
            except ValueError:
                continue  # ignore broken or empty lines
            result[key] = value
        return result

    def _parse_ruok(self, data):
        """Parse the output of the 'ruok' command.

        :param data: reply text, normally 'imok'
        :return: {'zk_server_ruok': 'imok'}, or {} when the reply is empty
        """
        result = {}
        ruok = data.split('\n', 1)[0].strip()
        if ruok:
            result['zk_server_ruok'] = ruok
        return result

    def _parse_line(self, line):
        """Split one "key<TAB>value" line of 'mntr' output.

        :return: (key, value); value is converted to int when it looks like one
        :raises ValueError: when the line does not have exactly two fields or
            the key is empty
        """
        fields = [field.strip() for field in line.split('\t')]
        if len(fields) != 2:
            raise ValueError('Found invalid line: %s' % line)

        key, value = fields
        if not key:
            raise ValueError('The key is mandatory and should not be empty')

        try:
            value = int(value)
        except (TypeError, ValueError):
            pass  # non-numeric values (version, server state) stay strings

        return key, value

    def get_pid(self):
        """Return the pid of the running ZooKeeper java process as a string.

        Only the first matching line of the ps pipeline is used; an empty
        string is returned when no process matches.
        """
        # The command is a fixed string (no external input), hence shell=True.
        pidarg = '''ps -ef|grep java|grep zookeeper|grep -v grep|awk '{print $2}' '''   # 31022
        proc = subprocess.Popen(pidarg, shell=True, stdout=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() drains the pipe and waits for the child to exit.
        output = proc.communicate()[0]
        lines = output.splitlines()
        return lines[0].strip() if lines else ''

    def send_to_zabbix(self, metric):
        """Send one collected metric to Zabbix with zabbix_sender.

        The item key is ``zookeeper.status[<metric>]``. When the module-level
        ``send_to_zabbix`` switch is not positive, the command is only printed.

        :param metric: a key of ``self._result``
        :raises KeyError: when the metric has not been collected
        """
        # key = zookeeper.status[zk_max_file_descriptor_count]
        key = "zookeeper.status[" + metric + "]"
        value = self._result[metric]

        if send_to_zabbix > 0:
            # Open the null device here so the method does not rely on a
            # global file object prepared by the caller.
            with open(os.devnull, 'w') as devnull:
                try:
                    subprocess.call(
                        [zabbix_sender, "-c", zabbix_conf, "-k", key, "-o", str(value)],
                        stdout=devnull, stderr=devnull, shell=False)
                except OSError as detail:
                    print("Something went wrong while exectuting zabbix_sender :  %s" % (detail,))
        else:
            print("Simulation: the following command would be execucted :\n%s -c %s -k %s -o %s \n"
                  % (zabbix_sender, zabbix_conf, key, value))


def usage():
    """Print the command-line usage and terminate with exit status 1."""
    # Single-argument print(...) calls emit the same text as the former print
    # statements and match the print(...) style used by the agent script.
    print("\nUsage :  %s  alive|all" % sys.argv[0])
    print("Modes : \n\talive : Return pid of running zookeeper\n\tall : Send zookeeper stats as well")
    sys.exit(1)

        
accepted_modes = ['alive', 'all']
if len(sys.argv) == 2 and sys.argv[1] in accepted_modes:
    mode = sys.argv[1]
else:
    usage()

zk = ZooKeeperServer()
pid = zk.get_pid()

if pid == "":
    # The template trigger "zookeeper is not running" tests
    # zookeeper.status[alive].last()=0, so a value must be reported.
    print(0)
else:
    # Both accepted modes push the statistics: the server only polls
    # zookeeper.status[alive], and that poll is what triggers the sender.
    # FNULL stays a module-level name because send_to_zabbix() may read it.
    FNULL = open(os.devnull, 'w')
    try:
        zk.get_stats()
        for key in zk._result:
            zk.send_to_zabbix(key)
    except socket.error as detail:
        # The process exists but its client port did not answer; still report
        # the pid below instead of dying with a traceback.
        sys.stderr.write("Could not collect zookeeper stats: %s\n" % (detail,))
    finally:
        FNULL.close()
    print(pid)

下面是原代码的最后部分,一并贴出以便对比逻辑。如果我的方式有问题,请留言指正,新手在此感谢。

# Upstream dispatch, quoted for comparison with the modified script above.
# Mode 'all': collect the statistics, push every entry through
# zk.send_to_zabbix(), then print the pid.
if pid != "" and  mode == 'all':  
   zk.get_stats()  
   # debug aid: print zk._result  
   # FNULL is opened before the loop and closed once it has finished
   FNULL = open(os.devnull, 'w')  
   for key in zk._result:  
       zk.send_to_zabbix(key)  
   FNULL.close()  
   print pid  
   
# Mode 'alive': only report the pid, nothing is sent.
elif pid != "" and mode == "alive":  
    print pid  
# No pid was found: report 0.
else:  
    print 0



Zabbix_agentd获取

这种方式参考并修改了上一种方式中的 python 脚本,所有数据都由 agentd 直接调用脚本获取,不再使用 zabbix_sender。

以下为修改后的 python 脚本。个人感觉用 shell 实现很简单,但作为一个 python 初学者,还是自己修改了一下,把功能实现了。


#!/usr/bin/python
#monitor zookeeper
#by mo

import sys
import socket
import re
import subprocess
from StringIO import StringIO
import os

class ZooKeeperServer(object):
    """Read ZooKeeper status through four-letter-word commands.

    Used by the agent-side script: each invocation asks the server for one
    piece of information and prints it.

    :param host: host name or address of the ZooKeeper server
    :param port: client port (string or int)
    :param timeout: socket timeout in seconds
    """

    # Metrics that 'mntr' only reports on a leader; other roles get 0 so a
    # request for one of them never fails with a missing key.
    _LEADER_ONLY_DEFAULTS = {
        'zk_followers': 0,
        'zk_synced_followers': 0,
        'zk_pending_syncs': 0,
    }

    def __init__(self, host='localhost', port='2181', timeout=1):
        self._address = (host, int(port))
        self._timeout = timeout
        self._result = {}

    def _create_socket(self):
        """Return a new, unconnected TCP socket."""
        return socket.socket()

    def _send_cmd(self, cmd):
        """Send a four-letter-word command and return the complete reply as text.

        :param cmd: command such as 'mntr' or 'ruok'
        :return: the reply as a native string ('' when nothing was received)
        :raises socket.error: when connecting, sending or the first read fails
        """
        s = self._create_socket()
        chunks = []
        try:
            s.settimeout(self._timeout)
            s.connect(self._address)
            # Sockets want bytes; on Python 2 a str already is bytes.
            if not isinstance(cmd, bytes):
                cmd = cmd.encode('ascii')
            s.sendall(cmd)
            # One recv() may deliver only part of the reply, so read until the
            # peer closes the connection (the server is expected to do so
            # after replying -- see the ZooKeeper admin guide).
            while True:
                try:
                    chunk = s.recv(4096)
                except socket.timeout:
                    if chunks:
                        # Peer kept the connection open; use what we have.
                        break
                    raise
                if not chunk:
                    break
                chunks.append(chunk)
        finally:
            # Always release the socket, even when connect/send/recv failed.
            s.close()

        data = b''.join(chunks)
        if not isinstance(data, str):
            data = data.decode('utf-8', 'replace')
        return data

    def get_pid(self):
        """Return the pid of the running ZooKeeper java process as a string.

        Only the first matching line of the ps pipeline is used; an empty
        string is returned when no process matches.
        """
        # The command is a fixed string (no external input), hence shell=True.
        pidarg = '''ps -ef|grep java|grep zookeeper|grep -v grep|awk '{print $2}' '''   # 31022
        proc = subprocess.Popen(pidarg, shell=True, stdout=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() drains the pipe and waits for the child to exit.
        output = proc.communicate()[0]
        lines = output.splitlines()
        return lines[0].strip() if lines else ''

    def _parse(self, data):
        """Parse the output of the 'mntr' command.

        :param data: text made of "key<TAB>value" lines
        :return: dict such as {'zk_outstanding_requests': 0, 'zk_server_state': 'follower'}
        """
        result = {}
        for line in data.split('\n'):
            try:
                key, value = self._parse_line(line)
            except ValueError:
                continue  # ignore broken or empty lines
            result[key] = value
        return result

    def _parse_line(self, line):
        """Split one "key<TAB>value" line of 'mntr' output.

        :return: (key, value); value is converted to int when it looks like one
        :raises ValueError: when the line does not have exactly two fields or
            the key is empty
        """
        fields = [field.strip() for field in line.split('\t')]
        if len(fields) != 2:
            raise ValueError('Found invalid line: %s' % line)

        key, value = fields
        if not key:
            raise ValueError('The key is mandatory and should not be empty')

        try:
            value = int(value)
        except (TypeError, ValueError):
            pass  # non-numeric values (version, server state) stay strings

        return key, value

    def get_stats(self):
        """Query 'mntr' and return its metrics as a dict.

        Example of the 'mntr' payload (tab separated)::

            zk_version      3.4.6-1569965, built on 02/20/2014 09:09 GMT
            zk_avg_latency  0
            zk_server_state follower
            zk_znode_count  1684

        Leader-only metrics that are absent are filled in with 0. The result
        is also stored in ``self._result``.
        """
        data_mntr = self._send_cmd('mntr')
        result = self._parse(data_mntr)
        for key, default in self._LEADER_ONLY_DEFAULTS.items():
            result.setdefault(key, default)

        self._result = result
        return self._result


# Metrics that 'mntr' reports only on a leader; report 0 for them elsewhere
# instead of failing on a missing key.
LEADER_ONLY_METRICS = ('zk_followers', 'zk_synced_followers', 'zk_pending_syncs')

if len(sys.argv) != 2:
    sys.stderr.write("Usage: %s alive|mntr|zk_server_ruok|<mntr metric name>\n" % sys.argv[0])
    sys.exit(1)

item = sys.argv[1]
zk = ZooKeeperServer()

if item == 'alive':
    # The template trigger tests zookeeper.status[alive].last()=0, so print 0
    # (not an empty string) when no ZooKeeper process is found.
    print(zk.get_pid() or 0)
elif item == 'zk_server_ruok':
    # The item key is 'zk_server_ruok' but the four-letter word understood by
    # ZooKeeper is 'ruok'.
    print(zk._send_cmd('ruok'))
elif item == 'mntr':
    # Raw 'mntr' output, handy for debugging.
    print(zk._send_cmd('mntr'))
else:
    stats = zk.get_stats()
    if item in stats:
        print(stats[item])
    elif item in LEADER_ONLY_METRICS:
        print(0)
    else:
        sys.stderr.write("Unknown zookeeper metric: %s\n" % item)
        sys.exit(1)



模版

参考博文中的模版。(注意:下面贴出的内容丢失了 XML 标签,只剩下各字段的值,不能直接导入;请从原博文获取完整的模版文件。)



    3.0
    2017-12-11T08:02:58Z
    
        
            Zabbix servers
        
    
    
        
    
    
        
            {Zookeeper:zookeeper.status[zk_outstanding_requests].last()}>10
            big outstanding requests number
            
            0
            0
            
            0
            
        
        
            {Zookeeper:zookeeper.status[zk_pending_syncs].last()}>10
            big pending syncs
            
            0
            0
            
            0
            
        
        
            {Zookeeper:zookeeper.status[zk_avg_latency].last()}>10
            large average latency
            
            0
            0
            
            0
            
        
        
            {Zookeeper:zookeeper.status[zk_open_file_descriptor_count].last()} > {Zookeeper:zookeeper.status[zk_max_file_descriptor_count].last()}*0.85
            large file descriptor used
            
            0
            0
            
            0
            
        
        
            {Zookeeper:zookeeper.status[zk_server_ruok].str(imok)}<>1
            zookeeper is abnormal
            
            0
            4
            
            0
            
        
        
            {Zookeeper:zookeeper.status[alive].last()}=0
            zookeeper is not running
            
            0
            4
            
            0
            
        
        
            {Zookeeper:zookeeper.status[zk_server_state].abschange()}>0
            zookeeper state role has been changed
            
            0
            1
            
            0
            
        
    
    
        
            ZooKeeper Alive Connections
            900
            200
            0.0000
            100.0000
            1
            1
            0
            1
            0
            0.0000
            0.0000
            0
            0
            0
            0
            
                
                    0
                    0
                    1A7C11
                    0
                    2
                    0
                    
                        Zookeeper
                        zookeeper.status[zk_num_alive_connections]
                    
                
            
        
        
            ZooKeeper Latency
            900
            200
            0.0000
            100.0000
            1
            1
            0
            1
            0
            0.0000
            0.0000
            0
            0
            0
            0
            
                
                    0
                    0
                    1A7C11
                    0
                    2
                    0
                    
                        Zookeeper
                        zookeeper.status[zk_avg_latency]
                    
                
            
        
    

Zabbix_agentd配置

UserParameter=zookeeper.status[*],/usr/bin/python /usr/local/zabbix/scripts/zookeeper_monitor.py $1


###

注意给脚本执行权限