Zabbix监控zookeeper
近期需求增加zookeeper监控到zabbix , 经过一番搜索, 确定通过 agentd端运行脚本的方式采集数据,传送给 zabbix-server端.
Zabbix_sender方式发送
将这些监控数据一次性使用zabbix_sender全部发送给zabbix.将监控项目汇集成一个字典,然后遍历这个字典,将字典中的key:value对通过zabbix_sender的-k和-o参数指定发送出去.
感谢博文:https://blog.csdn.net/reblue520/article/details/52352689
基本上参考了这篇博文的方法及python 脚本 . 模版文档.
仔细查看该博文中的xml模版文件后发现: server 通过调用 zookeeper.status[alive] 这个key来获取pid, 同时触发 zabbix_sender 把其余监控数据发送给 server, 因此无需配合计划任务. 可能是我初学python, 没有理解好原脚本的用途, 按原文的方式没有成功; 但经过修改后, 可以正确获取数据.
下面为修改后的 python代码
#!/usr/bin/python
"""Collect ZooKeeper statistics and push them to Zabbix via zabbix_sender.

The script is meant to be run by the Zabbix agent (UserParameter).  It prints
the pid of the running ZooKeeper java process (or 0 when none is found) and,
when ZooKeeper is running, sends every collected metric to the Zabbix server
as ``zookeeper.status[<metric>]``.

Example of the dictionary built by ``ZooKeeperServer.get_stats``::

    {'zk_followers': 0, 'zk_outstanding_requests': 0,
     'zk_approximate_data_size': 890971, 'zk_packets_sent': 5818488,
     'zk_pending_syncs': 0, 'zk_avg_latency': 0,
     'zk_version': '3.4.6-1569965, built on 02/20/2014 09:09 GMT',
     'zk_watch_count': 1364, 'zk_packets_received': 5797681,
     'zk_open_file_descriptor_count': 46, 'zk_server_ruok': 'imok',
     'zk_server_state': 'follower', 'zk_synced_followers': 0,
     'zk_max_latency': 400, 'zk_num_alive_connections': 18,
     'zk_min_latency': 0, 'zk_ephemerals_count': 1112,
     'zk_znode_count': 2207, 'zk_max_file_descriptor_count': 4096}

The code runs unchanged on Python 2.6+ and Python 3.
"""
import sys
import socket
import re
import subprocess
import os

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

zabbix_sender = '/usr/local/zabbix/bin/zabbix_sender'
zabbix_conf = '/usr/local/zabbix/conf/zabbix_agentd.conf'
# 1: really call zabbix_sender; 0: only print the command that would be run.
send_to_zabbix = 1

accepted_modes = ['alive', 'all']

# Metrics that 'mntr' only reports on the leader; followers get 0 instead.
LEADER_ONLY_METRICS = ('zk_followers', 'zk_synced_followers',
                       'zk_pending_syncs')


def _as_text(data):
    """Return *data* as a native ``str`` (bytes are decoded on Python 3)."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


# get zookeeper server status
class ZooKeeperServer(object):
    """Query a ZooKeeper server with four-letter-word commands."""

    def __init__(self, host='localhost', port='2181', timeout=1):
        """Remember the server address; no connection is opened here.

        :param host: host name or IP of the ZooKeeper server
        :param port: client port (string or int)
        :param timeout: socket timeout in seconds
        """
        self._address = (host, int(port))
        self._timeout = timeout
        self._result = {}

    def _create_socket(self):
        """Return a new, unconnected TCP socket."""
        return socket.socket()

    def _send_cmd(self, cmd):
        """Send a 4letter word command to the server and return its reply.

        The whole reply is read (until the server closes the connection or
        the timeout expires after some data arrived), and the socket is
        always closed, even when connecting or sending fails.

        :param cmd: four-letter command such as ``'mntr'`` or ``'ruok'``
        :return: the reply as text
        :raises socket.error: when the server cannot be reached
        """
        payload = cmd if isinstance(cmd, bytes) else cmd.encode('ascii')
        chunks = []
        s = self._create_socket()
        try:
            s.settimeout(self._timeout)
            s.connect(self._address)
            s.sendall(payload)
            while True:
                try:
                    chunk = s.recv(4096)
                except socket.timeout:
                    # Keep what we already have if the server is slow to
                    # close; a timeout with no data at all is a real error.
                    if chunks:
                        break
                    raise
                if not chunk:
                    break
                chunks.append(chunk)
        finally:
            s.close()
        return _as_text(b''.join(chunks))

    def get_stats(self):
        """Get ZooKeeper server stats as a map.

        Combines the parsed ``mntr`` output with the ``ruok`` answer, e.g.::

            zk_version  3.4.6-1569965, built on 02/20/2014 09:09 GMT
            zk_avg_latency  0
            zk_max_latency  94
            zk_server_state follower
            zk_max_file_descriptor_count    4096

        An empty reply to either command simply contributes nothing.  The
        leader-only metrics are set to 0 when the server does not report
        them, so every template item always receives a value.

        :return: dict of metric name -> value (also stored in ``_result``)
        """
        data_mntr = self._send_cmd('mntr')
        data_ruok = self._send_cmd('ruok')

        result = {}
        if data_mntr:
            result.update(self._parse(data_mntr))
        if data_ruok:
            # {'zk_server_ruok': 'imok'}
            result.update(self._parse_ruok(data_ruok))

        for name in LEADER_ONLY_METRICS:
            result.setdefault(name, 0)

        self._result = result
        return self._result

    def _parse(self, data):
        """Parse the output from the 'mntr' 4letter word command.

        :param data: tab separated ``key<TAB>value`` lines
        :return: dict such as ``{'zk_outstanding_requests': 0}``; lines that
            cannot be parsed are ignored
        """
        result = {}
        for line in StringIO(data).readlines():
            try:
                key, value = self._parse_line(line)
            except ValueError:
                continue  # ignore broken lines
            result[key] = value
        return result

    def _parse_ruok(self, data):
        """Parse the output from the 'ruok' 4letter word command.

        :param data: reply text, normally ``imok``
        :return: ``{'zk_server_ruok': 'imok'}`` or ``{}`` for an empty reply
        """
        result = {}
        ruok = StringIO(data).readline()
        if ruok:
            result['zk_server_ruok'] = ruok
        return result

    def _parse_line(self, line):
        """Split one ``key<TAB>value`` line of 'mntr' output.

        :return: ``(key, value)``; value is an int when it looks like one
        :raises ValueError: when the line does not have exactly two fields
            or the key is empty
        """
        fields = line.split('\t')
        if len(fields) != 2:
            raise ValueError('Found invalid line: %s' % line)
        key, value = fields[0].strip(), fields[1].strip()

        if not key:
            raise ValueError('The key is mandatory and should not be empty')

        try:
            value = int(value)
        except (TypeError, ValueError):
            pass  # non numeric values (version, state) stay strings

        return key, value

    def get_pid(self):
        """Return the pid of the ZooKeeper java process, or '' if not found.

        Only the first matching process is reported.
        """
        # ps -ef|grep java|grep zookeeper|awk '{print $2}'
        pidarg = '''ps -ef|grep java|grep zookeeper|grep -v grep|awk '{print $2}' '''
        proc = subprocess.Popen(pidarg, shell=True, stdout=subprocess.PIPE)
        out, _ = proc.communicate()  # also reaps the child process
        lines = _as_text(out).splitlines()
        return lines[0].strip() if lines else ''

    def send_to_zabbix(self, metric):
        """Send one collected metric to Zabbix with zabbix_sender.

        :param metric: a key of ``self._result``; it is sent under the item
            key ``zookeeper.status[<metric>]``
        """
        # key = zookeeper.status[zk_max_file_descriptor_count]
        key = "zookeeper.status[" + metric + "]"
        value = str(self._result[metric])

        if send_to_zabbix > 0:
            try:
                # zabbix_sender output is discarded so that the only thing
                # the agent sees on stdout is the pid printed by main().
                with open(os.devnull, 'w') as devnull:
                    subprocess.call(
                        [zabbix_sender, "-c", zabbix_conf,
                         "-k", key, "-o", value],
                        stdout=devnull, stderr=devnull, shell=False)
            except OSError as detail:
                print("Something went wrong while exectuting zabbix_sender :  %s" % detail)
        else:
            print("Simulation: the following command would be execucted :\n%s -c %s -k %s -o %s \n"
                  % (zabbix_sender, zabbix_conf, key, value))


def usage():
    """Display program usage and exit with status 1."""
    print("\nUsage :  %s  alive|all" % sys.argv[0])
    print("Modes : \n\talive : Return pid of running zookeeper\n\tall : Send zookeeper stats as well")
    sys.exit(1)


def main(argv=None):
    """Entry point: print the ZooKeeper pid and push the stats to Zabbix.

    Both accepted modes behave the same here: when ZooKeeper is running its
    stats are sent with zabbix_sender and the pid is printed; when it is not
    running ``0`` is printed, which is what the template trigger
    ``zookeeper.status[alive].last()=0`` expects.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2 or argv[1] not in accepted_modes:
        usage()

    zk = ZooKeeperServer()
    pid = zk.get_pid()
    if not pid:
        print(0)
        return

    zk.get_stats()
    for key in zk._result:
        zk.send_to_zabbix(key)
    print(pid)


if __name__ == '__main__':
    main()
下面为原代码的最后部分, 同时贴出, 方便对比逻辑. 如果我的方式有问题, 请留言指正, 新手在此感谢.
# Tail of the original script: 'all' pushes every metric through
# zabbix_sender and prints the pid, 'alive' only prints the pid, and
# anything else (including "zookeeper is not running") prints 0.
if pid == "" or mode not in ('all', 'alive'):
    print(0)
elif mode == 'all':
    zk.get_stats()
    # send_to_zabbix() reads the module-level FNULL to silence zabbix_sender.
    FNULL = open(os.devnull, 'w')
    for metric in zk._result:
        zk.send_to_zabbix(metric)
    FNULL.close()
    print(pid)
else:
    # mode == "alive"
    print(pid)
Zabbix_agentd获取
参考了上一种方式中的python脚本, 经过修改, 所有数据都通过agentd的方式获取数据.
以下为修改后的python脚本. 个人感觉用shell实现很简单, 但是作为一个python初学者, 还是自己动手修改了一下, 功能已经实现.
#!/usr/bin/python
# monitor zookeeper
# by mo
"""Print a single ZooKeeper monitoring value for the Zabbix agent.

Called as ``zookeeper_monitor.py <item>`` from a UserParameter, where
``<item>`` is one of:

* ``alive``           - pid of the ZooKeeper java process, 0 if not running
* ``zk_server_ruok``  - reply to the ``ruok`` command (normally ``imok``)
* ``mntr``            - raw reply to the ``mntr`` command
* any ``mntr`` key    - e.g. ``zk_avg_latency``

The code runs unchanged on Python 2.6+ and Python 3.
"""
import sys
import socket
import re
import subprocess
import os

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

# Item keys answered directly by a four-letter command: item -> command.
FOUR_LETTER_ITEMS = {'zk_server_ruok': 'ruok', 'mntr': 'mntr'}

# Metrics that 'mntr' only reports on the leader; followers get 0 instead.
LEADER_ONLY_METRICS = ('zk_followers', 'zk_synced_followers',
                       'zk_pending_syncs')


def _as_text(data):
    """Return *data* as a native ``str`` (bytes are decoded on Python 3)."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


class ZooKeeperServer(object):
    """Query a ZooKeeper server with four-letter-word commands."""

    def __init__(self, host='localhost', port='2181', timeout=1):
        """Remember the server address; no connection is opened here.

        :param host: host name or IP of the ZooKeeper server
        :param port: client port (string or int)
        :param timeout: socket timeout in seconds
        """
        self._address = (host, int(port))
        self._timeout = timeout
        self._result = {}

    def _create_socket(self):
        """Return a new, unconnected TCP socket."""
        return socket.socket()

    def _send_cmd(self, cmd):
        """Send a 4letter word command to the server and return its reply.

        The whole reply is read (until the server closes the connection or
        the timeout expires after some data arrived), and the socket is
        always closed, even when connecting or sending fails.

        :param cmd: four-letter command such as ``'mntr'`` or ``'ruok'``
        :return: the reply as text
        :raises socket.error: when the server cannot be reached
        """
        payload = cmd if isinstance(cmd, bytes) else cmd.encode('ascii')
        chunks = []
        s = self._create_socket()
        try:
            s.settimeout(self._timeout)
            s.connect(self._address)
            s.sendall(payload)
            while True:
                try:
                    chunk = s.recv(4096)
                except socket.timeout:
                    # Keep what we already have if the server is slow to
                    # close; a timeout with no data at all is a real error.
                    if chunks:
                        break
                    raise
                if not chunk:
                    break
                chunks.append(chunk)
        finally:
            s.close()
        return _as_text(b''.join(chunks))

    def get_pid(self):
        """Return the pid of the ZooKeeper java process, or '' if not found.

        Only the first matching process is reported.
        """
        # ps -ef|grep java|grep zookeeper|awk '{print $2}'
        pidarg = '''ps -ef|grep java|grep zookeeper|grep -v grep|awk '{print $2}' '''
        proc = subprocess.Popen(pidarg, shell=True, stdout=subprocess.PIPE)
        out, _ = proc.communicate()  # also reaps the child process
        lines = _as_text(out).splitlines()
        return lines[0].strip() if lines else ''

    def _parse(self, data):
        """Parse the output from the 'mntr' 4letter word command.

        :param data: tab separated ``key<TAB>value`` lines
        :return: dict such as ``{'zk_outstanding_requests': 0}``; lines that
            cannot be parsed are ignored
        """
        result = {}
        for line in StringIO(data).readlines():
            try:
                key, value = self._parse_line(line)
            except ValueError:
                continue  # ignore broken lines
            result[key] = value
        return result

    def _parse_line(self, line):
        """Split one ``key<TAB>value`` line of 'mntr' output.

        :return: ``(key, value)``; value is an int when it looks like one
        :raises ValueError: when the line does not have exactly two fields
            or the key is empty
        """
        fields = line.split('\t')
        if len(fields) != 2:
            raise ValueError('Found invalid line: %s' % line)
        key, value = fields[0].strip(), fields[1].strip()

        if not key:
            raise ValueError('The key is mandatory and should not be empty')

        try:
            value = int(value)
        except (TypeError, ValueError):
            pass  # non numeric values (version, state) stay strings

        return key, value

    def get_stats(self):
        """Get ZooKeeper server stats as a map.

        Parses the ``mntr`` reply, e.g.::

            zk_version  3.4.6-1569965, built on 02/20/2014 09:09 GMT
            zk_avg_latency  0
            zk_server_state follower
            zk_max_file_descriptor_count    4096

        :return: dict of metric name -> value (also stored in ``_result``)
        """
        self._result = self._parse(self._send_cmd('mntr'))
        return self._result


def fetch_item(zk, item):
    """Return the value the agent should report for *item*.

    :param zk: a ``ZooKeeperServer`` instance
    :param item: ``alive``, ``zk_server_ruok``, ``mntr`` or a ``mntr`` key
    :return: the value to print
    :raises KeyError: when *item* is not a known metric
    """
    if item == 'alive':
        # 0 (not an empty string) lets a "last()=0" trigger detect a
        # stopped ZooKeeper.
        return zk.get_pid() or 0

    if item in FOUR_LETTER_ITEMS:
        # The Zabbix item key is 'zk_server_ruok' but the command ZooKeeper
        # understands is 'ruok'.
        return zk._send_cmd(FOUR_LETTER_ITEMS[item])

    stats = zk.get_stats()
    if item in stats:
        return stats[item]
    if item in LEADER_ONLY_METRICS:
        # Followers do not report these; 0 keeps the item supported.
        return 0
    raise KeyError(item)


def main(argv=None):
    """Entry point: print the value of the item named on the command line."""
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        sys.stderr.write(
            "Usage : %s alive|mntr|zk_server_ruok|<mntr key>\n" % argv[0])
        sys.exit(1)

    try:
        value = fetch_item(ZooKeeperServer(), argv[1])
    except KeyError:
        sys.stderr.write("Unknown zookeeper metric: %s\n" % argv[1])
        sys.exit(1)
    print(value)


if __name__ == '__main__':
    main()
模版
参考博文中的模版
3.0 2017-12-11T08:02:58Z Zabbix servers Zookeeper Zookeeper Zabbix servers ZooKeeper Status zookeeper pid 2 0 zookeeper.status[alive] 10 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper approximate data size 2 0 zookeeper.status[zk_approximate_data_size] 0 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper average latency 2 0 zookeeper.status[zk_avg_latency] 0 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper ephemerals count 2 0 zookeeper.status[zk_ephemerals_count] 0 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper leader's followers 2 0 zookeeper.status[zk_followers] 0 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper max file descriptor count 2 0 zookeeper.status[zk_max_file_descriptor_count] 10 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper max latency 2 0 zookeeper.status[zk_max_latency] 10 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper min latency 2 0 zookeeper.status[zk_min_latency] 10 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper alive connections 2 0 zookeeper.status[zk_num_alive_connections] 0 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper opened file descriptor count 2 0 zookeeper.status[zk_open_file_descriptor_count] 10 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper outstanding requests 2 0 zookeeper.status[zk_outstanding_requests] 10 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper packages received 2 0 zookeeper.status[zk_packets_received] 10 90 365 0 3 0 0 0 0 1 0 0 收包数量 0 ZooKeeper Status zookeeper packages sent 2 0 zookeeper.status[zk_packets_sent] 10 90 365 0 3 0 0 0 0 1 0 0 发包数据量 0 ZooKeeper Status zookeeper leader's pending syncs 2 0 zookeeper.status[zk_pending_syncs] 10 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper response checking 2 0 zookeeper.status[zk_server_ruok] 10 90 0 0 1 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper state role 2 0 zookeeper.status[zk_server_state] 10 90 0 0 1 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper leader's synced followers 2 0 
zookeeper.status[zk_synced_followers] 10 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper version 2 0 zookeeper.status[zk_version] 10 90 0 0 1 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper watches count 2 0 zookeeper.status[zk_watch_count] 10 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status zookeeper znodes count 2 0 zookeeper.status[zk_znode_count] 10 90 365 0 3 0 0 0 0 1 0 0 0 ZooKeeper Status {Zookeeper:zookeeper.status[zk_outstanding_requests].last()}>10 big outstanding requests number 0 0 0 {Zookeeper:zookeeper.status[zk_pending_syncs].last()}>10 big pending syncs 0 0 0 {Zookeeper:zookeeper.status[zk_avg_latency].last()}>10 large average latency 0 0 0 {Zookeeper:zookeeper.status[zk_open_file_descriptor_count].last()} > {Zookeeper:zookeeper.status[zk_max_file_descriptor_count].last()}*0.85 large file descriptor used 0 0 0 {Zookeeper:zookeeper.status[zk_server_ruok].str(imok)}<>1 zookeeper is abnormal 0 4 0 {Zookeeper:zookeeper.status[alive].last()}=0 zookeeper is not running 0 4 0 {Zookeeper:zookeeper.status[zk_server_state].abschange()}>0 zookeeper state role has been changed 0 1 0 ZooKeeper Alive Connections 900 200 0.0000 100.0000 1 1 0 1 0 0.0000 0.0000 0 0 0 0 0 0 1A7C11 0 2 0 Zookeeper zookeeper.status[zk_num_alive_connections] ZooKeeper Latency 900 200 0.0000 100.0000 1 1 0 1 0 0.0000 0.0000 0 0 0 0 0 0 1A7C11 0 2 0 Zookeeper zookeeper.status[zk_avg_latency]
Zabbix_agentd配置
UserParameter=zookeeper.status[*],/usr/bin/python /usr/local/zabbix/scripts/zookeeper_monitor.py $1
###
注意: 给脚本执行权限