一 监控原理描述
twemproxy的状态(stats)监控端口默认为22222,可以使用nc等工具直接连接该端口,状态信息以JSON格式输出
$ nc 127.0.0.1 22222 {"service":"nutcracker", "source":"gintama-taiwan-redis2", "version":"0.4.0", "uptime":67135, "timestamp":1427100529, "total_connections":376, "curr_connections":7, "gintama": {"client_eof":0, "client_err":311, "client_connections":0, "server_ejects":58, "forward_error":53, "fragments":0, "192.168.100.68:6379": {"server_eof":1, "server_err":12, "server_timedout":0, "server_connections":1, "server_ejected_at":1427074216551863, "requests":553, "request_bytes":333854, "responses":541, "response_bytes":265633, "in_queue":0, "in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0},"192.168.100.68:6380": {"server_eof":1, "server_err":9, "server_timedout":0, "server_connections":1, "server_ejected_at":1427074216552893, "requests":20, "request_bytes":1551, "responses":11, "response_bytes":207, "in_queue":0, "in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0},"192.168.100.68:6381": {"server_eof":1, "server_err":1, "server_timedout":0, "server_connections":1, "server_ejected_at":1427037406770236, "requests":287, "request_bytes":10718, "responses":286, "response_bytes":23430, "in_queue":0, "in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0},"192.168.100.69:6379": {"server_eof":1, "server_err":11, "server_timedout":0, "server_connections":1, "server_ejected_at":1427074216550647, "requests":55, "request_bytes":2633, "responses":44, "response_bytes":220, "in_queue":0, "in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0},"192.168.100.69:6380": {"server_eof":1, "server_err":9, "server_timedout":0, "server_connections":1, "server_ejected_at":1427040015877803, "requests":291, "request_bytes":15563, "responses":282, "response_bytes":1402, "in_queue":0, "in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0},"192.168.100.69:6381": {"server_eof":0, "server_err":11, "server_timedout":0, "server_connections":1, "server_ejected_at":1427059816411041, "requests":193, "request_bytes":17292, "responses":182, "response_bytes":1767, "in_queue":0, 
"in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0}}}
可以使用工具整理一下输出
{ "service": "nutcracker", "source": "gintama-taiwan-redis2", "version": "0.4.0", "uptime": 64944, "timestamp": 1427098338, "total_connections": 374, "curr_connections": 7, "gintama": { "client_eof": 0, "client_err": 309, "client_connections": 0, "server_ejects": 58, "forward_error": 53, "fragments": 0, "192.168.100.68:6379": { "server_eof": 1, "server_err": 12, "server_timedout": 0, "server_connections": 1, "server_ejected_at": 1427074216551863, "requests": 552, "request_bytes": 333808, "responses": 540, "response_bytes": 265628, "in_queue": 0, "in_queue_bytes": 0, "out_queue": 0, "out_queue_bytes": 0 }, "192.168.100.68:6380": { "server_eof": 1, "server_err": 9, "server_timedout": 0, "server_connections": 1, "server_ejected_at": 1427074216552893, "requests": 19, "request_bytes": 1498, "responses": 10, "response_bytes": 202, "in_queue": 0, "in_queue_bytes": 0, "out_queue": 0, "out_queue_bytes": 0 }, "192.168.100.68:6381": { "server_eof": 1, "server_err": 1, "server_timedout": 0, "server_connections": 1, "server_ejected_at": 1427037406770236, "requests": 286, "request_bytes": 10672, "responses": 285, "response_bytes": 23425, "in_queue": 0, "in_queue_bytes": 0, "out_queue": 0, "out_queue_bytes": 0 }, "192.168.100.69:6379": { "server_eof": 1, "server_err": 11, "server_timedout": 0, "server_connections": 1, "server_ejected_at": 1427074216550647, "requests": 54, "request_bytes": 2580, "responses": 43, "response_bytes": 215, "in_queue": 0, "in_queue_bytes": 0, "out_queue": 0, "out_queue_bytes": 0 }, "192.168.100.69:6380": { "server_eof": 1, "server_err": 9, "server_timedout": 0, "server_connections": 1, "server_ejected_at": 1427040015877803, "requests": 291, "request_bytes": 15563, "responses": 282, "response_bytes": 1402, "in_queue": 0, "in_queue_bytes": 0, "out_queue": 0, "out_queue_bytes": 0 }, "192.168.100.69:6381": { "server_eof": 0, "server_err": 11, "server_timedout": 0, "server_connections": 1, "server_ejected_at": 1427059816411041, "requests": 193, 
"request_bytes": 17292, "responses": 182, "response_bytes": 1767, "in_queue": 0, "in_queue_bytes": 0, "out_queue": 0, "out_queue_bytes": 0 } } }
由此可知,zabbix需要监控service、total_connections等几个固定字段。同时,配置文件中的各个pool并不固定,可能随时变更;各个pool下面的redis主机同样不固定,也可能变更。因此需要使用zabbix的低级发现(Low-Level Discovery)功能,自动发现由pool和redis主机构成的键值对。例如通过zabbix来发现
"{#REDIS_SERVER}":"gintamaXXXX192.168.100.69:6381"
这样的键值对。然后再根据键值对获取各个字段的值。
各个字段的含义可以使用nutcracker -D获取
pool stats:
  client_eof          "# eof on client connections"
  client_err          "# errors on client connections"
  client_connections  "# active client connections"
  server_ejects       "# times backend server was ejected"
  forward_error       "# times we encountered a forwarding error"
  fragments           "# fragments created from a multi-vector request"

server stats:
  server_eof          "# eof on server connections"
  server_err          "# errors on server connections"
  server_timedout     "# timeouts on server connections"
  server_connections  "# active server connections"
  server_ejected_at   "timestamp when server was ejected in usec since epoch"
  requests            "# requests"
  request_bytes       "total request bytes"
  responses           "# responses"
  response_bytes      "total response bytes"
  in_queue            "# requests in incoming queue"
  in_queue_bytes      "current request bytes in incoming queue"
  out_queue           "# requests in outgoing queue"
  out_queue_bytes     "current request bytes in outgoing queue"
二 编写twemproxy的pool和redis主机发现脚本
twemproxy_pools_discovery.py
#!/usr/bin/python
"""Zabbix low-level discovery of twemproxy (nutcracker) pools.

Reads the nutcracker YAML configuration and prints a Zabbix LLD JSON
document containing one ``{#REDIS_POOL}`` entry per top-level key of the
configuration (each top-level key is treated as a pool name).
"""
import json

import yaml

config_file = '/data/app_platform/twemproxy/conf/nutcracker.yml'

with open(config_file, 'r') as f:
    # safe_load builds plain Python objects only and, unlike a bare
    # yaml.load(f), does not need an explicit Loader argument (which
    # PyYAML 6 and later require).
    # An empty file parses to None, so fall back to an empty mapping.
    data = yaml.safe_load(f) or {}

# Sorted so the discovery output is stable between runs.
pools = [{'{#REDIS_POOL}': pool} for pool in sorted(data)]

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(json.dumps({'data': pools}, indent=4, separators=(',', ':')))
执行情况
$ python twemproxy_pools_discovery.py { "data":[ { "{#REDIS_POOL}":"gintama" } ] }
twemproxy_pools_servers_discovery.py
#!/usr/bin/python
"""Zabbix low-level discovery of twemproxy (nutcracker) backend servers.

Reads the nutcracker YAML configuration and prints a Zabbix LLD JSON
document containing one ``{#REDIS_SERVER}`` entry per configured server.
Each value has the form ``<pool>XXXX<field1>:<field2>``, where field1 and
field2 are the first two colon-separated fields of the server entry.
"""
import json

import yaml

# NOTE(review): this points at a copy under /tmp -- confirm it is the
# configuration file nutcracker is actually started with.
config_file = '/tmp/nutcracker.yml'

# Separator placed between the pool name and the server endpoint.
separator = 'XXXX'

with open(config_file, 'r') as f:
    # safe_load builds plain Python objects only and, unlike a bare
    # yaml.load(f), does not need an explicit Loader argument (which
    # PyYAML 6 and later require).
    # An empty file parses to None, so fall back to an empty mapping.
    data = yaml.safe_load(f) or {}

pools_servers = []
for pool in sorted(data):
    # A pool without a 'servers' list contributes no entries instead of
    # aborting the whole discovery with a KeyError.
    for server in (data[pool] or {}).get('servers') or []:
        # Keep only the first two colon-separated fields of the entry and
        # drop whatever follows the second colon.
        endpoint = ':'.join(server.split(':')[:2])
        # The macro name is {#REDIS_SERVER} (singular).
        pools_servers.append({'{#REDIS_SERVER}': pool + separator + endpoint})

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(json.dumps({'data': pools_servers}, indent=4, separators=(',', ':')))
在编写脚本的过程中需要边编写边调试才能达到自己想要的效果
执行情况如下:
$ python twemproxy_pools_servers_discovery.py { "data":[ { "{#REDIS_SERVER}":"gintamaXXXX192.168.100.68:6379" }, { "{#REDIS_SERVER}":"gintamaXXXX192.168.100.68:6380" }, { "{#REDIS_SERVER}":"gintamaXXXX192.168.100.68:6381" }, { "{#REDIS_SERVER}":"gintamaXXXX192.168.100.69:6379" }, { "{#REDIS_SERVER}":"gintamaXXXX192.168.100.69:6380" }, { "{#REDIS_SERVER}":"gintamaXXXX192.168.100.69:6381" } ] }
三 编写twemproxy状态信息获取脚本
twemproxy_status.py
这个脚本需要安装argparse模块
Python2.6使用pip install argparse安装
Python2.7及以上版本,argparse模块已包含在标准库中,无需额外安装
#!/usr/bin/env python
"""Print a single twemproxy (nutcracker) statistic for Zabbix.

The script connects to the nutcracker stats port, reads the JSON status
document and prints the value selected by ``--metric`` (plus
``--poolname`` for pool-level metrics or ``--poolserver`` for
server-level metrics).
"""
import argparse
import json
import socket

# Shape of the status document returned by the stats port (truncated):
#
# {
#   "service": "nutcracker",
#   "source": "gintama-taiwan-redis1",
#   "version": "0.4.0",
#   "uptime": 136873,
#   "timestamp": 1427168759,
#   "total_connections": 489,
#   "curr_connections": 7,
#   "gintama": {
#     "client_eof": 0,
#     "client_err": 420,
#     "client_connections": 0,
#     "server_ejects": 62,
#     "forward_error": 57,
#     "fragments": 0,
#     "192.168.100.68:6379": {
#       "server_eof": 1,
#       "server_err": 10,
#       "server_timedout": 0,
#       "server_connections": 1,
#       "server_ejected_at": 1427074216548518,
#       "requests": 627,
#       "request_bytes": 329980,
#       "responses": 617,
#       "response_bytes": 280709,
#       "in_queue": 0,
#       "in_queue_bytes": 0,
#       "out_queue": 0,
#       "out_queue_bytes": 0
#     },
#     ...
#
# Discovery macros consumed by this script:
#   "{#REDIS_SERVER}":"gintamaXXXX192.168.100.69:6381"
#   "{#REDIS_POOL}":"gintama"

# Separator between pool name and server endpoint in --poolserver values.
POOL_SERVER_SEPARATOR = 'XXXX'

# Metrics found at the top level of the status document.
BASE_METRICS = (
    "service", "source", "version", "uptime", "timestamp",
    "total_connections", "curr_connections",
)

# Metrics found directly under a pool entry.
POOL_METRICS = (
    "client_eof", "client_err", "client_connections",
    "server_ejects", "forward_error", "fragments",
)

# Metrics found under a server entry inside a pool.
SERVER_METRICS = (
    "server_eof", "server_err", "server_timedout", "server_connections",
    "server_ejected_at", "requests", "request_bytes", "responses",
    "response_bytes", "in_queue", "in_queue_bytes", "out_queue",
    "out_queue_bytes",
)


def split_pool_server(poolserver):
    """Split a ``<pool>XXXX<server>`` value into ``(pool, server)``.

    Raises:
        ValueError: if the separator is missing or either part is empty.
    """
    pool, sep, redis_server = poolserver.partition(POOL_SERVER_SEPARATOR)
    if not (sep and pool and redis_server):
        raise ValueError('malformed pool/server value: %r' % (poolserver,))
    return pool, redis_server


class NutcrackerServer(object):
    """Client for the nutcracker stats port.

    Every ``nutcracker_*_info`` method fetches a fresh status document
    and prints the requested value to stdout.
    """

    def __init__(self, server='127.0.0.1', port=22222):
        self.server = server
        self.port = port

    def nutcracker_status(self):
        """Fetch the status document and store the parsed JSON in ``self.data``."""
        conn = socket.create_connection((self.server, self.port))
        chunks = []
        try:
            # Read until recv returns an empty result.
            while True:
                buf = conn.recv(4096)
                if not buf:
                    break
                chunks.append(buf)
        finally:
            # Close the socket even if recv raises.
            conn.close()
        # recv returns bytes on Python 3; decode once before parsing.
        self.data = json.loads(b''.join(chunks).decode('utf-8'))

    def nutcracker_info(self, base_metric):
        """Print a top-level metric such as ``uptime``."""
        self.nutcracker_status()
        print(self.data[base_metric])

    def nutcracker_pool_info(self, pool, pool_metric):
        """Print a metric of the given pool."""
        self.nutcracker_status()
        print(self.data[pool][pool_metric])

    def nutcracker_server_info(self, pool, redis_server, server_metric):
        """Print a metric of the given server inside the given pool."""
        self.nutcracker_status()
        print(self.data[pool][redis_server][server_metric])


def parse_args(argv=None):
    """Parse command-line options.

    Args:
        argv: optional list of arguments; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``metric``, ``poolname`` and
        ``poolserver`` attributes.
    """
    parser = argparse.ArgumentParser(
        description='Twemproxy monitoring tool with Zabbix!',
        argument_default=False)
    parser.add_argument(
        '--metric', dest='metric', action='store', required=True,
        help='the twemproxy metric,such as uptime or version,etc')
    parser.add_argument(
        "--poolname", default='gintama', dest='poolname', action='store',
        required=False,
        help='the twemproxy pool and server discoveried by zabbix')
    parser.add_argument(
        "--poolserver", default='gintamaXXXX192.168.100.69:6380',
        dest='poolserver', action='store', required=False,
        help='the twemproxy pool and server discoveried by zabbix')
    return parser.parse_args(argv)


def main():
    """Select the metric category and print the requested value."""
    args = parse_args()
    metric = args.metric
    if not metric:
        print("invalid metric")
        return

    twemproxy = NutcrackerServer()
    if metric in BASE_METRICS:
        twemproxy.nutcracker_info(metric)
    elif metric in POOL_METRICS:
        # Pool metrics use --poolname only; --poolserver (which always
        # has a default value) must not override the requested pool.
        if not args.poolname:
            print("invalid pool name")
            return
        twemproxy.nutcracker_pool_info(args.poolname, metric)
    elif metric in SERVER_METRICS:
        # Server metrics take both pool and server from --poolserver.
        try:
            pool, redis_server = split_pool_server(args.poolserver or '')
        except ValueError:
            print("invalid pool server")
            return
        twemproxy.nutcracker_server_info(pool, redis_server, metric)
    else:
        print("invalid metric")


if __name__ == '__main__':
    main()
四 添加zabbix子配置文件
twemproxy_status.conf
### Option: UserParameter
#	User-defined parameter to monitor. There can be several user-defined parameters.
#	Format: UserParameter=<key>,<shell command>
#	See 'zabbix_agentd' directory for examples.
#
# Mandatory: no
# Default:
# UserParameter=

# Low-level discovery rules: twemproxy pools and pool/server pairs.
UserParameter=redis_pool.discovery,/usr/bin/python /usr/local/zabbix/bin/twemproxy_pools_discovery.py
UserParameter=redis_server.discovery,/usr/bin/python /usr/local/zabbix/bin/twemproxy_pools_servers_discovery.py

# Item keys: $1 is the metric name, $2 the discovered pool or pool/server value.
UserParameter=nutcracker_info[*],/usr/bin/python /usr/local/zabbix/bin/twemproxy_status.py --metric $1
UserParameter=nutcracker_pool_info[*],/usr/bin/python /usr/local/zabbix/bin/twemproxy_status.py --metric $1 --poolname $2
UserParameter=nutcracker_server_info[*],/usr/bin/python /usr/local/zabbix/bin/twemproxy_status.py --metric $1 --poolserver $2
五 添加zabbix模板
参见附件
参考文档:
http://www.bejson.com/
https://github.com/gfranxman/NutcrackerMonitor/blob/master/ballgazer.py