在单个服务器上创建多个 Redis 实例时,对其重要参数的监控非常重要。以下脚本用于监控服务器上多实例的负载情况,主要包含:Redis 实例的 QPS、内存使用情况、客户端连接数,以及服务器的内存使用率、CPU 使用率、CPU load 值、网卡流量等。脚本会把采集到的数据显示出来并写入数据库,方便查看多个服务器的多实例情况以及进行数据汇总。脚本写得有点仓促,有兴趣的同学可以整理一下,使其模块化、简洁化。脚本如下:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Monitor several Redis instances and the local host, logging to MySQL.

Every sampling interval the script prints, and inserts into
``redis.redis_stats``, one row per Redis instance containing the instance's
QPS, client count and memory use together with the host's memory usage,
CPU usage, load average and per-interface network throughput.

Installing the python sigar module::

    apt-get install libtool automake gettext python-MySQLdb screen pkg-config git
    git clone git://github.com/hyperic/sigar.git sigar.git
    ./autogen.sh
    ./configure
    make
    make install
    cd bindings/python/
    python setup.py install

Table definition::

    CREATE TABLE `redis_stats` (
      `id` int(11) NOT NULL AUTO_INCREMENT,
      `host_name` varchar(50) NOT NULL,
      `qps` int(11) NOT NULL,
      `clients` int(11) NOT NULL,
      `redis_mem_used` varchar(50) NOT NULL,
      `sys_mem_used_pers` float NOT NULL,
      `cpu_used` float NOT NULL,
      `cpu_load` varchar(50) NOT NULL,
      `netband` varchar(255) NOT NULL,
      `uptime` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
               ON UPDATE CURRENT_TIMESTAMP,
      PRIMARY KEY (`id`),
      KEY `host_name` (`host_name`),
      KEY `uptime` (`uptime`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8

Latest record of every instance::

    SELECT s.host_name, s.qps, s.clients, s.redis_mem_used,
           concat(s.sys_mem_used_pers, '%') AS sys_mem_used,
           concat(s.cpu_used, '%') AS cpu_used,
           s.cpu_load, s.netband
    FROM redis.redis_stats s
    JOIN (SELECT max(id) AS id FROM redis.redis_stats
          GROUP BY host_name) latest ON latest.id = s.id
    ORDER BY s.qps DESC;
"""
import os
import subprocess
import sys
import threading
import time

import MySQLdb
import sigar

# Seconds between samples; also the divisor for the network throughput.
SAMPLE_INTERVAL = 5

# Redis instances to poll, written as "host:port".
REDIS_INSTANCES = ['192.168.56.101:6379', '192.168.1.87:16379']

_INSERT_SQL = "INSERT INTO redis.redis_stats (`host_name`,`qps`,`clients`,`redis_mem_used`,`sys_mem_used_pers`,`cpu_used`,`cpu_load`,`netband`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"


def log2db(check_log):
    """Insert the collected rows into ``redis.redis_stats``.

    check_log is a sequence of 8-item rows in the column order of the
    INSERT statement.  Logging is best effort: any failure is printed and
    swallowed so that the monitoring loop keeps running.
    """
    if not check_log:
        # Nothing was collected (e.g. every instance was unreachable).
        return
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration that is not checked in.
    log_host = '192.168.56.101'
    log_user = 'root'
    log_pass = '1q2w3e4r'
    try:
        conn = MySQLdb.connect(host=log_host, port=3306, user=log_user,
                               passwd=log_pass, charset='utf8',
                               connect_timeout=20)
        # The nested finally blocks release the cursor and the connection
        # even when the insert or the commit fails.
        try:
            cursor = conn.cursor()
            try:
                cursor.executemany(_INSERT_SQL, tuple(check_log))
                conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
    except Exception as e:
        print(e)


def _parse_redis_info(lines):
    """Turn the ``key:value`` lines of ``redis-cli info`` into a dict."""
    fields = {}
    for raw in lines:
        line = raw.strip()
        # Skip blank lines and the "# Section" headers.
        if not line or line.startswith('#'):
            continue
        key, sep, value = line.partition(':')
        if sep:
            fields[key] = value
    return fields


def redis_info(host, port, res):
    """Query one Redis instance and append its stats to res.

    On success appends ``[host, port, connected_clients,
    instantaneous_ops_per_sec, used_memory_human]`` (the three stats as the
    strings reported by redis-cli).  When the instance cannot be queried a
    message is printed and nothing is appended.  Intended to run as a
    thread target, hence the result is delivered through res.
    """
    # An argument list (no shell) keeps host/port from being interpreted
    # as shell syntax.
    command = ['redis-cli', '-h', str(host), '-p', str(port), 'info']
    try:
        proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                                universal_newlines=True)
        output = proc.communicate()[0]
    except OSError as e:
        print("redis-cli failed for %s:%s: %s" % (host, port, e))
        return
    fields = _parse_redis_info(output.splitlines())
    try:
        row = [host, port,
               fields["connected_clients"],
               fields["instantaneous_ops_per_sec"],
               fields["used_memory_human"]]
    except KeyError as e:
        print("Redis %s:%s did not report %s; skipping" % (host, port, e))
        return
    res.append(row)


def _poll_redis(instances):
    """Query every "host:port" instance in parallel and return the rows."""
    res = []
    threads = []
    for instance in instances:
        parts = instance.split(':')
        threads.append(threading.Thread(target=redis_info,
                                        args=(parts[0], parts[1], res)))
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    return res


def _global_interfaces():
    """Return the interface names listed on the "global" lines of ``ip a``.

    Names are de-duplicated (an interface with several global addresses is
    listed once per address) while keeping their first-seen order.
    """
    # NOTE(review): field 7 is assumed to be the interface name; confirm
    # against the iproute2 output format on the target hosts.
    pipe = os.popen("/bin/ip a|grep global|awk '{print $7}'")
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()
    names = []
    for line in lines:
        name = line.strip()
        if name and name not in names:
            names.append(name)
    return names


def _read_net_counters(sg, nets):
    """Read the raw tx/rx byte counters of every interface in nets."""
    counters = {}
    for net in nets:
        stat = sg.net_interface_stat(net)
        counters[net + '_Out'] = stat.tx_bytes()
        counters[net + '_In'] = stat.rx_bytes()
    return counters


def _net_rates(current, previous, interval):
    """Format the counter deltas as MB/s strings with two decimals.

    A counter without a previous reading uses 0 as its baseline, so the
    very first sample (and the first sample of an interface that appears
    later) is the total byte count divided by interval rather than a true
    rate.
    """
    rates = {}
    for key, value in current.items():
        delta = float(value - previous.get(key, 0))
        rates[key] = "%.2f" % (delta / interval / 1024 / 1024)
    return rates


def _sample_once(prev_counters):
    """Collect, print and store one sample; return the new net baseline."""
    sg = sigar.open()
    # Host memory usage in percent.
    mem_percent = "%.2f" % sg.mem().used_percent()
    # Overall CPU usage in percent, derived from the idle share.
    # NOTE(review): presumably computed from cumulative counters, i.e. an
    # average rather than usage during the last interval; confirm.
    cpu = sg.cpu()
    cpu_used = "%.2f" % ((1 - float(cpu.idle()) / cpu.total()) * 100)
    # CPU load averages, comma separated.
    cpu_loadavg = ','.join([str(value) for value in sg.loadavg()])
    # Network throughput per interface since the previous baseline.
    nets = _global_interfaces()
    netband = _net_rates(_read_net_counters(sg, nets), prev_counters,
                         SAMPLE_INTERVAL)

    banner = "*" * 20 + " Redis Status %s " % time.strftime("%Y-%m-%d %H:%M:%S") + "*" * 20
    print("\033[1;31;40m%s\033[0m" % banner)

    res = _poll_redis(REDIS_INSTANCES)

    header = ("Redis_host:port".ljust(23) + "Redis:QPS".ljust(10)
              + "Redis:Clients".ljust(15) + "Redis:Mem_used".ljust(15)
              + "Mem_percent".ljust(12) + "Cpu_used".ljust(10)
              + "Cpu_loadavg".ljust(17))
    print("\033[1;35;40m%s\033[0m" % header)

    all_qps = 0
    all_clients = 0
    check_log = []
    res.sort()
    for host, port, clients, qps, mem_used in res:
        name = host + ':' + port
        check_log.append([name, int(qps), int(clients), mem_used,
                          float(mem_percent), float(cpu_used), cpu_loadavg,
                          str(netband)])
        line = (name.ljust(23) + qps.ljust(10) + clients.ljust(15)
                + mem_used.ljust(15) + mem_percent.ljust(12)
                + cpu_used.ljust(10) + cpu_loadavg.ljust(17) + str(netband))
        print(line)
        all_qps += int(qps)
        all_clients += int(clients)
    log2db(check_log)

    summary = ("Summary All host:".ljust(23) + str(all_qps).ljust(10)
               + str(all_clients).ljust(10))
    print("\033[1;35;40m%s\033[0m" % summary)

    # Take the baseline right before sleeping so that the next delta spans
    # one sampling interval.
    return _read_net_counters(sg, nets)


def main():
    """Sample forever every SAMPLE_INTERVAL seconds; Ctrl-C exits with 0."""
    prev_counters = {}
    try:
        while True:
            prev_counters = _sample_once(prev_counters)
            time.sleep(SAMPLE_INTERVAL)
    except KeyboardInterrupt:
        sys.exit(0)


if __name__ == "__main__":
    main()
有图有真相: