[python] 系统监控

每分钟采集一次 Linux 信息,采集到的数据暂存到本地 sqlite。

每10分钟通过 HTTP 上报一次(下方示例配置中 interval_upload 为 60 秒,可按需调整)。

已上报的数据在 sqlite 库中标记为过期,过期数据每7天清理一次。

monitor4a.out是所有输出,monitor4a.log是INFO级别以上的日志(100MB的5个日志文件循环)

在 Red Hat Linux 上使用 Python 2.6.6–2.7.8 测试通过。

#!/usr/bin/env python
# _*_ coding:UTF-8 _*_
__author__ = 'shanl'
import socket

# Directory that holds the sqlite database and both log files.
store_root_dir= "."
# Settings read by the rest of this script.
configure={
    "domainid":             "BJ",                 # sent as 'domainid' with every upload
    "auth_address":         "127.0.0.1:8877",     # "host:port" of the receiving HTTP server
    "local_ip":             socket.gethostbyname(socket.gethostname()),  # resolved once, when this dict is built
    "auth_service_url":     "/cat/monitor.jsp",   # request path used for the upload POST
    "db_path":              "%s/monitor4a.db3" % store_root_dir,
    "encoding":             "UTF-8",              # sent as the Accept-Charset header

    # Seconds between two runs of each periodic task.
    "interval_collection":  60,         # collect host metrics
    "interval_upload":      60*1,       # upload pending rows (an earlier note here read "60*5")
    "interval_overdue":     60*60*24*7, # delete rows already marked overdue

    "logger_out":           "%s/monitor4a.out" % store_root_dir,  # file receiving all output, DEBUG and up
    "logger_log":           "%s/monitor4a.log" % store_root_dir,  # rotating file receiving INFO and up
    "logger_format":        "[%(levelname)s] %(asctime)s [line:%(lineno)d] %(message)s",
    "logger_maxBytes":      100*1024*1024,  # size limit of one rotating log file
    "logger_backupCount":   5,              # number of rotated log files kept

    "debug":                False,      # when True, cpu() returns a canned value instead of running mpstat
}

try:
    from os import popen
except:
    from subprocess import popen
import os
import sys
import httplib
import multiprocessing
import time
import sqlite3
from datetime import datetime as dt
from uuid import uuid4 as uuid
from zlib import crc32
import urllib
import logging
from logging.handlers import RotatingFileHandler

# Logger configuration.
# Root logger: every record from DEBUG up is written to the "logger_out"
# file, which is opened with filemode='w' on each start.
logging.basicConfig(
    filename=configure["logger_out"],
    level=logging.DEBUG,
    format=configure["logger_format"],
    filemode='w'
)
# Second handler on the root logger: records from INFO up also go to the
# size-rotated "logger_log" file, using the same line format.
Rthandler = RotatingFileHandler(
    configure["logger_log"],
    maxBytes=configure["logger_maxBytes"],
    backupCount=configure["logger_backupCount"])
Rthandler.setLevel(logging.INFO)
formatter = logging.Formatter(configure["logger_format"])
Rthandler.setFormatter(formatter)
logging.getLogger('').addHandler(Rthandler)

#monitor main function
def pymain():
    """Prepare the local database, then launch the monitoring process.

    The sqlite tables are created if missing, the connection is held for a
    further three seconds and closed, and a MonitorTask process is started.
    """
    logging.info("waitting...")
    with MonitorDB() as store:
        store.initTables()
        # Pause before the worker starts; the connection closes on leaving the block.
        time.sleep(3)

    logging.info("monitor start...")
    MonitorTask().start()

def getUUID():
    """Return a random row id in the range 0 .. 2**32-1.

    Two random UUIDs are concatenated and reduced with CRC-32. The text is
    encoded to bytes before hashing and the result is masked to an unsigned
    32-bit value, so the id has the same range on every Python version
    (older interpreters return a signed CRC, newer ones reject text input).

    NOTE: the id space is only 32 bits wide, so ids are not guaranteed to
    be unique across a large number of rows.
    """
    seed = ("%s%s" % (uuid(), uuid())).encode("ascii")
    return crc32(seed) & 0xffffffff

class MonitorDB(object):
    """Thin wrapper around the local sqlite3 store of collected metrics.

    All rows live in the single table ``t_monitor``. The ``overdue`` column
    is 0 for freshly saved rows and is set to 1 by setToOverdue(); rows with
    overdue=1 are removed by deleOverdue().

    Instances are context managers: leaving the ``with`` block closes the
    connection. save(), getTop(), setToOverdue() and deleOverdue() log any
    error instead of raising it.
    """
    __db = None
    # table name -> DDL executed by initTables() when that table is missing
    __init_sqls = {
        "t_monitor":'''
            create table t_monitor(
                uuid long,
                time varchar(32),
                key varchar(32),
                value varchar(64),
                overdue int
            )
        ''',
    }

    def __init__(self, dbPath=None):
        """Open dbPath, or configure["db_path"] when dbPath is None."""
        if dbPath is None:
            dbPath = configure["db_path"]
        self.connect(dbPath)

    def connect(self, dbPath):
        """Open the sqlite3 database file dbPath with a timeout of 5."""
        # timeout is passed by keyword; its positional form is deprecated
        # in recent Python releases.
        self.__db = sqlite3.connect(dbPath, timeout=5)
        logging.debug("initialize sqlite3:'%s' done." % dbPath)

    def initTables(self):
        """Create every table of the schema that does not exist yet."""
        for tn, sql in self.__init_sqls.items():
            cur = self.__db.cursor()
            try:
                # Bound parameter instead of string-built SQL.
                cur.execute(
                    "SELECT COUNT(*) FROM sqlite_master where type='table' and name=?",
                    (tn,))
                if cur.fetchone()[0] == 0:
                    cur.execute(sql)
            finally:
                cur.close()
            self.__db.commit()
        logging.debug("initialize sqlite3 tables done.")

    def close(self):
        """Close the database connection."""
        self.__db.close()
        logging.debug("close sqlite3.")

    def __write(self, sql, rows=None):
        """Run one write statement and commit; the cursor is always closed.

        With rows given the statement is run once per row (executemany),
        otherwise it is executed once without parameters. Errors propagate
        to the caller.
        """
        cur = self.__db.cursor()
        try:
            if rows is None:
                cur.execute(sql)
            else:
                cur.executemany(sql, rows)
        finally:
            cur.close()
        self.__db.commit()

    def save(self,rows):
        """Insert rows, each a (uuid, time, key, value) tuple, with overdue=0."""
        try:
            self.__write(
                "insert into t_monitor (uuid,time,key,value,overdue)values(?,?,?,?,0)",
                rows)
            logging.debug('save success,rows:%s', rows)
        except Exception as e:
            logging.error('save error:%s,rows:%s.', e, rows)

    def getTop(self,n=50):
        """Return up to n rows with overdue=0, ordered by the time column.

        @param n maximum number of rows
        @return list of dicts with the keys "uuid", "time", "key", "value";
                an empty list when the query fails
        """
        ret = []
        try:
            cur = self.__db.cursor()
            try:
                cur.execute(
                    'select uuid,time,key,value from t_monitor where overdue=0 order by time limit 0,?',
                    (n,))
                ret = [
                    {"uuid": row[0], "time": row[1], "key": row[2], "value": row[3]}
                    for row in cur
                ]
            finally:
                cur.close()
            self.__db.commit()
            logging.debug('getTop success,n:%s', n)
        except Exception as e:
            logging.error("getTop error:%s,n:%s.", e, n)
        return ret

    def setToOverdue(self,ids):
        """Set overdue=1 on every row whose uuid is listed.

        @param ids sequence of 1-tuples (uuid,)
        """
        try:
            self.__write("update t_monitor set overdue=1 where uuid=?", ids)
            logging.debug("setToOverdue success,ids:%s", ids)
        except Exception as e:
            # The previous '"...%s...%s" % e,ids' applied % to e alone and
            # raised TypeError from inside this handler.
            logging.error("setToOverdue error:%s,ids:%s", e, ids)

    def deleOverdue(self):
        """Delete every row that has overdue=1."""
        try:
            self.__write("delete from t_monitor where overdue=1")
            logging.debug("deleOverdue success")
        except Exception as e:
            logging.error("deleOverdue error:%s", e)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the with-block propagates.
        self.close()

class HostinfoCollection(object):
    """Collect CPU, memory, disk and identity information of the local host.

    CPU, memory and disk figures are obtained by running shell commands
    (mpstat, cat /proc/meminfo, df) through popen and parsing their text
    output; every figure is returned as a formatted string inside a tuple.
    """

    def __readLines(self, cmd):
        """Run cmd through popen and return its output lines; the pipe is always closed."""
        pipe = popen(cmd)
        try:
            return pipe.readlines()
        finally:
            pipe.close()

    def __firstFields(self, cmd):
        """Return the whitespace-separated fields of cmd's first output line ([] if none)."""
        lines = self.__readLines(cmd)
        return lines[0].strip().split() if lines else []

    def __nowTime(self):
        """Return the current local time as 'YYYY-MM-DD HH:MM:SS'."""
        # strftime instead of slicing str(datetime): the string form has no
        # fractional part when microsecond is 0, so a fixed slice would cut
        # into the time itself.
        return dt.today().strftime("%Y-%m-%d %H:%M:%S")

    def cpu(self):
        """Return a 1-tuple holding the CPU usage as a '%.2f' string.

        The value is the sum of fields 2 and 4 of mpstat's 'Average' line
        (presumably %usr and %sys; confirm against the installed mpstat).
        With configure["debug"] set, the canned value '2.13' is returned.
        Raises IndexError when mpstat produced no 'Average' line.
        """
        if configure["debug"]:
            # A real 1-tuple, like the normal path; the bare ('2.13') was a
            # plain string, so indexing [0] gave only its first character.
            return ('2.13',)
        logging.debug('collection cpu.')
        ls = self.__firstFields("export LC_ALL=en_US && mpstat 1 1 |grep 'Average'")
        cpuUse = "%.2f" % (float(ls[2])+float(ls[4]))
        return (cpuUse,)

    def memory(self):
        """Return (memMax, memPer) as '%.2f' strings.

        memMax is MemTotal from /proc/meminfo divided by 1000*1000; memPer
        is the percentage of MemTotal that is not MemFree, Buffers or Cached.
        """
        logging.debug('collection memory.')
        cmd = '''cat /proc/meminfo |grep -E 'MemTotal:|MemFree:|Buffers:|Cached:' '''
        memdict = dict()
        # Every matched line is parsed. Extra matches (e.g. a line that merely
        # ends in 'Cached:') get their own key and are ignored below, so the
        # result does not depend on how many lines grep returns.
        for line in self.__readLines(cmd):
            its = line.strip().split()
            if len(its) < 2:
                continue
            memdict[its[0][:-1]] = int(its[1])
        memdict['MemUsedPer'] =100.0*(memdict['MemTotal']-memdict['MemFree']-memdict['Buffers']-memdict['Cached'])/memdict['MemTotal']
        memMax = "%.2f" % (1.0*memdict['MemTotal']/1000/1000)
        memPer = "%.2f" % memdict['MemUsedPer']
        return (memMax,memPer)

    def disk(self):
        """Return (HDUse, HDMax) as '%.2f' strings for the first matching filesystem.

        The df queries are tried in order and the first one that prints a
        line wins. HDUse is df's use percentage; HDMax is used+available
        divided by 1000*1000. Raises KeyError when no query matches.
        """
        logging.debug('collection disk.')
        cmds = [
            "export LC_ALL=en_US && df -l|grep '% /edass4a_ssg'",
            "export LC_ALL=en_US && df -l|grep '/dev/sda1'",
            "export LC_ALL=en_US && df -l|grep '% /home'"
        ]
        diskdict = dict()
        for cmd in cmds:
            ls = self.__firstFields(cmd)
            if not ls:
                continue
            # A 6-field line starts with the device name; otherwise the
            # columns are taken one position earlier (presumably df wrapped
            # a long device name onto its own line; confirm).
            first = 2 if len(ls)==6 else 1
            diskdict['cmd'] = cmd
            diskdict['used'] = int(ls[first])
            diskdict['available'] = int(ls[first+1])
            diskdict['usePer'] = ls[first+2][:-1]   # drop the trailing '%'
            diskdict['total'] = diskdict['used']+diskdict['available']
            break
        logging.debug("disk %s" % diskdict.__str__())
        if not diskdict:
            # Same exception type as the former failing lookup of 'total',
            # but with a message that names the actual problem.
            raise KeyError("no monitored filesystem found in df output")
        HDMax = "%.2f" % (diskdict['total']*1.0/1000/1000)
        HDUse = "%.2f" % (float(diskdict['usePer']))
        return (HDUse,HDMax)

    def ip(self):
        """Return (domainid, local_ip) as configured."""
        logging.debug('collection ip.')
        return (configure["domainid"],configure["local_ip"])

class HttpClient(object):
    """Minimal HTTP client used to POST form data to the configured server."""
    default_request_address =   configure["auth_address"]
    default_request_url     =   configure["auth_service_url"]
    default_request_charset =   configure["encoding"]

    def post(self,reqaddress=None,requrl=None,reqparams=None,reqcharset=None):
        '''
        Send reqparams as a form-encoded POST and return the response.

        @param reqaddress "host:port" or "host"; default_request_address when None
        @param requrl request path; default_request_url when None
        @param reqparams dict of form fields; empty when None
        @param reqcharset Accept-Charset value; default_request_charset when None
        @return (resp.status,resp.reason,resp.read())
        Connection and protocol errors propagate to the caller; the
        connection is closed in every case.
        '''
        _reqaddress = self.default_request_address if reqaddress is None else reqaddress
        _requrl = self.default_request_url if requrl is None else requrl
        _reqcharset = self.default_request_charset if reqcharset is None else reqcharset
        # None instead of a shared mutable {} default argument.
        params = urllib.urlencode({} if reqparams is None else reqparams)
        headers = {
            "Content-Type":     "application/x-www-form-urlencoded",
            "Connection":       "Keep-Alive",
            "Accept":           "text/plain,text/html",
            "Accept-Charset":   _reqcharset,
            "User-Agent":       "Python Client",
        }
        logging.debug('http connect to:%s,params:%s' % (_reqaddress,params) )
        # An address without ":port" leaves the port to httplib's default
        # instead of failing on a missing list element.
        host, _sep, port = _reqaddress.partition(':')
        httpClient = httplib.HTTPConnection(
            host,
            port=int(port) if port else None,
            timeout=3)
        try:
            httpClient.connect()
            httpClient.request(
                method="POST",  url=_requrl,
                body=params,    headers=headers
                )
            resp = httpClient.getresponse()
            return (
                resp.status,
                resp.reason,
                resp.read()
            )
        finally:
            # Also runs when connect/request/getresponse raises, so the
            # socket is never left open.
            httpClient.close()

    def get(self):
        """Placeholder; not implemented, returns None."""
        pass

class MonitorTask(multiprocessing.Process):
    """Process that periodically collects, uploads and purges host metrics.

    run() polls once per second and starts each of the three tasks
    ("collection", "upload", "overdue") whenever its interval has elapsed,
    each with its own MonitorDB connection.
    """
    # Seconds between runs of each task; replaced as a whole by the
    # `interval` argument of __init__ when one is given.
    __interval={
        "collection":   configure["interval_collection"],
        "upload":       configure["interval_upload"],
        "overdue":      configure["interval_overdue"],
    }

    def __init__(self, interval=None):
        """
        @param interval optional dict with the keys "collection", "upload"
               and "overdue" (seconds) used instead of the configured values
        """
        multiprocessing.Process.__init__(self)
        if interval is not None:
            self.__interval = interval

    def __collection(self,db):
        """Collect cpu, memory and disk figures and save them as five rows."""
        # strftime instead of slicing str(datetime): the string form has no
        # fractional part when microsecond is 0, so a fixed slice would cut
        # into the time itself.
        tnow = dt.today().strftime("%Y-%m-%d %H:%M:%S")
        hostinfo = HostinfoCollection()

        cpu = hostinfo.cpu()
        mem = hostinfo.memory()
        disk = hostinfo.disk()

        rows = [
            (getUUID(),tnow,'cpuUse',cpu[0]),
            (getUUID(),tnow,'memMax',mem[0]),
            (getUUID(),tnow,'memPer',mem[1]),
            (getUUID(),tnow,'HDUse',disk[0]),
            (getUUID(),tnow,'HDMax',disk[1]),
        ]
        db.save(rows)

    def __upload(self, db):
        """POST up to 100 pending rows, one request per row, and mark the accepted ones.

        Uploading stops at the first response that is not 200 or at the
        first exception; rows accepted before that point are still marked
        overdue.
        """
        dbret = db.getTop(100)
        if not dbret:
            return

        upload_success_list = []
        ip = HostinfoCollection().ip()

        try:
            for i in dbret:
                params = {
                    'mkey':         i['key'],
                    'mvalue':       i['value'],
                    'uptime':       i['time'],
                    'domainid':     ip[0],
                    'ip':           ip[1],
                }
                hcret = HttpClient().post(reqparams=params)
                logging.debug('http response status:%d' % hcret[0])
                if 200 != hcret[0]:
                    logging.error('http response status:%d,reason:%s' % (hcret[0],hcret[1]) )
                    break
                upload_success_list.append((i['uuid'],) )
                time.sleep(0.1)
        except Exception as e:
            logging.error("upload error:%s" % e)
        db.setToOverdue(upload_success_list)
        logging.info("upload rows: %d" % len(upload_success_list))

    def __overdue(self,db):
        """Delete the rows already marked overdue."""
        db.deleOverdue()

    def run(self):
        """Scheduling loop of the process.

        Any exception escaping a task is logged and ends the loop, and with
        it this process.
        """
        # Tasks in the order they are checked on every tick.
        tasks = (
            ("collection",  self.__collection),
            ("upload",      self.__upload),
            ("overdue",     self.__overdue),
        )
        ltime = int(time.time())
        lastExecTime = dict((name, ltime) for name, _task in tasks)
        try:
            while True:
                lnow = int(time.time())

                for name, task in tasks:
                    if lnow-lastExecTime[name] < self.__interval[name]:
                        continue
                    logging.info("run.%s()" % name)
                    with MonitorDB() as db:
                        task(db)
                    lastExecTime[name] = lnow

                time.sleep(1)
        except Exception as e:
            logging.error("run error:%s" % e)

# Start the monitor only when this file is executed as a script.
if __name__=="__main__":
    pymain()


服务器端,随便写个jsp或其他什么的

比如这个node.js

// Port passed to server.listen().
var LISTEN_PORT = 80;
// Host passed to server.listen(); an empty string here.
var HOST='';

// Fixed HTML body sent as the response to every request.
var TEST_HTML="<html><body>node.js httpServer.</body></html>\n";

/**
 * Start a minimal HTTP test server on HOST:LISTEN_PORT.
 *
 * The URL of every request is printed to the console and every request is
 * answered with status 200 and the fixed TEST_HTML page.
 */
function httpserver0(){
	var http = require('http');

	// Log the requested URL, then reply with the fixed test page.
	function onRequest(req, resp){
		console.log(req.url);
		resp.writeHead(200, {'Content-Type': 'text/html'});
		resp.end(TEST_HTML);
	}

	// Connection hook, intentionally left empty.
	function onConnection(socket){
		//console.log('new conntion.');
	}

	var server = http.createServer();
	server.listen(LISTEN_PORT, HOST);
	server.on('request', onRequest);
	server.on('connection', onConnection);

	console.log('server running at '+HOST+':'+LISTEN_PORT);
}

// Start the test server as soon as the script is loaded.
httpserver0();

E:\a\node1>node httpserver1.js
server running at :80
/?uptime=2014-08-04+15%3A17%3A58&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=0.090000&mkey=cpuUse
/?uptime=2014-08-04+15%3A17%3A58&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=3951&mkey=memMax
/?uptime=2014-08-04+15%3A17%3A58&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=3792&mkey=memPer
/?uptime=2014-08-04+15%3A17%3A58&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=37869&mkey=HDUse
/?uptime=2014-08-04+15%3A17%3A58&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=470244&mkey=HDMax
/?uptime=2014-08-04+15%3A18%3A04&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=0.090000&mkey=cpuUse
/?uptime=2014-08-04+15%3A18%3A04&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=3951&mkey=memMax
/?uptime=2014-08-04+15%3A18%3A04&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=3791&mkey=memPer
/?uptime=2014-08-04+15%3A18%3A04&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=37869&mkey=HDUse
/?uptime=2014-08-04+15%3A18%3A04&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=470244&mkey=HDMax


你可能感兴趣的:([python] 系统监控)