集群环境日志抓取的 Python 脚本

日志抓取的 Python 脚本 loggrab.py

 

#! /usr/bin/env python

import getopt, sys, os, re
from stat import *
from datetime import datetime
from datetime import timedelta

# Supported log levels, listed from lowest to highest; get_levels() slices this
# list to select a level together with every level after it.
LOG_LEVELS = ['DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL']

def usage():
    """Print the command-line help text to standard output.

    The program name shown on the first line is taken from ``sys.argv[0]``.
    """
    # A parenthesised single-argument print behaves identically on Python 2
    # and Python 3; the bare print statement is a syntax error on Python 3.
    print("""Usage: %s [OPTIONS...] grab logs in specified duration
    Options:
        -d, --log_dir=<log_dir>         directory of log files
        -b, --base_name=<base_name>     log file base name
        -s, --start_time=<start_time>   start time, mandatory
        -e, --end_time=<end_time>       end time, optional, default: now
        -l, --level=<level>             lowest log level, optional, options: DEBUG, INFO, WARN, ERROR, FATAL, default: ERROR
        -k, --keyword=<keyword>         keyword to grab
        -h, --help                      display this help
    Note: time format: %%Y-%%m-%%d %%H:%%M, e.g. 2012-07-07 10:00
    """ % (sys.argv[0]))

def parse_datetime(arg):
    """Parse a ``YYYY-MM-DD HH:MM`` command-line value into a datetime.

    Args:
        arg: the time string supplied on the command line.

    Returns:
        The parsed naive ``datetime``.

    If ``arg`` does not match the expected format, an error message and the
    usage text are printed and the process exits with status 1.
    """
    try:
        return datetime.strptime(arg, '%Y-%m-%d %H:%M')
    except ValueError:
        # "except X, err" is Python-2-only syntax and err was never used.
        print('ERROR: datetime format error\n')
        usage()
        sys.exit(1)

# Matches a log line of the form "<date> <time>,<millis> [<anything>] <LEVEL> <text>"
# and captures the timestamp (without milliseconds) and the level name.
# Compiled once instead of being re-parsed for every line.
_LOG_LINE_RE = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+\s\[.+\] (\w+) .+')


def print_match_logs(log_file, start_time, end_time, levels, keyword):
    """Print the lines of ``log_file`` that fall inside the requested window.

    A line is printed when it matches the log-line pattern, its timestamp is
    within ``[start_time, end_time]`` (inclusive), its level is in ``levels``
    and, if ``keyword`` is non-empty, the line contains ``keyword``. Lines
    that do not match the pattern are skipped.

    Args:
        log_file: path of the log file to scan.
        start_time: earliest timestamp (datetime) to include.
        end_time: latest timestamp (datetime) to include.
        levels: collection of level names to include.
        keyword: substring that must occur in the line, or None/'' for no
            keyword filtering.
    """
    # "with" guarantees the file is closed even if an exception escapes.
    with open(log_file) as f:
        for line in f:
            m = _LOG_LINE_RE.match(line)
            if not m:
                continue
            try:
                cur_time = datetime.strptime(m.group(1), '%Y-%m-%d %H:%M:%S')
            except ValueError:
                # Shaped like a timestamp but not a valid date/time: skip it.
                continue
            if not (start_time <= cur_time <= end_time):
                continue
            if m.group(2) not in levels:
                continue
            if keyword and keyword not in line:
                continue
            # The line already ends with a newline; strip it so that print
            # does not emit an extra blank line after every entry.
            print(line.rstrip('\n'))

def log_extract(log_dir, log_file_base_name, start_time, end_time, levels, keyword):
    """Scan one log file per calendar day between start_time and end_time.

    For the current day the file ``<log_dir>/<base_name>`` is read; for any
    other day the file ``<log_dir>/<base_name>.<YYYY-MM-DD>`` is read. Each
    existing file is passed to print_match_logs(); a warning is printed for
    every file that does not exist.

    Args:
        log_dir: directory containing the log files.
        log_file_base_name: file name of the un-suffixed log file.
        start_time: earliest timestamp (datetime) to include.
        end_time: latest timestamp (datetime) to include.
        levels: collection of level names to include.
        keyword: substring filter forwarded to print_match_logs().
    """
    one_day = timedelta(days=1)
    # Truncate to midnight so days can be compared and stepped one at a time.
    cur_date = start_time.replace(hour=0, minute=0, second=0, microsecond=0)
    end_date = end_time.replace(hour=0, minute=0, second=0, microsecond=0)
    today = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)
    while cur_date <= end_date:
        if cur_date == today:
            file_name = log_file_base_name
        else:
            file_name = log_file_base_name + '.' + cur_date.strftime('%Y-%m-%d')
        # os.path.join works whether or not log_dir ends with a separator.
        log_file = os.path.join(log_dir, file_name)
        if os.path.isfile(log_file):
            print_match_logs(log_file, start_time, end_time, levels, keyword)
        else:
            print('WARN: log file %s does not exist' % log_file)
        cur_date += one_day

def get_levels(level):
    """Return ``level`` and every level listed after it in LOG_LEVELS.

    Returns None when ``level`` is not an entry of LOG_LEVELS.
    """
    try:
        first = LOG_LEVELS.index(level)
    except ValueError:
        return None
    return LOG_LEVELS[first:]

def main():
    """Parse the command line, validate it and print the matching log lines.

    ``-h``/``--help`` prints the usage text and exits with status 0. The
    process exits with status 1, after printing a message, when an option is
    unknown, a mandatory option (log_dir, base_name, start_time) is missing,
    a time lies in the future, start_time is after end_time, or the level is
    not one of LOG_LEVELS.
    """
    try:
        # The long option must be 'base_name=' to agree with the usage text
        # and with the '--base_name' branch below.
        opts, args = getopt.getopt(
            sys.argv[1:], 'd:b:s:e:l:k:h',
            ['log_dir=', 'base_name=', 'start_time=', 'end_time=',
             'level=', 'keyword=', 'help'])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    now = datetime.now()
    log_dir = None
    log_file_base_name = None
    start_time = None
    end_time = now
    level = 'ERROR'
    keyword = None
    for o, a in opts:
        if o in ('-d', '--log_dir'):
            log_dir = a
        elif o in ('-b', '--base_name'):
            log_file_base_name = a
        elif o in ('-s', '--start_time'):
            start_time = parse_datetime(a)
        elif o in ('-e', '--end_time'):
            end_time = parse_datetime(a)
        elif o in ('-l', '--level'):
            level = a
        elif o in ('-k', '--keyword'):
            keyword = a
        elif o in ('-h', '--help'):
            usage()
            sys.exit(0)

    if not log_dir:
        print('ERROR: log_dir is mandatory')
        sys.exit(1)
    if not log_dir.endswith('/'):
        log_dir += '/'

    if not log_file_base_name:
        print('ERROR: log_file_base_name is mandatory')
        sys.exit(1)

    if not start_time:
        print('ERROR: start_time is mandatory')
        sys.exit(1)

    if start_time > now or end_time > now:
        print('ERROR: start_time or end_time is over now')
        sys.exit(1)

    if start_time > end_time:
        print('ERROR: start_time is over end_time')
        sys.exit(1)

    if level not in LOG_LEVELS:
        print('ERROR: invalid level')
        sys.exit(1)

    log_extract(log_dir, log_file_base_name, start_time, end_time,
                get_levels(level), keyword)


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()

集群抓取日志的 Bash 脚本

loggrab.sh

 

#!/bin/bash

# Log level names accepted by the -l option; parse_args rejects any other value.
LOG_LEVELS=('DEBUG' 'INFO' 'WARN' 'ERROR' 'FATAL')

# Return 0 if the first argument equals any of the remaining arguments,
# 1 otherwise.
#   $1     value to look for
#   $2...  candidate values
function in_array() {
    local findee=${1}
    shift

    local elem
    # "$@" and the quoted operands keep empty values and values containing
    # spaces intact instead of letting the shell split or drop them.
    for elem in "$@"; do
        [ "$elem" == "$findee" ] && return 0
    done

    return 1
}

# Print the help text to standard output. The script name on the first line
# is derived from $0, quoted so a path containing spaces is not word-split.
function usage() {
    cat<<EOD
Usage: $(basename "$0") [OPTIONS...] grab logs in specified duration
    Options:
        -s     start time, mandatory
        -e    end time, optional, default: now
        -l    lowest log level, optional, options: DEBUG, INFO, WARN, ERROR, FATAL, default: ERROR
        -k    grab keyword
        -h    display this help
EOD
}

# Parse the command-line options into the globals START_TIME, END_TIME,
# LOG_LEVEL and KEYWORD and validate them.
#   -s <time>     start time, mandatory
#   -e <time>     end time, defaults to the current time
#   -l <level>    lowest log level, defaults to ERROR
#   -k <keyword>  keyword to grab
#   -h            print the usage text and exit 0
# Exits with status 1 on an unknown option, a missing option argument, a
# missing or unparsable time, or an unknown log level.
function parse_args() {
    START_TIME=
    END_TIME=$(date "+%Y-%m-%d %H:%M") # default now
    LOG_LEVEL=ERROR
    KEYWORD=

    # Leading ':' selects getopts' silent error mode; -h takes no argument.
    OPTIONS=:s:e:l:k:h
    while getopts "$OPTIONS" OP
    do
      case $OP in
        s)START_TIME=$OPTARG;;
        e)END_TIME=$OPTARG;;
        l)LOG_LEVEL=$OPTARG;;
        k)KEYWORD=$OPTARG;;
        h)usage; exit 0;;
        # '?' (unknown option) and ':' (missing argument) both end up here.
        *)usage; exit 1;;
      esac
    done

    if [ -n "$START_TIME" ] ;then
        # Quoted: the value contains a space ("YYYY-MM-DD HH:MM") and must
        # reach date as a single argument.
        START_TIME=$(date -d "$START_TIME" "+%Y-%m-%d %H:%M") || exit 1
    else
        echo "ERROR: start time is mandatory"; exit 1
    fi

    if [ -n "$END_TIME" ] ; then
        END_TIME=$(date -d "$END_TIME" "+%Y-%m-%d %H:%M") || exit 1
    fi

    if [ -n "$KEYWORD" ] ; then
        # The embedded quotes are kept literally: KEYWORD is later pasted
        # into the command string that main hands to pssh.
        KEYWORD="-k \"$KEYWORD\""
    fi

    if ! in_array "$LOG_LEVEL" "${LOG_LEVELS[@]}"; then
        echo "ERROR: unknown log level: $LOG_LEVEL"; exit 1
    fi
}

# Entry point: source conf.sh from the script's directory, parse the options,
# run the Python grab script on every host in $HOSTS through pssh (each host
# writes its output to $TMP_LOG_FILE_PATH), then copy every host's file back,
# append them to one local grab.log.<epoch> file and open it in less.
function main() {
    STARTUP_DIR=$(pwd)
    # Resolve the script's own directory so conf.sh is found whether the
    # script was started through a relative or an absolute path.
    BASE_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
    . "$BASE_DIR/conf.sh" || { exit 1; }
    # "$@" keeps option values that contain spaces (the time arguments) whole.
    parse_args "$@"
    # HOSTS is passed as a single quoted host string; unquoted, the second
    # host would be taken as the command to run.
    pssh -i -H "$HOSTS" "python $LOG_GRAB_PY -d \"$LOG_DIR\" -b \"$BASE_NAME\" -s \"$START_TIME\" -e \"$END_TIME\" -l $LOG_LEVEL $KEYWORD > $TMP_LOG_FILE_PATH" || { exit 1; }

    grab_log=grab.log.$(date "+%s")
    # Word splitting is intended here: HOSTS is a space-separated list.
    for host in $HOSTS; do
        tmp_log=/tmp/$host.log
        # Only append when the copy succeeded, so a stale file left by an
        # earlier run is never mixed into the result.
        if scp "$host:$TMP_LOG_FILE_PATH" "$tmp_log"; then
            cat "$tmp_log" >> "$grab_log"
        else
            echo "WARN: failed to fetch log from $host" >&2
        fi
    done
    less "$grab_log"
}

main "$@"

 conf.sh

 

#!/bin/bash
# Settings sourced by the grab script's main function.

# Directory passed to the Python script with -d.
LOG_DIR="/tomcat6/logs/"
# Log file base name passed to the Python script with -b.
BASE_NAME="demo.log"
# Space-separated host list. The quotes are required: without them the shell
# treats the second address as a command to run and HOSTS is not set.
HOSTS="10.249.213.154 10.249.213.155"

# Path of the Python grab script that is run on each host.
LOG_GRAB_PY=/loggrab.py

# File on each host that receives the script's output; suffixed with the
# epoch second at which this file is sourced.
TMP_LOG_FILE_PATH=/tmp/log.$(date "+%s")
 

 

你可能感兴趣的:(python,bash,集群日志)