日志抓取的python脚本
#! /usr/bin/env python
"""Grab log lines from daily log files within a time window.

The script walks every calendar day between the start and end time, opens
the log file for that day and prints each line whose timestamp lies inside
the window, whose level is at or above the requested one, and which
contains the optional keyword.

File naming, as implemented by ``log_extract``:
  * today's file is ``<log_dir>/<base_name>``
  * any other day's file is ``<log_dir>/<base_name>.<YYYY-MM-DD>``

The syntax is kept valid for both Python 2.6+ and Python 3.
"""
import getopt
import os
import re
import sys
from stat import *  # kept from the original script; nothing below uses it
from datetime import datetime
from datetime import timedelta

# Ordered from least to most severe; get_levels() relies on this order.
LOG_LEVELS = ['DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL']

# A line is considered a log record when it looks like
#   "<Y-m-d H:M:S>,<digits> [<anything>] <LEVEL> <message>"
# group(1) is the timestamp (without the part after the comma),
# group(2) is the level word.
_LOG_LINE_RE = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+\s\[.+\] (\w+) .+')


def usage():
    """Print the command-line help text to stdout."""
    print("""Usage: %s [OPTIONS...]
grab logs in specified duration
Options:
  -d, --log_dir=<log_dir>        directory of log files
  -b, --base_name=<base_name>    log file base name
  -s, --start_time=<start_time>  start time, mandatory
  -e, --end_time=<end_time>      end time, optional, default: now
  -l, --level=<level>            lowest log level, optional,
                                 options: DEBUG, INFO, WARN, ERROR, FATAL,
                                 default: ERROR
  -k, --keyword=<keyword>        keyword to grab
  -h, --help                     display this help
Note:
  time format: %%Y-%%m-%%d %%H:%%M, e.g. 2012-07-07 10:00
""" % (sys.argv[0],))


def parse_datetime(arg):
    """Parse a ``%Y-%m-%d %H:%M`` string into a ``datetime``.

    On a malformed value an error and the usage text are printed and the
    process exits with status 1.
    """
    try:
        return datetime.strptime(arg, '%Y-%m-%d %H:%M')
    except ValueError:
        print('ERROR: datetime formart error\n')
        usage()
        sys.exit(1)


def print_match_logs(log_file, start_time, end_time, levels, keyword):
    """Print the lines of ``log_file`` that pass every filter.

    Args:
        log_file: path of the file to scan.
        start_time: inclusive lower bound (``datetime``).
        end_time: inclusive upper bound (``datetime``).
        levels: collection of accepted level names.
        keyword: substring a line must contain; falsy disables the filter.

    Lines that do not match the record pattern, or whose timestamp cannot
    be parsed, are skipped silently.
    """
    # `with` guarantees the handle is closed even if a write fails mid-file.
    with open(log_file) as f:
        for line in f:
            m = _LOG_LINE_RE.match(line)
            if not m:
                continue
            try:
                cur_time = datetime.strptime(m.group(1), '%Y-%m-%d %H:%M:%S')
            except ValueError:
                # Digits matched the pattern but are not a real date/time.
                continue
            if not (start_time <= cur_time <= end_time):
                continue
            if m.group(2) not in levels:
                continue
            if keyword and keyword not in line:
                continue
            # The line already carries its own newline; writing it verbatim
            # avoids the blank line a plain print would add after each hit.
            sys.stdout.write(line)
            if not line.endswith('\n'):
                sys.stdout.write('\n')


def log_extract(log_dir, log_file_base_name, start_time, end_time, levels,
                keyword):
    """Scan one log file per calendar day from ``start_time`` to ``end_time``.

    A warning is printed for every day whose file does not exist; the scan
    then continues with the next day.
    """
    midnight = dict(hour=0, minute=0, second=0, microsecond=0)
    day = start_time.replace(**midnight)
    last_day = end_time.replace(**midnight)
    today = datetime.today().replace(**midnight)

    while day <= last_day:
        name = log_file_base_name
        if day != today:
            # Every day except today is read from a date-suffixed file.
            name += '.' + day.strftime('%Y-%m-%d')
        log_file = os.path.join(log_dir, name)

        if os.path.isfile(log_file):
            print_match_logs(log_file, start_time, end_time, levels, keyword)
        else:
            print('WARN: log file %s does not exist' % log_file)
        day += timedelta(days=1)


def get_levels(level):
    """Return ``level`` and every more severe level, or None if unknown."""
    if level not in LOG_LEVELS:
        return None
    return LOG_LEVELS[LOG_LEVELS.index(level):]


def main(argv=None):
    """Parse the command line, validate it and run the extraction.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Exits with status 1 on any invalid or missing argument and with
    status 0 after printing help.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        # 'base_name=' must match the option handled below and shown in the
        # help text; it used to be registered as 'base_dir=', which made
        # getopt reject --base_name.
        opts, _ = getopt.getopt(
            argv, 'd:b:s:e:l:k:h',
            ['log_dir=', 'base_name=', 'start_time=', 'end_time=',
             'level=', 'keyword=', 'help'])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    now = datetime.now()
    log_dir = None
    log_file_base_name = None
    start_time = None
    end_time = now
    level = 'ERROR'
    keyword = None

    for o, a in opts:
        if o in ('-d', '--log_dir'):
            log_dir = a
        elif o in ('-b', '--base_name'):
            log_file_base_name = a
        elif o in ('-s', '--start_time'):
            start_time = parse_datetime(a)
        elif o in ('-e', '--end_time'):
            end_time = parse_datetime(a)
        elif o in ('-l', '--level'):
            level = a
        elif o in ('-k', '--keyword'):
            keyword = a
        elif o in ('-h', '--help'):
            usage()
            sys.exit(0)

    if not log_dir:
        print('ERROR: log_dir is mandatory')
        sys.exit(1)
    if not log_dir.endswith('/'):
        log_dir += '/'

    if not log_file_base_name:
        print('ERROR: log_file_base_name is mandatory')
        sys.exit(1)

    if not start_time:
        print('ERROR: start_time is mandatory')
        sys.exit(1)

    if start_time > now or end_time > now:
        print('ERROR: start_time or end_time is over now')
        sys.exit(1)

    if start_time > end_time:
        print('ERROR: start_time is over end_time')
        sys.exit(1)

    if level not in LOG_LEVELS:
        print('ERROR: invalid level')
        sys.exit(1)

    log_extract(log_dir, log_file_base_name, start_time, end_time,
                get_levels(level), keyword)


if __name__ == '__main__':
    main()
集群抓取日志的bash脚本
loggrab.sh
#!/bin/bash
# Grab logs from every host listed in conf.sh for a given time window.
#
# The script runs the Python grabber on all hosts through pssh, lets each
# host write its result to $TMP_LOG_FILE_PATH, copies those files back with
# scp, concatenates them into ./grab.log.<epoch> and opens that file in less.
#
# Settings read from conf.sh (sourced from the directory of this script):
#   HOSTS, LOG_GRAB_PY, LOG_DIR, BASE_NAME, TMP_LOG_FILE_PATH

LOG_LEVELS=('DEBUG' 'INFO' 'WARN' 'ERROR' 'FATAL')

# in_array NEEDLE [ELEM...]
# Return 0 when NEEDLE equals one of the ELEM arguments, 1 otherwise.
function in_array() {
    local findee=$1
    shift
    local elem
    for elem in "$@"; do
        [ "$elem" == "$findee" ] && return 0
    done
    return 1
}

# Print the help text to stdout.
function usage() {
    cat <<EOD
Usage: $(basename "$0") [OPTIONS...]
grab logs in specified duration
Options:
  -s    start time, mandatory
  -e    end time, optional, default: now
  -l    lowest log level, optional,
        options: DEBUG, INFO, WARN, ERROR, FATAL, default: ERROR
  -k    grab keyword
  -h    display this help
EOD
}

# parse_args ARGS...
# Parse the command line into START_TIME, END_TIME, LOG_LEVEL and KEYWORD.
# Exits with status 1 on a missing start time, an unparsable time, an
# unknown option or an unknown log level; exits with 0 after -h.
function parse_args() {
    START_TIME=
    END_TIME=$(date "+%Y-%m-%d %H:%M")   # default: now
    LOG_LEVEL=ERROR
    KEYWORD=

    # Leading ':' selects silent error reporting. 'h' takes no argument, so
    # a bare -h is recognised as the help flag.
    OPTIONS=':s:e:l:k:h'
    while getopts "$OPTIONS" OP; do
        case $OP in
            s) START_TIME=$OPTARG ;;
            e) END_TIME=$OPTARG ;;
            l) LOG_LEVEL=$OPTARG ;;
            k) KEYWORD=$OPTARG ;;
            h) usage; exit 0 ;;
            # '?' (unknown option) and ':' (missing argument) both land here.
            *) usage; exit 1 ;;
        esac
    done

    if [ -n "$START_TIME" ]; then
        # Quoted: the value contains a space ("2012-07-07 10:00") and must
        # reach date as one argument.
        START_TIME=$(date -d "$START_TIME" "+%Y-%m-%d %H:%M") || exit 1
    else
        echo "ERROR: start time is mandatory"
        exit 1
    fi

    if [ -n "$END_TIME" ]; then
        END_TIME=$(date -d "$END_TIME" "+%Y-%m-%d %H:%M") || exit 1
    fi

    if [ -n "$KEYWORD" ]; then
        # Pre-formatted as a command-line fragment; it is spliced into the
        # remote command string built in main.
        KEYWORD="-k \"$KEYWORD\""
    fi

    if ! in_array "$LOG_LEVEL" "${LOG_LEVELS[@]}"; then
        echo "ERROR: unknow log leve: $LOG_LEVEL"
        exit 1
    fi
}

# main ARGS...
# Load the configuration, run the grabber on all hosts and collect the output.
function main() {
    STARTUP_DIR=$(pwd)
    # Resolve the script directory itself, so that both relative and
    # absolute invocations ($0) find conf.sh.
    BASE_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
    . "$BASE_DIR/conf.sh" || { exit 1; }

    parse_args "$@"

    # $HOSTS is passed as ONE quoted host string; unquoted, the second host
    # would be taken as the command to run.
    pssh -i -H "$HOSTS" "python $LOG_GRAB_PY -d \"$LOG_DIR\" -b \"$BASE_NAME\" -s \"$START_TIME\" -e \"$END_TIME\" -l $LOG_LEVEL $KEYWORD > $TMP_LOG_FILE_PATH" || { exit 1; }

    grab_log=grab.log.$(date "+%s")
    for host in $HOSTS; do
        tmp_log=/tmp/$host.log
        # Skip the host when the copy fails, so that a stale /tmp/<host>.log
        # from an earlier run is never appended to this run's result.
        if ! scp "$host:$TMP_LOG_FILE_PATH" "$tmp_log"; then
            echo "WARN: failed to fetch log from $host" >&2
            continue
        fi
        cat "$tmp_log" >> "$grab_log"
    done

    less "$grab_log"
}

main "$@"
conf.sh
#!/bin/bash
# Settings sourced by loggrab.sh.

# Directory of the log files; passed to the grabber's -d option.
LOG_DIR="/tomcat6/logs/"

# Base name of the log file; passed to the grabber's -b option.
BASE_NAME="demo.log"

# Whitespace-separated list of hosts to grab from. The quotes are required:
# without them bash would set HOSTS only in the environment of a command
# named after the second address and try to run that command.
HOSTS="10.249.213.154 10.249.213.155"

# Path of the Python grabber script, used in the command run on each host.
LOG_GRAB_PY=/loggrab.py

# File each host writes its grabbed lines to; the epoch suffix is evaluated
# once, when this file is sourced.
TMP_LOG_FILE_PATH=/tmp/log.$(date "+%s")