日志每天都会割接重建,所以脚本需要每天重启一次(通过crontab调用shell脚本实现):tail -f跟踪的是已经打开的文件描述符,日志割接重建之后,它读不到新文件里追加的日志条目。
#!/usr/bin/env python
# Usage: ./<script> -f <logfile>
"""Follow a log file through ``tail -f`` and pass every new line to ``worker``.

The syntax used here is valid on both Python 2.6+ and Python 3.
"""
import sys
import os
import getopt
import io
import subprocess
import time


def worker(line):
    """Append one log line to ``backup.txt`` in the current directory.

    This is the placeholder for the real per-line processing.

    Args:
        line: the log line without its trailing newline, as text or as
            UTF-8 encoded bytes.
    """
    if isinstance(line, bytes):
        line = line.decode('utf8', 'replace')
    # io.open with an explicit encoding accepts non-ASCII text on Python 2
    # as well; the builtin open() there would raise UnicodeEncodeError.
    with io.open("backup.txt", "a", encoding='utf8') as backup:
        backup.write(line + u"\n")


def main():
    """Parse ``-f <logfile>``, then feed every line printed by tail to worker.

    Returns:
        1 when the ``tail`` child exits (nothing is left to follow).

    Raises:
        SystemExit: status 1 on bad options, a missing file, or Ctrl-C.
    """
    try:
        opts, _args = getopt.getopt(sys.argv[1:], 'f:')
    except getopt.GetoptError as err:
        print(str(err))
        sys.exit(1)

    filename = ''
    for key, value in opts:
        if key == '-f':
            filename = value

    if not (filename and os.path.exists(filename)):
        print('invalid filename : %s' % filename)
        sys.exit(1)

    # NOTE: `tail -f` first prints the last 10 lines of the file, and it keeps
    # following the original file after a rotation, so this script has to be
    # restarted whenever the log is recreated.
    cmd = ('tail', '-f', filename)
    print(' '.join(cmd))
    tail = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    try:
        while True:
            raw = tail.stdout.readline()  # blocks until a full line arrives
            if not raw:
                # An empty read on the pipe means tail has exited; looping
                # on would only spin forever without ever seeing a new line.
                print('tail exited with status %s' % tail.wait())
                return 1
            # 'replace' keeps a single malformed byte sequence from killing
            # the watcher.
            worker(raw.strip().decode('utf8', 'replace'))
            time.sleep(0.01)
    except KeyboardInterrupt:
        print("Quit")
        sys.exit(1)
    finally:
        # Never leave an orphaned tail process behind.
        if tail.poll() is None:
            tail.terminate()
            tail.wait()
        tail.stdout.close()


if __name__ == '__main__':
    sys.exit(main())
为了让程序后台运行,可以使用 nohup ./脚本名 -f log &(把"脚本名"换成上面脚本实际的文件名)。
考虑到监控脚本可能异常down掉,需要记录当前解析到的位置(使用tail -n指定位置),不然tail -f取出的消息可能重复或者丢失。经过我的测试,当log文件很大的时候调用wc -l有延时,导致tail的位置不够准确,而且调用tail系统cpu占用过高。下面用简单的文件操作实现。
#!/usr/bin/env python
# Usage: ./<script> -f <logfile> [-c <start offset>] [-h <local ip>]
"""Follow a growing log file and process its lines on a small thread pool.

After each processed line the byte offset of the end of that line is written
to ``log_watcher.pos`` (and ``log_watcher.pos.bak``); passing that value back
with ``-c`` makes the next run resume right after it.

The syntax used here is valid on both Python 2.6+ and Python 3.5+.
"""
try:
    import Queue  # Python 2
except ImportError:  # Python 3 renamed the module
    import queue as Queue
import threading
import sys
import os
import getopt
import time
import traceback
import urllib
try:
    # Not used by the code below; kept available for per-line handlers.
    import urllib2
except ImportError:  # Python 3 has no urllib2
    urllib2 = None
import random

thread_count = 5
current_pos = 0
mutex = threading.Lock()

# File that receives a copy of every processed line.
BACKUP_PATH = "/usr/local/nginx/logs/access.log.bak"
# The checkpoint is written twice: each file is truncated before it is
# rewritten, so a kill at the wrong moment leaves one of them empty and the
# launcher falls back to the other.
POS_PATHS = ("log_watcher.pos", "log_watcher.pos.bak")
# Upper bound on queued jobs; the reader blocks once this many are pending.
MAX_PENDING_JOBS = 10000
# How long an idle worker thread waits for a job before checking again.
IDLE_POLL_SECONDS = 1


class MyThread(threading.Thread):
    """Daemon thread that runs jobs taken from a shared queue, forever."""

    def __init__(self, workQueue, resultQueue, timeout=0, **kwargs):
        """Store the queues and start the thread immediately.

        Args:
            workQueue: queue of ``(callable, args, kwargs)`` jobs.
            resultQueue: stored on the instance; not used by ``run``.
            timeout: seconds to wait for a job; 0 selects IDLE_POLL_SECONDS.
            **kwargs: forwarded to ``threading.Thread`` as its ``kwargs``.
        """
        threading.Thread.__init__(self, kwargs=kwargs)
        self.timeout = timeout
        self.daemon = True
        self.workQueue = workQueue
        self.resultQueue = resultQueue
        self.start()

    def run(self):
        """Run jobs forever, retrying each one until it returns 1."""
        while True:
            try:
                # A blocking get wakes up as soon as a job is queued.
                job = self.workQueue.get(
                    timeout=self.timeout or IDLE_POLL_SECONDS)
            except Queue.Empty:
                continue
            try:
                func, args, _kwargs = job
                # The job signals success by returning 1; anything else is
                # retried once per second.
                while func(args, self.name) != 1:
                    time.sleep(1)
            except Exception:
                # A failing job is reported and dropped so that the thread
                # stays alive for the next one.
                traceback.print_exc(file=sys.stdout)
            finally:
                self.workQueue.task_done()


class ThreadPool:
    """Fixed-size pool of MyThread workers fed from one bounded queue."""

    def __init__(self, num_of_threads=10):
        self.workQueue = Queue.Queue(maxsize=MAX_PENDING_JOBS)
        self.resultQueue = Queue.Queue()
        self.threads = []
        self.__createThreadPool(num_of_threads)

    def __createThreadPool(self, num_of_threads):
        """Create and start ``num_of_threads`` worker threads."""
        for _ in range(num_of_threads):
            self.threads.append(MyThread(self.workQueue, self.resultQueue))

    def wait_for_complete(self):
        """Block until every job queued so far has been handled.

        The worker threads never terminate, so joining them would hang;
        waiting on the queue is what "complete" can mean here.
        """
        self.workQueue.join()

    def add_job(self, callable, args, **kwargs):
        """Queue ``callable(args, thread_name)``; block while the queue is full.

        ``callable`` shadows the builtin but is kept because it is part of
        the public signature.
        """
        self.workQueue.put((callable, args, kwargs))


def worker(pline, threadid, backup_path=None, pos_paths=None):
    """Process one job of the form ``<end offset>###<line>``.

    The line is appended to the backup file first and the checkpoint is
    written afterwards, so a crash in between replays the line on restart
    instead of losing it.

    NOTE: with several threads, lines may finish out of order, so the stored
    offset can briefly sit a few lines behind or ahead of the true position.

    Args:
        pline: job payload, as text or UTF-8 bytes; split on the first "###".
        threadid: name of the calling thread (unused).
        backup_path: overrides BACKUP_PATH.
        pos_paths: overrides POS_PATHS.

    Returns:
        1 on success, which is what MyThread.run waits for.

    Raises:
        ValueError: when the separator is missing or the offset is not an
            integer.
    """
    global current_pos
    if backup_path is None:
        backup_path = BACKUP_PATH
    if pos_paths is None:
        pos_paths = POS_PATHS
    if not isinstance(pline, bytes):
        pline = pline.encode("utf-8")

    pos, separator, line = pline.partition(b"###")
    if not separator:
        raise ValueError("job has no '###' separator: %r" % (pline,))
    offset = int(pos)

    # deal with each log
    time.sleep(0.001)

    with mutex:
        with open(backup_path, "ab") as backup:
            backup.write(line + b"\n")
        current_pos = offset
        for path in pos_paths:
            with open(path, "wb") as marker:
                marker.write(pos)
    return 1


def _parse_args(argv):
    """Return ``(filename, start_offset)`` from argv; exit(1) on bad options."""
    try:
        opts, _args = getopt.getopt(argv, 'f:c:h:')
    except getopt.GetoptError as err:
        print(str(err))
        sys.exit(1)

    filename = ''
    start = 0
    for key, value in opts:
        if key == '-f':
            filename = value
        elif key == '-c':
            try:
                start = int(value)
            except ValueError:
                print('invalid position : %s' % value)
                sys.exit(1)
        # -h <local ip> is accepted but not used by anything in this script.
    return filename, start


def main():
    """Follow the log given with ``-f``, starting at the ``-c`` offset.

    Runs until the process is killed.

    Raises:
        SystemExit: status 1 on bad options or a missing file.
    """
    global current_pos
    filename, current_pos = _parse_args(sys.argv[1:])

    if not (filename and os.path.exists(filename)):
        print('invalid filename : %s' % filename)
        sys.exit(1)

    # A checkpoint beyond the end of the file means the log was recreated
    # since it was taken: start over from the beginning.
    if current_pos < 0 or current_pos > os.path.getsize(filename):
        current_pos = 0

    thread_pool = ThreadPool(thread_count)
    # Binary mode: tell() and seek() then work in plain byte offsets.
    with open(filename, 'rb') as log:
        log.seek(current_pos)
        while True:
            line_start = log.tell()
            raw = log.readline()
            if not raw.endswith(b"\n"):
                # End of file, or a line that is still being written: go
                # back and retry once more data has been appended.
                log.seek(line_start)
                time.sleep(0.3)
                continue
            line = raw.strip()
            if line:
                # Ship the offset of the END of the line, so that a restart
                # resumes after it instead of processing it twice.
                thread_pool.add_job(worker, b"%d###%s" % (log.tell(), line))


if __name__ == '__main__':
    sys.exit(main())
#!/bin/bash
# Restart the log watcher, resuming from the byte offset it last checkpointed.
# Meant to be run from cron.

# Deliberately not called LOGNAME: that is the standard login-name
# environment variable and must not be clobbered.
LOG_FILE="/usr/local/nginx/logs/access.log"
BACKUP_FILE="/usr/local/nginx/logs/access.log.bak"
WORK_DIR="/usr/local/nginx/sbin"
# NOTE(review): the original command was "./ -f ...", i.e. the script name was
# missing. "log_watcher.py" is assumed from the log_watcher.pos file name --
# confirm and adjust before use.
WATCHER="./log_watcher.py"

# Stop only the previous watcher; `killall python` would take down every
# Python process on the host.
pkill -f "${WATCHER##*/}"
sleep 1

cd "$WORK_DIR" || { echo "cannot cd to $WORK_DIR" >&2; exit 1; }

# Print the offset stored in file $1. Prints nothing when the file is
# missing, empty, or does not hold a plain non-negative integer.
read_offset() {
    local value
    [ -f "$1" ] || return 0
    value=$(tr -d '[:space:]' < "$1")
    case "$value" in
        ''|*[!0-9]*) return 0 ;;
    esac
    printf '%s\n' "$value"
}

# log currentpos
if [ -e "log_watcher.pos" ]; then
    current_pos=$(read_offset "log_watcher.pos")
    if [ -z "$current_pos" ]; then
        # The primary file can be left empty when the watcher is killed in
        # the middle of rewriting it; fall back to the second copy.
        current_pos=$(read_offset "log_watcher.pos.bak")
    fi
    if [ -z "$current_pos" ]; then
        echo "no usable offset in log_watcher.pos or log_watcher.pos.bak" >&2
        exit 1
    fi
else
    current_pos=0
fi

if [ ! -f "$LOG_FILE" ]; then
    echo "log file not found: $LOG_FILE" >&2
    exit 1
fi

# refresh pos if log has been dealt: a log smaller than the saved offset has
# been recreated, so start again from the beginning with a fresh backup.
file_size=$(( $(wc -c < "$LOG_FILE") ))
if [ "$file_size" -lt "$current_pos" ]; then
    rm -f "$BACKUP_FILE"
    current_pos=0
fi

# First address on an "inet addr:10.x" line of the ifconfig output.
local_ip=$(/sbin/ifconfig -a | grep 'inet addr:10\.' | awk '{print $2}' | awk -F: '{print $2}' | sed q)

cmd=("$WATCHER" -f "$LOG_FILE" -c "$current_pos")
# Pass -h only when an address was found: a bare "-h" makes option parsing
# in the watcher fail.
if [ -n "$local_ip" ]; then
    cmd+=(-h "$local_ip")
fi
"${cmd[@]}"
使用tail -f虽然可以做到实时统计日志,但是程序重启时依赖wc -l这样的外部命令,耗时导致不够准确