使用 Python 分割 Apache 日志

#!/usr/bin/env python
import sys
import re
# Verbose-mode pattern for one line of an Apache access log in the
# "common" layout.  Named groups capture the client host, the bracketed
# timestamp, the quoted request line, the status code and the byte count
# (which may be "-" instead of a number).  The remote logname and remote
# user fields are matched but not captured.
log_line_re = re.compile(r'''(?P<remote_host>\S+) #IP ADDRESS
\s+ #whitespace
\S+ #remote logname
\s+ #whitespace
\S+ #remote user
\s+ #whitespace
(?P<time>\[[^\[\]]+\]) #time
\s+ #whitespace
(?P<request>"[^"]+") #first line of request
\s+ #whitespace
(?P<status>\d+)
\s+ #whitespace
(?P<bytes_sent>-|\d+)
\s* #whitespace
''', re.VERBOSE)

# Sample log line used to demonstrate the pattern.
line = '192.168.1.37 - - [22/Jul/2010:14:51:56 +0800] "GET /xampp/head.php HTTP/1.1" 200 1362'
m = log_line_re.match(line)

# match() returns None when the line does not fit the pattern; stop with a
# readable message instead of failing on None.groupdict().
if m is None:
    sys.exit("Unable to parse %s" % line)

# Mapping of group name -> captured text for the sample line.
groupdict = m.groupdict()
# Parenthesised single-argument form behaves the same on Python 2 and 3.
print(groupdict)


[root@test ~]# python log1 
{'status': '200', 'remote_host': '192.168.1.37', 'request': '"GET /xampp/head.php HTTP/1.1"', 'bytes_sent': '1362', 'time': '[22/Jul/2010:14:51:56 +0800]'}




使用apachelog模块 http://pypi.python.org/pypi/apachelog/1.0

#!/usr/bin/env python
# Parse every line of an Apache access log with the third-party
# ``apachelog`` module and print the resulting field dictionary.
# Lines the parser rejects are reported on stderr and skipped.
import sys  # needed for sys.stderr in the error path

import apachelog

# Parser built from the module's predefined 'common' format string.
p = apachelog.parser(apachelog.formats['common'])

# The with-block guarantees the log file is closed when the loop ends.
with open('/opt/lampp/logs/access_log') as log_file:
    for line in log_file:
        try:
            data = p.parse(line)
        except apachelog.ApacheLogParserError:
            # Only parse failures are tolerated; other errors (and
            # Ctrl-C) propagate instead of being silently swallowed.
            # `line` still carries its trailing newline.
            sys.stderr.write("Unable to parse %s" % line)
        else:
            # Parenthesised single-argument form works on Python 2 and 3.
            print(data)

{'%l': '-', '%>s': '200', '%h': '192.168.1.38', '%b': '5573', '%u': '-', '%t': '[22/Jul/2010:16:02:27 +0800]', '%r': 'GET /admin/ListUserlist.php HTTP/1.1'}

你可能感兴趣的:(apache,python,职场,休闲)