Apache/Nginx 访问日志分析脚本

脚本功能:

脚本采用 Python 2.7 编写,用来分析 Apache 和 Nginx 的访问日志


脚本用法:


  1. 脚本名 Apache的访问日志 [想要显示的行数]


更新:


1.第二版:增加 显示指定的行数

2.增加Nginx日志分析脚本


脚本执行效果如下:

101052973.png


脚本内容如下:

nginx访问日志分析脚本


  1. #!/usr/bin/env python

  2. # coding=utf-8

  3. #------------------------------------------------------

  4. # Name:         nginx 日志分析脚本

  5. # Purpose:      此脚本只用来分析nginx的访问日志

  6. # Version:      1.0

  7. # Author:       LEO

  8. # BLOG:         http://linux5588.blog.51cto.com

  9. # EMAIL:        [email protected]

  10. # Created:      2013-05-07

  11. # Modified:     2013-05-07

  12. # Copyright:    (c) LEO 2013

  13. #------------------------------------------------------

  14. import sys

  15. import time

  16. #该类是用来打印格式

  17. class displayFormat(object):

  18. def format_size(self,size):

  19. '''''格式化流量单位'''

  20.        KB = 1024#KB -> B  B是字节

  21.        MB = 1048576#MB -> B

  22.        GB = 1073741824#GB -> B

  23.        TB = 1099511627776#TB -> B

  24. if size >= TB :

  25.            size = str(size / TB) + 'T'

  26. elif size < KB :

  27.            size = str(size) + 'B'

  28. elif size >= GB and size < TB:

  29.            size = str(size / GB) + 'G'

  30. elif size >= MB and size < GB :

  31.            size = str(size / MB) + 'M'

  32. else :

  33.            size = str(size / KB) + 'K'

  34. return size

  35. #定义字符串格式化

  36.    formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

  37. def transverse_line(self) :

  38. '''''输出横线'''

  39. printself.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10)

  40. def head(self):

  41. '''''输出头部信息'''

  42. printself.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503')

  43. def error_print(self) :

  44. '''''输出错误信息'''

  45. print

  46. print'Usage : ' + sys.argv[0] + ' NginxLogFilePath [Number]'

  47. print

  48.        sys.exit(1)

  49. def execut_time(self):

  50. '''''输出脚本执行的时间'''

  51. print

  52. print"Script Execution Time: %.3f second" % time.clock()

  53. print

  54. #该类是用来生成主机信息的字典

  55. class hostInfo(object):

  56.    host_info = ['200','404','500','302','304','503','403','times','size']

  57. def __init__(self,host):

  58. self.host = host = {}.fromkeys(self.host_info,0)

  59. def increment(self,status_times_size,is_size):

  60. '''''该方法是用来给host_info中的各个值加1'''

  61. if status_times_size == 'times':

  62. self.host['times'] += 1

  63. elif is_size:

  64. self.host['size'] = self.host['size'] + status_times_size

  65. else:

  66. self.host[status_times_size] += 1

  67. def get_value(self,value):

  68. '''''该方法是取到各个主机信息中对应的值'''

  69. returnself.host[value]

  70. #该类是用来分析文件

  71. class fileAnalysis(object):

  72. def __init__(self):

  73. '''''初始化一个空字典'''

  74. self.report_dict = {}

  75. self.total_request_times,self.total_traffic,self.total_200, \

  76. self.total_404,self.total_500,self.total_403,self.total_302, \

  77. self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0

  78. def split_eachline_todict(self,line):

  79. '''''分割文件中的每一行,并返回一个字典'''

  80.        split_line = line.split()

  81.        split_dict = {'remote_host':split_line[0],'status':split_line[8],\

  82. 'bytes_sent':split_line[9],}

  83. return split_dict

  84. def generate_log_report(self,logfile):

  85. '''''读取文件,分析split_eachline_todict方法生成的字典'''

  86. for line in logfile:

  87. try:

  88.                line_dict = self.split_eachline_todict(line)

  89.                host = line_dict['remote_host']

  90.                status = line_dict['status']

  91. except ValueError :

  92. continue

  93. except IndexError :

  94. continue

  95. if host notinself.report_dict :

  96.                host_info_obj = hostInfo(host)

  97. self.report_dict[host] = host_info_obj

  98. else :

  99.                host_info_obj = self.report_dict[host]

  100.            host_info_obj.increment('times',False)  

  101. if status in host_info_obj.host_info :

  102.                host_info_obj.increment(status,False)  

  103. try:

  104.                bytes_sent = int(line_dict['bytes_sent'])

  105. except ValueError:

  106.                bytes_sent = 0

  107.            host_info_obj.increment(bytes_sent,True)  

  108. returnself.report_dict

  109. def return_sorted_list(self,true_dict):

  110. '''''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序'''

  111. for host_key in true_dict :

  112.            host_value = true_dict[host_key]

  113.            times = host_value.get_value('times')                        

  114. self.total_request_times = self.total_request_times + times  

  115.            size = host_value.get_value('size')                        

  116. self.total_traffic = self.total_traffic + size  

  117.            o200 = host_value.get_value('200')

  118.            o404 = host_value.get_value('404')

  119.            o500 = host_value.get_value('500')

  120.            o403 = host_value.get_value('403')

  121.            o302 = host_value.get_value('302')

  122.            o304 = host_value.get_value('304')

  123.            o503 = host_value.get_value('503')

  124.            true_dict[host_key] = {'200':o200,'404':o404,'500':o500,\

  125. '403':o403,'302':o302,'304':o304, \

  126. '503':o503,'times':times,'size':size}

  127. self.total_200 = self.total_200 + o200

  128. self.total_404 = self.total_404 + o404

  129. self.total_500 = self.total_500 + o500

  130. self.total_302 = self.total_302 + o302

  131. self.total_304 = self.total_304 + o304

  132. self.total_503 = self.total_503 + o503

  133.        sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],\

  134.                                                             t[1]['size']),reverse=True)

  135. return sorted_list

  136. class Main(object):

  137. def main(self) :

  138. '''''主调函数'''

  139.        display_format = displayFormat()

  140.        arg_length = len(sys.argv)

  141. if arg_length == 1 :

  142.            display_format.error_print()

  143. elif arg_length == 2or arg_length == 3:

  144.            infile_name = sys.argv[1]

  145. try :

  146.                infile = open(infile_name,'r')

  147. if arg_length == 3 :

  148.                    lines = int(sys.argv[2])

  149. else :

  150.                    lines = 0

  151. except IOError,e :

  152. print

  153. print e

  154.                display_format.error_print()

  155. except ValueError :

  156. print

  157. print"Please Enter A Volid Number !!"

  158.                display_format.error_print()

  159. else :

  160.            display_format.error_print()

  161.        fileAnalysis_obj = fileAnalysis()

  162.        not_true_dict = fileAnalysis_obj.generate_log_report(infile)

  163.        log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)

  164.        total_ip = len(log_report)

  165. if lines :

  166.            log_report = log_report[0:lines]

  167.        infile.close()

  168. print

  169.        total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic)

  170.        total_request_times = fileAnalysis_obj.total_request_times

  171. print'Total IP: %s   Total Traffic: %s   Total Request Times: %d' \

  172.              % (total_ip,total_traffic,total_request_times)

  173. print

  174.        display_format.head()

  175.        display_format.transverse_line()

  176. for host in log_report :

  177.            times = host[1]['times']

  178.            times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100

  179. print display_format.formatstring % (host[0],\

  180.                                                 display_format.format_size(host[1]['size']),\

  181.                                                 times,str(times_percent)[0:5],\

  182.                                                 host[1]['200'],host[1]['404'],\

  183.                                                 host[1]['500'],host[1]['403'],\

  184.                                                 host[1]['302'],host[1]['304'],host[1]['503'])

  185. if (not lines) or total_ip == lines :

  186.            display_format.transverse_line()

  187. print display_format.formatstring % (total_ip,total_traffic, \

  188.                                                 total_request_times,'100%',\

  189.                                                 fileAnalysis_obj.total_200,\

  190.                                                 fileAnalysis_obj.total_404,\

  191.                                                 fileAnalysis_obj.total_500, \

  192.                                                 fileAnalysis_obj.total_403,\

  193.                                                 fileAnalysis_obj.total_302, \

  194.                                                 fileAnalysis_obj.total_304,\

  195.                                                 fileAnalysis_obj.total_503)

  196.        display_format.execut_time()

  197. if __name__ == '__main__':

  198.    main_obj = Main()

  199.    main_obj.main()


Apache访问日志分析脚本【第二版】


  1. #!/usr/bin/env python

  2. # coding=utf-8

  3. #------------------------------------------------------

  4. # Name:         Apache 日志分析脚本

  5. # Purpose:      此脚本只用来分析Apache的访问日志

  6. # Version:      2.0

  7. # Author:       LEO

  8. # BLOG:         http://linux5588.blog.51cto.com

  9. # EMAIL:        [email protected]

  10. # Created:      2013-4-26

  11. # Modified:     2013-5-4

  12. # Copyright:    (c) LEO 2013

  13. #------------------------------------------------------

  14. import sys

  15. import time

  16. #该类是用来打印格式

  17. class displayFormat(object):

  18. def format_size(self,size):

  19. '''''格式化流量单位'''

  20.        KB = 1024

  21.        MB = 1048576

  22.        GB = 1073741824

  23.        TB = 1099511627776

  24. if size >= TB :

  25.            size = str(size / TB) + 'T'

  26. elif size < KB :

  27.            size = str(size) + 'B'

  28. elif size >= GB and size < TB:

  29.            size = str(size / GB) + 'G'

  30. elif size >= MB and size < GB :

  31.            size = str(size / MB) + 'M'

  32. else :

  33.            size = str(size / KB) + 'K'

  34. return size

  35.    formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

  36. def transverse_line(self) :

  37. '''''输出横线'''

  38. printself.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10)

  39. def head(self):

  40. '''''输出头部信息'''

  41. printself.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503')

  42. def error_print(self) :

  43. '''''输出错误信息'''

  44. print

  45. print'Usage : ' + sys.argv[0] + ' ApacheLogFilePath [Number]'

  46. print

  47.        sys.exit(1)

  48. def execut_time(self):

  49. '''''输出脚本执行的时间'''

  50. print

  51. print"Script Execution Time: %.3f second" % time.clock()

  52. print

  53. #该类是用来生成主机信息的字典

  54. class hostInfo(object):

  55.    host_info = ['200','404','500','302','304','503','403','times','size']

  56. def __init__(self,host):

  57. self.host = host = {}.fromkeys(self.host_info,0)

  58. def increment(self,status_times_size,is_size):

  59. '''''该方法是用来给host_info中的各个值加1'''

  60. if status_times_size == 'times':

  61. self.host['times'] += 1

  62. elif is_size:

  63. self.host['size'] = self.host['size'] + status_times_size

  64. else:

  65. self.host[status_times_size] += 1

  66. def get_value(self,value):

  67. '''''该方法是取到各个主机信息中对应的值'''

  68. returnself.host[value]

  69. #该类是用来分析文件

  70. class fileAnalysis(object):

  71. def __init__(self):

  72. '''''初始化一个空字典'''

  73. self.report_dict = {}

  74. self.total_request_times,self.total_traffic,self.total_200, \

  75. self.total_404,self.total_500,self.total_403,self.total_302, \

  76. self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0

  77. def split_eachline_todict(self,line):

  78. '''''分割文件中的每一行,并返回一个字典'''

  79.        split_line = line.split()

  80.        split_dict = {'remote_host':split_line[0],'status':split_line[-2],'bytes_sent':split_line[-1],}

  81. return split_dict

  82. def generate_log_report(self,logfile):

  83. '''''读取文件,分析split_eachline_todict方法生成的字典'''

  84. for line in logfile:

  85. try:

  86.                line_dict = self.split_eachline_todict(line)

  87.                host = line_dict['remote_host']

  88.                status = line_dict['status']

  89. except ValueError :

  90. continue

  91. except IndexError :

  92. continue

  93. if host notinself.report_dict :

  94.                host_info_obj = hostInfo(host)

  95. self.report_dict[host] = host_info_obj

  96. else :

  97.                host_info_obj = self.report_dict[host]

  98.            host_info_obj.increment('times',False)    

  99. if status in host_info_obj.host_info :  

  100.                host_info_obj.increment(status,False)  

  101. try:

  102.                bytes_sent = int(line_dict['bytes_sent'])  

  103. except ValueError:

  104.                bytes_sent = 0

  105.            host_info_obj.increment(bytes_sent,True)

  106. returnself.report_dict

  107. def return_sorted_list(self,true_dict):

  108. '''''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序'''

  109. for host_key in true_dict :

  110.            host_value = true_dict[host_key]

  111.            times = host_value.get_value('times')  

  112. self.total_request_times = self.total_request_times + times  

  113.            size = host_value.get_value('size')  

  114. self.total_traffic = self.total_traffic + size  

  115.            o200 = host_value.get_value('200')

  116.            o404 = host_value.get_value('404')

  117.            o500 = host_value.get_value('500')

  118.            o403 = host_value.get_value('403')

  119.            o302 = host_value.get_value('302')

  120.            o304 = host_value.get_value('304')

  121.            o503 = host_value.get_value('503')

  122.            true_dict[host_key] = {'200':o200,'404':o404,'500':o500,'403':o403,'302':o302,'304':o304, \

  123. '503':o503,'times':times,'size':size}

  124. self.total_200 = self.total_200 + o200

  125. self.total_404 = self.total_404 + o404

  126. self.total_500 = self.total_500 + o500

  127. self.total_302 = self.total_302 + o302

  128. self.total_304 = self.total_304 + o304

  129. self.total_503 = self.total_503 + o503

  130.        sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],t[1]['size']),reverse=True)

  131. return sorted_list

  132. class Main(object):

  133. def main(self) :

  134. '''''主调函数'''

  135.        display_format = displayFormat()

  136.        arg_length = len(sys.argv)

  137. if arg_length == 1 :

  138.            display_format.error_print()

  139. elif arg_length == 2or arg_length == 3:

  140.            infile_name = sys.argv[1]

  141. try :

  142.                infile = open(infile_name,'r')

  143. if arg_length == 3 :

  144.                    lines = int(sys.argv[2])

  145. else :

  146.                    lines = 0

  147. except IOError,e :

  148. print

  149. print e

  150.                display_format.error_print()

  151. except ValueError :

  152. print

  153. print"Please Enter A Volid Number !!"

  154.                display_format.error_print()

  155. else :

  156.            display_format.error_print()

  157.        fileAnalysis_obj = fileAnalysis()

  158.        not_true_dict = fileAnalysis_obj.generate_log_report(infile)

  159.        log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)

  160.        total_ip = len(log_report)

  161. if lines :

  162.            log_report = log_report[0:lines]

  163.        infile.close()

  164. print

  165.        total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic)

  166.        total_request_times = fileAnalysis_obj.total_request_times

  167. print'Total IP: %s   Total Traffic: %s   Total Request Times: %d' \

  168.              % (total_ip,total_traffic,total_request_times)

  169. print

  170.        display_format.head()

  171.        display_format.transverse_line()

  172. for host in log_report :

  173.            times = host[1]['times']

  174.            times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100

  175. print display_format.formatstring % (host[0],\

  176.                                                 display_format.format_size(host[1]['size']),\

  177.                                                 times,str(times_percent)[0:5],\

  178.                                                 host[1]['200'],host[1]['404'],\

  179.                                                 host[1]['500'],host[1]['403'],\

  180.                                                 host[1]['302'],host[1]['304'],host[1]['503'])

  181. if (not lines) or total_ip == lines :

  182.            display_format.transverse_line()

  183. print display_format.formatstring % (total_ip,total_traffic, \

  184.                                                 total_request_times,'100%',\

  185.                                                 fileAnalysis_obj.total_200,\

  186.                                                 fileAnalysis_obj.total_404,\

  187.                                                 fileAnalysis_obj.total_500, \

  188.                                                 fileAnalysis_obj.total_403,\

  189.                                                 fileAnalysis_obj.total_302, \

  190.                                                 fileAnalysis_obj.total_304,\

  191.                                                 fileAnalysis_obj.total_503)

  192.        display_format.execut_time()

  193. if __name__ == '__main__':

  194.    main_obj = Main()

  195.    main_obj.main()


你可能感兴趣的:(apache,日志,nginx分析)