学习python,在老雷的指点下去完成一个小需求
功能:检索/var/log/secure中的ip,超过阈值就写入/etc/hosts.deny文件中去
1、首先从/var/log/secure中提取ip地址
2、然后进行排序,挑选出访问量大于500的ip
3、把收集到的ip写入到hosts.deny文件中,写入之前先判断是否已存在
格式要求如下:
########2014-06-25####### 60.xxx.xxx.xxx 58.xxx.xxx.xxx ########2014-06-26####### 60.xxx.xxx.xxx 58.xxx.xxx.xxx
解决过程
1、首先从/var/log/secure中提取ip地址
# coding: utf-8
# Author: zhuima
# Date: 2014-06-24
# Function: fetch ip address
# Usage: python sys.argv[0] logpath
"""Print the first dotted-quad address found on each line of a log file."""
import re
import sys

# Four dot-separated runs of digits. Octet ranges are not validated, so a
# string such as 999.1.1.1 also matches.
pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')


def ip_fetch(logfile):
    """Scan ``logfile`` and print one address per matching line.

    Only the first match on a line is reported; lines without a match are
    skipped silently.

    Args:
        logfile: path of the text file to scan.

    Raises:
        IOError/OSError: if ``logfile`` cannot be opened.
    """
    with open(logfile) as f:
        for line in f:
            m = pattern.search(line)
            if m:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(m.group())


# Guarded so that importing this module does not touch sys.argv.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        ip_fetch(sys.argv[1])
    else:
        print("Usage: python %s /path/logfile" % sys.argv[0])
        print(" ")
效果如下:
[root@nginx1 python]# python tiquip.py secure 114.114.114.114 [root@nginx1 python]# python tiquip.py Usage: python tiquip.py /path/logfile [root@nginx1 python]#
2、然后进行排序,挑选出访问量大于500的ip
需要解决问题:去重排序
sorted()排序
set()去重
裸用dict来做的话:
# Tally occurrences with a plain dict.
# NOTE(review): `m` is assumed to be an iterable of IP strings - confirm
# against the surrounding script.
counter = dict()
for ip in m:
    # dict.get supplies 0 the first time an IP is seen.
    counter[ip] = counter.get(ip, 0) + 1
http://www.newsmth.net/nForum/#!article/Python/113879
http://hi.baidu.com/pythond/item/3607e6564aacae928d12ed41
去重(成哥提供的网站)
from collections import Counter
Counter([1,1,1,2,3,4,1,2,3,4])
Counter({1: 4, 2: 2, 3: 2, 4: 2})
2.1、去重操作
# coding: utf-8
# fetch.py
# Author: zhuima
# Date: 2014-06-24
# Function: fetch ip address
# Usage: python sys.argv[0] logpath
"""Count how often each IP address appears in a log file."""
from collections import Counter
import re
import sys

# Four dot-separated runs of digits; octet ranges are not validated.
pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
# Every address found so far, duplicates included; filled by ip_fetch().
listip = []


def ip_fetch(logfile):
    """Append the first address found on each line of ``logfile`` to ``listip``.

    Args:
        logfile: path of the text file to scan.

    Raises:
        IOError/OSError: if ``logfile`` cannot be opened.
    """
    with open(logfile) as f:
        for line in f:
            m = pattern.search(line)
            if m:
                listip.append(m.group())


def sort():
    """Print ``count \\t ip`` for every distinct address in ``listip``.

    Despite the name, this step only de-duplicates; the output order is the
    iteration order of the Counter.
    """
    td = Counter(listip)
    for ip, count in td.items():
        # Reproduces the spacing of the Python 2 statement
        # `print count,"\t",ip` while also running on Python 3.
        print("%s \t %s" % (count, ip))


# Guarded so that importing this module does not touch sys.argv.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        ip_fetch(sys.argv[1])
        sort()
    else:
        print("Usage: python %s /path/logfile" % sys.argv[0])
        print(" ")
效果如下:
[root@nginx1 python]# python fetch.py secure 1 114.xxx.xxx.xxx 1 114.xxx.xxx.xxx 1 106.xxx.xxx.xxx 4 124.xxx.xxx.xxx 1 61.xxx.xxx.xxx [root@nginx1 python]#
2.2、接下来进行排序
http://www.cnblogs.com/liyixin/archive/2012/07/23/2605013.html
脚本样式:
# coding: utf-8
# Author: zhuima
# Date: 2014-06-24
# Function: fetch ip address
# Usage: python sys.argv[0] logpath
"""Rank the IP addresses in a log file by number of occurrences."""
from collections import Counter
import re
import sys

# Four dot-separated runs of digits; octet ranges are not validated.
pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
# Every address found so far, duplicates included; filled by ip_fetch().
listip = []


def ip_fetch(logfile):
    """Append the first address found on each line of ``logfile`` to ``listip``.

    Args:
        logfile: path of the text file to scan.

    Raises:
        IOError/OSError: if ``logfile`` cannot be opened.
    """
    with open(logfile) as f:
        for line in f:
            m = pattern.search(line)
            if m:
                listip.append(m.group())


def sort():
    """Print one ``(ip, count)`` tuple per line, most frequent first."""
    # most_common() is the stdlib form of
    # sorted(items, key=count, reverse=True) and exists on Python 2.7 and 3,
    # unlike dict.iteritems().
    for entry in Counter(listip).most_common():
        print(entry)


# Guarded so that importing this module does not touch sys.argv.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        ip_fetch(sys.argv[1])
        sort()
    else:
        print("Usage: python %s /path/logfile" % sys.argv[0])
        print(" ")
效果如下:
[root@nginx1 python]# python fetch.py secure ('124.64.63.119', 4) ('114.245.169.74', 1) ('114.252.165.177', 1) ('106.37.169.186', 1) ('61.49.238.82', 1) [root@nginx1 python]#
再次更改,去除数值外面的圆括号
# coding: utf-8
# fetch.py
# Author: zhuima
# Date: 2014-06-24
# Function: fetch ip address
# Usage: python sys.argv[0] logpath
"""Rank the IP addresses in a log file and print ``count <TAB> ip`` rows."""
from collections import Counter
import re
import sys

# Four dot-separated runs of digits; octet ranges are not validated.
pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
# Every address found so far, duplicates included; filled by ip_fetch().
listip = []


def ip_fetch(logfile):
    """Append the first address found on each line of ``logfile`` to ``listip``.

    Args:
        logfile: path of the text file to scan.

    Raises:
        IOError/OSError: if ``logfile`` cannot be opened.
    """
    with open(logfile) as f:
        for line in f:
            m = pattern.search(line)
            if m:
                listip.append(m.group())


def sort():
    """Print ``count \\t ip`` for each distinct address, most frequent first."""
    # most_common() replaces the Python 2-only iteritems()-based sort.
    for ip, count in Counter(listip).most_common():
        # Same bytes as the Python 2 statement `print count,"\t",ip`.
        print("%s \t %s" % (count, ip))


# Guarded so that importing this module does not touch sys.argv.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        ip_fetch(sys.argv[1])
        sort()
    else:
        print("Usage: python %s /path/logfile" % sys.argv[0])
        print(" ")
效果如下:
[root@nginx1 python]# python fetch.py secure 4 124.xxx.xxx.xxx 1 114.xxx.xxx.xxx 1 114.xxx.xxx.xxx 1 106.xxx.xxx.xxx 1 61.xxx.xxx.xxx [root@nginx1 python]#
3、接下来解决如何写入到hosts.deny文件中去
思路:
先判断要写入的ip是否在hosts.deny文件中,如果在,就跳过,如果不在就写入
脚本测试文件,写不进文件中去
[root@nginx1 python]# vim write.py # coding utf-8 # x = '192.168.23.22' with open("/etc/hosts.deny","a+") as f: for line in f: if x not in line: f.write(x)
# coding utf-8 # import datetime dd = datetime. x = '192.168.23.22' f = open("/etc/hosts.deny","a+") for line in f: if x not in f: f.write("#########") f.write(datetime f.write("#########") f.write(x) f.write("\n") f.close()
http://blog.sina.com.cn/s/blog_6c3748830100ypt9.html
单个ip验证的时候
[root@rsync python]# cat write.py # coding utf-8 # import datetime day = datetime.date.today() today = day.strftime("%Y-%m-%d") x = ['192.168.23.22','192.168.23.21'] i = 0 f = open("/etc/hosts.deny","a+") for line in f: if x[i] not in line: print x[i] f.write("#########") f.write(today) f.write("#########") f.write("\n") f.write(x[i]) f.write("\n") i += 1 else: print x[i], "is exists!" break f.close()
当然我们实际环境中不可能只有一个ip,肯定会有很多,所以这里就需要使用列表了
多个参数传入的时候该如何是好
[root@rsync python]# cat write.py # coding utf-8 # import datetime day = datetime.date.today() today = day.strftime("%Y-%m-%d") x = ['192.168.23.22','192.168.23.21'] i = 0 f = open("/etc/hosts.deny","a+") for line in f: if x[i] not in line and i <= len(x): f.write("#########") f.write(today) f.write("#########") f.write("\n") f.write(x[i]) f.write("\n") i += 1 else: print x[i], "is exists!" break
上面代码报错:
[root@rsync python]# [root@rsync python]# python write.py /etc/hosts.deny Traceback (most recent call last): File "write.py", line 20, inif x[i] not in line and i <= len(x): IndexError: list index out of range [root@rsync python]#
踩坑之旅:
一直陷在循环里面跳不出来了,所以就一直绕啊绕~
问题:
1、嵌套循环无法解决上层循环的次数比内层循环的次数多的问题,从而导致插入数据过多
2、判断数据在列表中是否存在的时候老是想着要对比循环
3、基础薄弱
老雷给的建议:
# coding: utf-8
"""Append IPs that are not yet listed to /etc/hosts.deny."""
import datetime

day = datetime.date.today()
# Formatted date; not used by this snippet.
today = day.strftime("%Y-%m-%d")

ip_list = ['192.168.23.22', '192.168.23.21']


def append_missing(ips, path="/etc/hosts.deny"):
    """Append every entry of ``ips`` that is not already a line of ``path``.

    Args:
        ips: iterable of IP strings to ensure are present.
        path: file to update; it is created if it does not exist.

    Raises:
        IOError/OSError: if ``path`` cannot be opened for appending.
    """
    with open(path, "a+") as data:
        # "a+" may start positioned at end-of-file, so rewind before reading.
        data.seek(0)
        content = data.read()
        # Strip each line: readlines() keeps the trailing "\n", which made
        # the original `ip not in content_list` test always true.
        existing = set(line.strip() for line in content.splitlines())
        data.seek(0, 2)
        # Avoid gluing the first new entry onto an unterminated last line.
        if content and not content.endswith("\n"):
            data.write("\n")
        for ip in ips:
            if ip not in existing:
                data.write(ip + "\n")
                existing.add(ip)


if __name__ == "__main__":
    append_missing(ip_list)
最终完结的脚本:
# coding: utf-8
# fetch.py
# Author: zhuima
# Date: 2014-06-24
# Function: fetch ip address
# Usage: python sys.argv[0] logpath
"""Collect IPs from a log file and append the frequent ones to hosts.deny."""
from collections import Counter
import datetime
import re
import sys

# Four dot-separated runs of digits; octet ranges are not validated.
pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
# Every address found so far, duplicates included; filled by ip_fetch().
source_listip = []
today = datetime.date.today().strftime("%Y-%m-%d")


def ip_fetch(logfile):
    """Append the first address found on each line of ``logfile`` to
    ``source_listip``.

    Args:
        logfile: path of the text file to scan.

    Raises:
        IOError/OSError: if ``logfile`` cannot be opened.
    """
    with open(logfile) as f:
        for line in f:
            m = pattern.search(line)
            if m:
                source_listip.append(m.group())


def sort_write(deny_file="/etc/hosts.deny", threshold=500):
    """Print the ranking and append new over-threshold IPs to ``deny_file``.

    Every distinct address in ``source_listip`` is printed as
    ``count \\t ip``, most frequent first. An address is appended only when
    its count is at least ``threshold`` and it is not already a line of the
    file. Today's ``########YYYY-MM-DD#######`` banner is written once, just
    before the first address added on a day.

    Args:
        deny_file: file to update; it is created if it does not exist.
        threshold: minimum number of occurrences required to be listed.

    Raises:
        IOError/OSError: if ``deny_file`` cannot be opened for appending.
    """
    banner = "########" + today + "#######\n"
    with open(deny_file, "a+") as data_file:
        # "a+" may start positioned at end-of-file, so rewind before reading.
        data_file.seek(0)
        content = data_file.read()
        lines = [entry.strip() for entry in content.splitlines()]
        # Compare stripped lines: the raw lines end in "\n" and would never
        # equal a bare IP.
        existing = set(lines)
        # Look at every line for today's banner, not just the first one.
        banner_needed = not any(
            entry.startswith("#") and today in entry for entry in lines)
        ends_clean = (not content) or content.endswith("\n")
        data_file.seek(0, 2)

        # most_common() replaces the Python 2-only iteritems()-based sort.
        for ip, count in Counter(source_listip).most_common():
            print("%s \t %s" % (count, ip))
            if count < threshold or ip in existing:
                continue
            if not ends_clean:
                # Do not glue new text onto an unterminated last line.
                data_file.write("\n")
                ends_clean = True
            if banner_needed:
                data_file.write(banner)
                banner_needed = False
            data_file.write(ip + "\n")
            existing.add(ip)


# Guarded so that importing this module does not touch sys.argv.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        ip_fetch(sys.argv[1])
        sort_write()
    else:
        print("Usage: python %s /path/logfile" % sys.argv[0])
效果如下图所示:
Note:
由于导入datetime是使用的当前时间,而不是系统时间,所以这里显示的两个时间是一致的
总结:
感谢斌哥的思路指导,感谢成哥的Counter的思路提供,感谢老雷的一路指导~
1、基础知识掌握太弱
2、写的过程中老是急于求成,没有静下心来思考
3、多看官方文档,建议把标准库查看一遍
4、该脚本很烂,后期继续以函数的形式来写