文件倒读
In [1]: cat /tmp/1.txt a b c d In [2]: ll /tmp/1.txt 应该是有4个字符,每个字符后边都有一个换行符号\n,加上换行符共8个字节。 -rw-r--r-- 1 admin wheel 8 7 3 15:18 /tmp/1.txt In [3]: f = open('/tmp/1.txt') In [4]: f.tell() 指针位置:0 在文件的开头 Out[4]: 0 In [5]: f.read(1) 读一个字符 Out[5]: 'a' In [6]: f.tell() 指针位置:1 Out[6]: 1 In [13]: f.read(1) 读一个字符 Out[13]: '\n' In [14]: f.tell() 指针位置:2 Out[14]: 2 In [15]: f.read(2) 读2个字节,b和\n Out[15]: 'b\n' In [16]: f.tell() 指针位置:4 Out[16]: 4 In [17]: f.read(100) Out[17]: 'c\nd\n' In [18]: f.tell() Out[18]: 8 In [30]: help(f.seek) Help on built-in function seek: seek(...) seek(offset[, whence]) -> None. Move to new file position. Argument offset is a byte count. Optional argument whence defaults to 0 (offset from start of file, offset should be >= 0); other values are 1 (move relative to current position, positive or negative), and 2 (move relative to end of file, usually negative, although many platforms allow seeking beyond the end of a file). If the file is opened in text mode, only offsets returned by tell() are legal. Use of other offsets causes undefined behavior. Note that not all file objects are seekable. 0 从开头指针位置移动 In [45]: f.seek(2) In [46]: f.tell() Out[46]: 2 In [10]: f.seek(0,0) 移动指针到开头,从开头向后移动0位 In [11]: f.tell() Out[11]: 0 1 从当前指针位置移动 f.tell() Out[42]: 5 In [43]: f.seek(-2,1) In [44]: f.tell() Out[44]: 3 2 从结尾的指针位置开始移动move relative to end of file In [8]: f.seek(3,2) 8+3=11从结尾8向后移动三位, In [9]: f.tell() Out[9]: 11 In [12]: f.seek(0,2) 把指针移动到最后 In [13]: f.tell() Out[13]: 8
# admindeMacBook-Air-62:~ admin$ cat /tmp/1.txt
# a
# b
# c
# d

#!/usr/bin/env python
# ---- seek.py: print the characters of /tmp/1.txt from last to first ----
import sys

if __name__ == '__main__':
    # Raw byte sink: sys.stdout.buffer on Python 3, sys.stdout itself on Python 2.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    # Binary mode, because Python 3 rejects non-zero relative seeks on text files.
    # The with-block closes the file even if a seek or read fails.
    with open('/tmp/1.txt', 'rb') as f:
        f.seek(0, 2)  # move to the end of the file
        # Each pass steps back two bytes and reads one, so the pointer moves
        # one byte towards the start per iteration.  The very last byte (the
        # trailing newline) is never read.  Stopping on "tell() > 1" instead
        # of testing "tell() == 1" means an empty file simply prints nothing
        # rather than seeking before the start of the file.
        while f.tell() > 1:
            f.seek(-2, 1)
            c = f.read(1)
            out.write(c if c == b'\n' else c + b' ')
        out.write(b'\n')

# python seek.py
# d
# c
# b
# a

#!/usr/bin/env python
# encoding: utf8
# ---- seek3.py: print the lines of /etc/hosts from last to first ----
import sys

if __name__ == '__main__':
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    with open('/etc/hosts', 'rb') as f:
        f.seek(0, 2)
        line = b''
        while f.tell() > 1:
            f.seek(-2, 1)
            c = f.read(1)
            if c != b'\n':
                # Reading backwards, so prepend: '' + 'd' = 'd', '4' + 'd' = '4d'.
                line = c + line
            else:
                out.write(line + b'\n')
                line = b''  # start collecting the previous line
        # The pointer reached the first byte: what is collected is the first line.
        out.write(line + b'\n')

# python seek3.py
# 221.228.208.76 dmp.chinadep.com admin.chinadep.com
文件倒读
In [1]: cat /tmp/1.txt a b c d In [2]: ll /tmp/1.txt 应该是有4个字符,每个字符后边都有一个换行符号\n,加上换行符共8个字节。 -rw-r--r-- 1 admin wheel 8 7 3 15:18 /tmp/1.txt In [3]: f = open('/tmp/1.txt') In [4]: f.tell() 指针位置:0 在文件的开头 Out[4]: 0 In [5]: f.read(1) 读一个字符 Out[5]: 'a' In [6]: f.tell() 指针位置:1 Out[6]: 1 In [13]: f.read(1) 读一个字符 Out[13]: '\n' In [14]: f.tell() 指针位置:2 Out[14]: 2 In [15]: f.read(2) 读2个字节,b和\n Out[15]: 'b\n' In [16]: f.tell() 指针位置:4 Out[16]: 4 In [17]: f.read(100) Out[17]: 'c\nd\n' In [18]: f.tell() Out[18]: 8 In [30]: help(f.seek) Help on built-in function seek: seek(...) seek(offset[, whence]) -> None. Move to new file position. Argument offset is a byte count. Optional argument whence defaults to 0 (offset from start of file, offset should be >= 0); other values are 1 (move relative to current position, positive or negative), and 2 (move relative to end of file, usually negative, although many platforms allow seeking beyond the end of a file). If the file is opened in text mode, only offsets returned by tell() are legal. Use of other offsets causes undefined behavior. Note that not all file objects are seekable. 0 从开头指针位置移动 In [45]: f.seek(2) In [46]: f.tell() Out[46]: 2 In [10]: f.seek(0,0) 移动指针到开头,从开头向后移动0位 In [11]: f.tell() Out[11]: 0 1 从当前指针位置移动 f.tell() Out[42]: 5 In [43]: f.seek(-2,1) In [44]: f.tell() Out[44]: 3 2 从结尾的指针位置开始移动move relative to end of file In [8]: f.seek(3,2) 8+3=11从结尾8向后移动三位, In [9]: f.tell() Out[9]: 11 In [12]: f.seek(0,2) 把指针移动到最后 In [13]: f.tell() Out[13]: 8
# admindeMacBook-Air-62:~ admin$ cat /tmp/1.txt
# a
# b
# c
# d

#!/usr/bin/env python
# ---- seek.py: print the characters of /tmp/1.txt from last to first ----
import sys

if __name__ == '__main__':
    # Raw byte sink: sys.stdout.buffer on Python 3, sys.stdout itself on Python 2.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    # Binary mode, because Python 3 rejects non-zero relative seeks on text files.
    # The with-block closes the file even if a seek or read fails.
    with open('/tmp/1.txt', 'rb') as f:
        f.seek(0, 2)  # move to the end of the file
        # Each pass steps back two bytes and reads one, so the pointer moves
        # one byte towards the start per iteration.  The very last byte (the
        # trailing newline) is never read.  Stopping on "tell() > 1" instead
        # of testing "tell() == 1" means an empty file simply prints nothing
        # rather than seeking before the start of the file.
        while f.tell() > 1:
            f.seek(-2, 1)
            c = f.read(1)
            out.write(c if c == b'\n' else c + b' ')
        out.write(b'\n')

# python seek.py
# d
# c
# b
# a

#!/usr/bin/env python
# encoding: utf8
# ---- seek3.py: print the lines of /etc/hosts from last to first ----
import sys

if __name__ == '__main__':
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    with open('/etc/hosts', 'rb') as f:
        f.seek(0, 2)
        line = b''
        while f.tell() > 1:
            f.seek(-2, 1)
            c = f.read(1)
            if c != b'\n':
                # Reading backwards, so prepend: '' + 'd' = 'd', '4' + 'd' = '4d'.
                line = c + line
            else:
                out.write(line + b'\n')
                line = b''  # start collecting the previous line
        # The pointer reached the first byte: what is collected is the first line.
        out.write(line + b'\n')

# python seek3.py
# 221.228.208.76 dmp.chinadep.com admin.chinadep.com

# admindeMacBook-Air-62:~ admin$ cat /tmp/1.txt
# 1aA
# 2bB
# 3cC
# 4dD

# vim seek5.py
#!/usr/bin/env python
# encoding: utf8
# ---- seek5.py: generator version, one byte per read ----
import sys


def _as_text(raw):
    """Return *raw* as ``str``: unchanged on Python 2, UTF-8 decoded on Python 3."""
    return raw if isinstance(raw, str) else raw.decode('utf-8')


def filerev(fn):
    """Yield the lines of file *fn* from last to first, without newlines.

    The file is walked backwards one byte at a time with relative seeks.
    An empty file yields nothing; a final line that lacks a trailing newline
    is still returned in full.
    """
    with open(fn, 'rb') as f:
        f.seek(0, 2)
        if f.tell() == 0:
            return
        # Look at the last byte: a trailing newline only terminates the last
        # line, anything else is already part of it.
        f.seek(-1, 1)
        last = f.read(1)
        line = b'' if last == b'\n' else last
        while f.tell() > 1:
            # Step back over the byte just read plus one more, then read it.
            f.seek(-2, 1)
            c = f.read(1)
            if c != b'\n':
                line = c + line  # '' + 'D' = 'D', 'd' + 'D' = 'dD', '4' + 'dD' = '4dD'
            else:
                yield _as_text(line)
                line = b''  # start collecting the previous line
        # The pointer reached the first byte: emit the first line of the file.
        yield _as_text(line)


if __name__ == '__main__':
    a = filerev('/tmp/1.txt')  # only a generator object; values appear on iteration
    for i in a:
        print(i)

# 4dD
# 3cC
# 2bB
# 1aA

# vim seek6.py
#!/usr/bin/env python
# encoding: utf8
# ---- seek6.py: generator version, reading the file in blocks ----
import sys


def _as_text(raw):
    """Return *raw* as ``str``: unchanged on Python 2, UTF-8 decoded on Python 3."""
    return raw if isinstance(raw, str) else raw.decode('utf-8')


def filerev(fn, bufsize=256):
    """Yield the non-empty lines of file *fn* from last to first.

    The file is read backwards in blocks of *bufsize* bytes instead of one
    byte at a time.  Blank lines are skipped, so an empty file yields nothing.

    Raises ValueError if *bufsize* is not positive.
    """
    if bufsize <= 0:
        raise ValueError('bufsize must be positive')
    with open(fn, 'rb') as f:
        f.seek(0, 2)
        size = f.tell()  # total number of bytes, newlines included
        # The size is rarely a multiple of bufsize; the leftover bytes are read
        # together with the last full block on the first pass.
        rem = size % bufsize
        # Start of that first read; max() clamps it to 0 for small files.
        offset = max(0, size - (bufsize + rem))
        line = b''  # beginning of a line whose head lies in an earlier block
        while offset >= 0:
            f.seek(offset)
            chunk = f.read(bufsize + rem)
            rem = 0  # only the first read includes the leftover bytes
            offset -= bufsize
            pieces = chunk.split(b'\n')
            # The tail of this block continues into the text carried over from
            # the block that follows it in the file.
            pieces[-1] += line
            # Everything after the first piece is now a complete line.
            for piece in reversed(pieces[1:]):
                if piece:
                    yield _as_text(piece)
            # The first piece may still be missing its head.
            line = pieces[0]
        if line:
            yield _as_text(line)


if __name__ == '__main__':
    a = filerev('/etc/passwd')  # only a generator object; values appear on iteration
    for i in a:
        print(i)
_wwwproxy:*:252:252:WWW Proxy:/var/empty:/usr/bin/false _xserverdocs:*:251:251:OS X Server Documents Service:/var/empty:/usr/bin/false _ondemand:*:249:249:On Demand Resource Daemon:/var/db/ondemand:/usr/bin/false _mbsetupuser:*:248:248:Setup User:/var/setup:/bin/bash _gamecontrollerd:*:247:247:Game Controller Daemon:/var/empty:/usr/bin/false _krbfast:*:246:-2:Kerberos FAST Account:/var/empty:/usr/bin/false _astris:*:245:245:Astris Services:/var/db/astris:/usr/bin/false
统计apache日志
#!/usr/bin/env python
# encoding: utf8
import sys
import socket
import datetime
import time

# Log format (the timestamp is the 4th whitespace-separated field):
# 116.236.168.138 - - [29/Sep/2017:02:20:36 +0800] "GET /render/?width=586&height=308&_salt=1499170697.335 HTTP/1.1" 200 1147

# Month abbreviation as written in the log -> month number.
MONTH = {
    'Jan': 1,
    'Feb': 2,
    'Mar': 3,
    'Apr': 4,
    'May': 5,
    'Jun': 6,
    'Jul': 7,
    'Aug': 8,
    'Sep': 9,
    'Oct': 10,
    'Nov': 11,
    'Dec': 12,
}


def parseLogtime(s):
    """Convert a log timestamp such as '29/Sep/2017:02:20:36' to a datetime.

    Raises KeyError for an unknown month abbreviation and ValueError when the
    string does not have the day/month/year:hour:minute:second layout.
    """
    # The log stores day, month name, then year and time joined by colons.
    day, month, yearandtime = s.split('/')
    year, hour, minute, second = [int(i) for i in yearandtime.split(':')]
    return datetime.datetime(year, MONTH[month], int(day), hour, minute, second)


def countDict(k, d):
    """Increment the counter stored under key *k* in dict *d* (starting at 1)."""
    d[k] = d.get(k, 0) + 1


def parseLogfile(f):
    """Count the lines of log file *f* per timestamp.

    Returns a dict whose keys are datetime objects (one-second resolution)
    and whose values are the number of log lines carrying that timestamp.
    Lines with fewer than four fields (for example blank lines) are skipped.
    """
    dic = {}
    with open(f) as fd:
        for line in fd:
            fields = line.split()
            if len(fields) < 4:
                continue
            # fields[3] looks like '[29/Sep/2017:02:20:36'; drop the bracket.
            timestr = fields[3][1:]
            logtime = parseLogtime(timestr)
            countDict(logtime, dic)
    # Example of the resulting dict:
    # {datetime.datetime(2017, 7, 8, 10, 33, 24): 3,
    #  datetime.datetime(2017, 12, 2, 16, 13, 12): 3, ...}
    return dic


def send_graphite(dic, host='112.65.140.132', port=2003):
    """Send every (timestamp, count) pair of *dic* to *host*:*port*.

    One text line "httpd.count <count> <epoch seconds>" is written per entry,
    and each line is also printed.
    """
    key = "httpd.count"
    sock = socket.socket()
    try:
        sock.connect((host, port))
        for k, v in dic.items():  # k is the timestamp, v its number of hits
            # time.mktime() gives the same local-time epoch as the
            # platform-specific strftime('%s'), but works everywhere.
            s = "%s %d %d\n" % (key, v, int(time.mktime(k.timetuple())))
            print(s)
            # sendall() keeps writing until the whole line has been sent.
            sock.sendall(s.encode('ascii'))
    finally:
        sock.close()


if __name__ == '__main__':
    # An optional first argument overrides the default log file.
    if len(sys.argv) > 1:
        logfile = sys.argv[1]
    else:
        logfile = '/tmp/graphite-web-access.log'
    data = parseLogfile(logfile)
    send_graphite(data)

#[root@client ~]# netstat -lnp
#Active Internet connections (only servers)
#Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
#tcp 0 0 127.0.0.1:6379 0.0.0.0:* LISTEN 2414/redis-server 1
#tcp 0 0 0.0.0.0:9391 0.0.0.0:* LISTEN 863/openvassd
#tcp 0 0 0.0.0.0:111 0.0.0.0:* LISTEN 1212/rpcbind
#tcp 0 0 0.0.0.0:2003 0.0.0.0:* LISTEN 6212/python2.6
#tcp 0 0 0.0.0.0:2004 0.0.0.0:* LISTEN 6212/python2.6

# Execution result:
# /System/Library/Frameworks/Python.framework/Versions/2.7/bin/python2.7 /Users/admin/Desktop/project/graphite/apache_log.py
# httpd.count 3 1499481204
httpd.count 3 1512202392 httpd.count 1 1512187235 httpd.count 1 1512119396 Process finished with exit code 0
[root@client whisper]# pwd /var/lib/carbon/whisper [root@client whisper]# ll 总用量 16 drwxr-xr-x 3 carbon carbon 4096 9月 5 2017 carbon drwxr-xr-x 2 carbon carbon 4096 7月 8 11:42 httpd drwxr-xr-x 2 carbon carbon 4096 9月 7 2017 interface drwxr-xr-x 2 carbon carbon 4096 9月 6 2017 test [root@client whisper]# ll httpd/ 总用量 20 -rw-r--r-- 1 carbon carbon 17308 7月 8 11:46 count.wsp