大文件的python处理

遇到QA测试人员提供的超大log文件(3.24G),有些无从下手,故写了如下Python代码,将大文件分割并保存检测出关键字的部分。

具体code如下

#!/usr/bin/python
# AUTHOR: ZPEG

import os,sys,shutil

# Keyword to search for; chunks that do not contain it are deleted.
findstr = "CTRL-EVENT-DISCONNECTED"
# Size helpers for expressing the chunk size in bytes.
kilobytes = 1024
megabytes = 1024 * kilobytes
chunksize = int(100*megabytes)#default chunksize: each split file is 100 MB

def isstr(filename, str):
    """Return True if the keyword *str* occurs anywhere in *filename*.

    The file is scanned line by line so only one line is resident in
    memory at a time (the original ``readlines()`` loaded the whole
    100 MB chunk at once).  The chunks are raw byte slices of a larger
    log, so a slice boundary can fall inside a multi-byte sequence;
    ``errors="replace"`` keeps the scan from raising UnicodeDecodeError.

    NOTE(review): parameter name ``str`` shadows the builtin; kept
    unchanged for backward compatibility with existing callers.
    """
    print('Begin find ', str, ' in ', filename)
    # `with` guarantees the handle is closed on every path; the
    # original leaked it when the keyword was never found or when an
    # exception escaped the loop.
    with open(filename, errors="replace") as fh:
        for line in fh:
            if str in line:
                print('find ', str, ' in ', filename)
                return True
    print('Not find ', str, ' in ', filename)
    return False

def split(fromfile, todir):
    """Split *fromfile* into ``chunksize``-byte pieces under *todir*.

    *todir* is created if missing, otherwise emptied first.  Each piece
    is written as ``dataNNNN``; pieces that do not contain the
    module-level keyword ``findstr`` (per :func:`isstr`) are deleted
    immediately, so only interesting chunks survive.  Returns the total
    number of chunks read, including the deleted ones.

    NOTE(review): a keyword that straddles a chunk boundary is missed;
    acceptable for coarse log triage, but worth knowing.
    """
    # Start from an empty target directory.
    if not os.path.exists(todir):
        os.mkdir(todir)
    else:
        for fname in os.listdir(todir):
            os.remove(os.path.join(todir, fname))

    partnum = 0
    # `with` closes the multi-gigabyte source file even if an exception
    # escapes the loop; the original never closed it at all.
    with open(fromfile, 'rb') as inputfile:
        while True:
            print(partnum)
            chunk = inputfile.read(chunksize)
            if not chunk:  # EOF reached
                break
            partnum += 1
            filename = os.path.join(todir, ('data%04d' % partnum))
            # Write the chunk out, closing the part file promptly so
            # isstr() below reads a fully flushed file.
            with open(filename, 'wb') as fileobj:
                fileobj.write(chunk)
            if not isstr(filename, findstr):
                # No keyword in this chunk: discard it.
                os.remove(filename)
                print(filename)

    return partnum

if __name__ == '__main__':
    # Script entry point: split the hard-coded log file into
    # keyword-bearing chunks under ./zrb and report the outcome.
    fromfile = "ruibin_wifi-0716.logcat"
    todir = "zrb"
    absfrom, absto = map(os.path.abspath, [fromfile, todir])
    print('Splitting', absfrom, 'to', absto, 'by', chunksize)
    try:
        parts = split(fromfile, todir)
    except Exception:
        # Narrowed from a bare `except:`, which also swallowed
        # SystemExit and KeyboardInterrupt (e.g. Ctrl-C mid-split).
        print('Error during split:')
        print(sys.exc_info()[0], sys.exc_info()[1])
    else:
        print('split finished:', parts, 'parts are in', absto)

 

你可能感兴趣的:(Linux,/,debug)