使用Python处理硬盘数据日志

使用 Python 的多线程、多进程来批量处理硬盘测试中产生的数据,分析 duplicate 数量;代码可运行、结果正确,只是速度一般。
解析每一行的数据过程,仅作工程中使用,不具有代表性。


from sys import argv
from os.path import exists
from multiprocessing import Pool
from time import sleep
import os
import multiprocessing
import threading
import time
"""refer link:
http://www.cnblogs.com/rollenholt/archive/2012/04/23/2466179.html
http://www.cnblogs.com/vamei/archive/2012/10/12/2721484.html
"""
def getFileList(directryName):
    """Return the entries (files and subdirectories) of *directryName*.

    Returns an empty list when the path is not a directory, so callers
    can always iterate over the result (the original returned None,
    which would raise TypeError in a for-loop).
    """
    if os.path.isdir(directryName):
        return os.listdir(directryName)
    return []
def threadTest(msg, lock):
    """Print *msg* while holding *lock*, serializing output across threads.

    Fixes from the original: the parameter no longer shadows the builtin
    ``str``, and ``with lock:`` guarantees the lock is released even if
    printing raises (acquire/release were unpaired on exception before).
    """
    with lock:
        print(msg)
#Multi_thread
def multiThread():
    """Spawn five worker threads that each print their index under a
    shared lock, then wait for all of them to finish.
    """
    workers = []
    guard = threading.Lock()
    for idx in range(5):
        worker = threading.Thread(target=threadTest, args=(idx, guard))
        worker.start()
        workers.append(worker)
    # Block until every worker has printed and exited.
    for worker in workers:
        worker.join()
def multiThreadForDuplicateCheck():
    """Walk a directory tree and run DuplicateCheck on every .txt file,
    one thread per file, then report the elapsed wall time.

    Prompts interactively for the root directory (``raw_input`` --
    this script targets Python 2).
    """
    # time.time() gives wall time; time.clock() was removed in Python 3.8
    # and measured CPU time on some platforms anyway.
    start = time.time()
    record = []
    directory = raw_input("Enter directory name\n")
    for (path, dirs, files) in os.walk(directory):
        for fileName in files:
            if os.path.splitext(fileName)[1] == '.txt':
                # os.path.join is portable; the original hard-coded '\\'.
                fullName = os.path.join(path, fileName)
                thread = threading.Thread(target=DuplicateCheck,
                                          args=(fullName,))
                thread.start()
                record.append(thread)
    # BUG FIX: the join loop and the timing report were nested inside the
    # os.walk loop, so threads were joined and the elapsed time printed
    # once per directory instead of once at the end (compare the
    # multiprocess variant, which has them at function level).
    for thread in record:
        thread.join()
    elapsed = time.time() - start
    print("Multi thread Time used:", elapsed)
#Multi_process 
def multiProcessForDuplicateCheck():
    """Walk a directory tree and run DuplicateCheck on every .txt file,
    one process per file, then report the elapsed wall time.

    Prompts interactively for the root directory (``raw_input`` --
    this script targets Python 2).
    """
    # time.time() gives wall time; time.clock() was removed in Python 3.8.
    start = time.time()
    record = []
    directory = raw_input("Enter directory name\n")
    for (path, dirs, files) in os.walk(directory):
        for fileName in files:
            if os.path.splitext(fileName)[1] == '.txt':
                # os.path.join is portable; the original hard-coded '\\'.
                fullName = os.path.join(path, fileName)
                process = multiprocessing.Process(target=DuplicateCheck,
                                                  args=(fullName,))
                process.start()
                record.append(process)
    # Wait for every worker process before reporting the total time.
    for process in record:
        process.join()
    elapsed = time.time() - start
    print("Multi process Time used:", elapsed)
#duplicate check function   
def DuplicateCheck(fileName):
    """Parse one drive-test log and count duplicate ASAT G-list entries.

    Reads *fileName* line by line.  During the write phase it counts
    lines marked "F:47" and records a fixed-column key for each
    (cylinder + head + code).  After two "0x2e" markers the read phase
    begins: lines whose code column is '10' are matched against the
    recorded write keys until a "0x2b" marker ends the check.

    Prints a one-line summary and returns a tuple
    ``(SN, writeGLogCnt, readGLogCnt, ratio)`` where ``ratio`` is the
    read/write percentage (0 when no write entries were seen).
    The return value is new (was None) and is backward compatible.
    """
    SN = ''                 # serial number from the first line; '' if file is empty
    firstLine = True
    asatWriteDone = 0       # 0 = writing, 1 = increase done, 2 = decrease done
    asatWriteGLogCnt = 0
    asatWriteCylList = []   # keys recorded during the write phase
    asatReadDone = 0
    asatReadGLogCnt = 0
    # 'with' guarantees the file handle is closed even on a parse error.
    with open(fileName, 'r') as f:
        for eachLine in f:
            if firstLine:
                # Fixed-column serial number on the first line.
                SN = eachLine[7:16].strip()
                firstLine = False
            if "F:47" in eachLine:
                asatWriteGLogCnt += 1
                # key = cylinder[0:12] + head[13:15] + code[23:26]
                asatWriteCylList.append(eachLine[0:12].strip()
                                        + eachLine[13:15].strip()
                                        + eachLine[23:26].strip())
            if "0x2e" in eachLine:
                if asatWriteDone == 0:
                    asatWriteDone = 1   # ASAT write increase done
                elif asatWriteDone == 1:
                    asatWriteDone = 2   # ASAT write decrease done
            if asatWriteDone == 2:
                if "0x2b" in eachLine:
                    asatReadDone = 1    # ASAT read log check done
                if asatReadDone == 0 and eachLine[23:26].strip() == '10':
                    for cylIndex in range(len(asatWriteCylList)):
                        entry = asatWriteCylList[cylIndex]
                        # NOTE(review): compares the 2-char head field against
                        # a single char of the stored key (entry[-3]) -- kept
                        # exactly as the original; confirm the column layout
                        # if match counts look off.
                        if (eachLine[0:12].strip() == entry[0:-3]
                                and eachLine[13:15].strip() == entry[-3]):
                            asatReadGLogCnt += 1
                            # Each write entry may be matched at most once.
                            del asatWriteCylList[cylIndex]
                            break
    if asatWriteGLogCnt != 0:
        # BUG FIX: force float division -- under Python 2, 100*a/b truncated
        # to an integer before being formatted with %.4f below.
        ratio = 100.0 * asatReadGLogCnt / asatWriteGLogCnt
    else:
        ratio = 0
    print('SN:%s,Write G log:%d Read G log:%d Ratio:%.4f%%\n'
          % (SN, asatWriteGLogCnt, asatReadGLogCnt, ratio))
    return (SN, asatWriteGLogCnt, asatReadGLogCnt, ratio)
if __name__=="__main__":
    """
    i = 0
    pool = Pool(processes = 4)
    directory = raw_input("Enter directory name\n")
    fileList = getFileList(directory)
    print fileList[0]
    while i< len(fileList):
        result = pool.apply_async(DuplicateCheck,(fileList[i],))
        i+=1
    pool.close()
    pool.join()
    if result.successful():
        print "check successfull"
    """
    #multiThread()                 # test for thread operation
    multiThreadForDuplicateCheck() #multi thread operation,lower than mutiprocess
    #multiProcessForDuplicateCheck() #multi process operation

你可能感兴趣的:(python文件处理)