Using Python's multithreading and multiprocessing to process the bulk data produced by hard-drive testing and count duplicates. The script runs and the results are correct, but the speed is only so-so. The per-line parsing logic is for in-house engineering use only and is not meant to be representative.

from sys import argv
from os.path import exists
from multiprocessing import Pool
from time import sleep
import os
import multiprocessing
import threading
import time

"""Reference links:
http://www.cnblogs.com/rollenholt/archive/2012/04/23/2466179.html
http://www.cnblogs.com/vamei/archive/2012/10/12/2721484.html
"""


def getFileList(directoryName):
    # return the bare file names in a directory (no recursion, no path prefix)
    if os.path.isdir(directoryName):
        listFile = os.listdir(directoryName)
        #print listFile
        return listFile


def threadTest(msg, lock):
    # serialize prints from concurrent threads with a lock
    lock.acquire()
    print msg
    lock.release()


# Multi-thread smoke test
def multiThread():
    record = []
    lock = threading.Lock()
    for i in range(5):
        thread = threading.Thread(target=threadTest, args=(i, lock))
        thread.start()
        record.append(thread)
    for thread in record:
        thread.join()


def multiThreadForDuplicateCheck():
    # one thread per .txt file found under the given directory tree
    start = time.clock()
    record = []
    directory = raw_input("Enter directory name\n")
    for (path, dirs, files) in os.walk(directory):
        for fileName in files:
            if os.path.splitext(fileName)[1] == '.txt':
                fileName = os.path.join(path, fileName)
                thread = threading.Thread(target=DuplicateCheck, args=(fileName,))
                thread.start()
                record.append(thread)
    for thread in record:
        thread.join()
    elapsed = time.clock() - start
    print ("Multi-thread time used:", elapsed)


# Multi-process version
def multiProcessForDuplicateCheck():
    # one process per .txt file found under the given directory tree
    start = time.clock()
    record = []
    directory = raw_input("Enter directory name\n")
    for (path, dirs, files) in os.walk(directory):
        for fileName in files:
            if os.path.splitext(fileName)[1] == '.txt':
                fileName = os.path.join(path, fileName)
                process = multiprocessing.Process(target=DuplicateCheck, args=(fileName,))
                process.start()
                record.append(process)
    for process in record:
        process.join()
    elapsed = time.clock() - start
    print ("Multi-process time used:", elapsed)


# Duplicate-check function: parse one raw-data log and compare write/read G-log entries
def DuplicateCheck(fileName):
    firstLine = 0
    asatWriteDone = 0
    asatWriteGLogCnt = 0
    asatWriteCylList = []
    asatReadDone = 0
    asatReadGLogCnt = 0
    asatReadCylList = []
    SN = ''    # keep SN defined even if the file turns out to be empty
    #fileName = 'C:/Users/xlian/Downloads/rawdata (20)/EZ08D1XM.txt'
    #fileName = raw_input("Enter file to decode")
    f = open(fileName, 'r')
    # Option A: read all lines at once -- fine for small files, switch to B for big ones
    """allLines = f.readlines()
    f.close()
    for eachLine in allLines:
        print eachLine
    """
    # Option B: read line by line with the file iterator
    for eachLine in f:
        if firstLine == 0:
            SN = eachLine[7:16].strip()   # drive serial number sits on the first line
            firstLine = 1
        if "F:47" in eachLine:
            # write-phase G-log entry: remember its cylinder/head/flag fields
            asatWriteGLogCnt += 1
            asatWriteCylList.append(eachLine[0:12].strip() + eachLine[13:15].strip() + eachLine[23:26].strip())
        if "0x2e" in eachLine:
            if asatWriteDone == 0:
                asatWriteDone = 1   # asat write increase done
            elif asatWriteDone == 1:
                asatWriteDone = 2   # asat write decrease done
            else:
                pass
        if asatWriteDone == 2:
            if "0x2b" in eachLine:
                asatReadDone = 1    # asat read log check done
            if asatReadDone == 0:
                if eachLine[23:26].strip() == '10':
                    # match this read entry against the recorded write entries
                    for cylIndex in range(len(asatWriteCylList)):
                        if eachLine[0:12].strip() == asatWriteCylList[cylIndex][0:-3] and eachLine[13:15].strip() == asatWriteCylList[cylIndex][-3]:
                            asatReadGLogCnt += 1
                            del asatWriteCylList[cylIndex]
                            break
    f.close()
    if asatWriteGLogCnt != 0:
        # float division so the percentage keeps its fractional part
        ratio = 100.0 * asatReadGLogCnt / asatWriteGLogCnt
    else:
        ratio = 0
    print 'SN:%s, Write G log:%d, Read G log:%d, Ratio:%.4f%%\n' % (SN, asatWriteGLogCnt, asatReadGLogCnt, ratio)
    #sleep(1)


#directory = raw_input("Enter directory: D:/LXK/Your directory\n")
#print getFileList(directory)

if __name__ == "__main__":
    """Earlier Pool-based attempt, kept for reference. Note that getFileList()
    returns bare file names without the directory prefix, so DuplicateCheck
    would need full paths for this to work outside the target directory.
    i = 0
    pool = Pool(processes=4)
    directory = raw_input("Enter directory name\n")
    fileList = getFileList(directory)
    print fileList[0]
    while i < len(fileList):
        result = pool.apply_async(DuplicateCheck, (fileList[i],))
        i += 1
    pool.close()
    pool.join()
    if result.successful():
        print "check successful"
    """
    #multiThread()                    # smoke test for the thread helpers
    multiThreadForDuplicateCheck()    # multithreaded version; slower than the multiprocess one
    #multiProcessForDuplicateCheck()  # multiprocess version
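
Since spawning one thread or process per file is part of what keeps the speed mediocre, here is a minimal sketch of a Pool-based runner that reuses the DuplicateCheck function above. It is my own untested variant, not part of the original script: the pool size of 4 is an arbitrary assumption, and collectTxtFiles / poolForDuplicateCheck are helper names made up for illustration. The point is only that Pool.map bounds the number of worker processes and hands each worker a full file path.

from multiprocessing import Pool
import os
import time

def collectTxtFiles(directory):
    # gather full paths so the workers do not depend on the current working directory
    txtFiles = []
    for (path, dirs, files) in os.walk(directory):
        for fileName in files:
            if os.path.splitext(fileName)[1] == '.txt':
                txtFiles.append(os.path.join(path, fileName))
    return txtFiles

def poolForDuplicateCheck(processes=4):
    # bounded worker pool instead of one process per file (pool size is a guess)
    start = time.time()
    directory = raw_input("Enter directory name\n")
    pool = Pool(processes=processes)
    pool.map(DuplicateCheck, collectTxtFiles(directory))   # blocks until every file is parsed
    pool.close()
    pool.join()
    print ("Pool time used:", time.time() - start)

Called from the __main__ guard in place of multiThreadForDuplicateCheck(), this keeps at most `processes` files being parsed at any moment, which on a multi-core machine usually beats creating one short-lived process for every .txt file.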