# 归并大文件 (Merge large files)

import sys

# Report file; opened at import time, then written and closed by main().
ff = open('./groupby.txt', 'w')

# Each input chunk is opened as prefix + suffix (e.g. "xaa", "xab", ...).
prefix = 'x'
files = [
    'aa', 'ab', 'ac', 'ad', 'ae', 'af', 'ag', 'ah',
    'ai', 'aj', 'ak', 'al', 'am', 'an', 'ao', 'ap',
    'aq', 'ar', 'as', 'at', 'au', 'av', 'aw', 'ax',
    'ay', 'az', 'ba', 'bb', 'bc', 'bd', 'be', 'bf',
]
def initList():
    """Return a new list of 601 zero counters (indices 0 through 600)."""
    # Integers are immutable, so list repetition yields independent slots.
    return [0] * 601

def makeGroup(datas, levelList):
    """Add every run in ``datas`` to the per-level counters.

    Args:
        datas: iterable of ``[level, number]`` pairs as appended by
            ``handler``. ``level`` is converted with ``int()``, so it may be
            an int or a numeric string (a trailing newline is accepted).
        levelList: list of counters indexed by level; updated in place.

    Raises:
        IndexError: if a level is outside the bounds of ``levelList``.
    """
    for data in datas:
        # Add the run length rather than 1, so a run of N equal lines
        # contributes N to its level.
        levelList[int(data[0])] += data[1]


def groupby(f, result):
    """Collapse consecutive equal levels read from ``f`` into runs.

    Each line of ``f`` must hold one integer. Values outside 0..600 are
    skipped and do not interrupt the run in progress. For every run,
    ``[first_line_of_run, run_length]`` is appended to ``result`` through
    ``handler``. Nothing is appended when ``f`` has no in-range lines.

    Args:
        f: iterable of text lines (e.g. an open file); read lazily so the
            whole file never has to be held in memory.
        result: list that receives the ``[level, number]`` pairs.

    Raises:
        ValueError: if a line cannot be parsed by ``int()``.
    """
    run_line = None    # raw text of the first line of the current run
    run_level = None   # its integer value, parsed once
    run_length = 0
    for line in f:
        level = int(line)
        if level < 0 or level > 600:
            continue
        # Compare integers: comparing the parsed value with the stored
        # raw line would never match and every run would have length 1.
        if run_length and level == run_level:
            run_length += 1
            continue
        if run_length:
            handler(run_line, run_length, result)
        run_line, run_level, run_length = line, level, 1
    # Flush the last run; guarded so empty input does not raise.
    if run_length:
        handler(run_line, run_length, result)


def handler(level, number, result):
    """Append the pair ``[level, number]`` to ``result``."""
    result.append([level, number])
def main():
    """Aggregate per-level counts over all input chunks and write a report.

    For every suffix in ``files`` the file ``prefix + suffix`` is opened,
    passed through ``groupby`` and folded into the counters with
    ``makeGroup``. The counters are then written to the module-level ``ff``
    as a tab-separated "level / number" table, one row per index of the
    counter list. ``ff`` is closed before returning, including on error.
    """
    levelList = initList()
    try:
        for name in files:
            result = []
            # Parenthesised single-argument form prints the same text on
            # Python 2 and Python 3.
            print(name)
            with open(prefix + name, 'r') as f:
                groupby(f, result)
            makeGroup(result, levelList)
        ff.write('level' + '\t' + 'number' + '\n')
        for i, count in enumerate(levelList):
            ff.write(str(i) + "\t" + str(count) + '\n')
    finally:
        ff.close()


if __name__ == '__main__':
    main()

# 你可能感兴趣的:(python,F#)