使用 Python 切分、合并文件

虽然在 Linux 下已经有了 split 命令可以用来切分文件,不过它仍然有一些限制,不能完全按照你的需求随心所欲地切分,比如切分后文件的命名规则。刚才试着用 Python 写了一个切分文件的脚本,它是按照大小来切分的;当然,如果你需要按照行数或者其他标准来切分,修改脚本中相应的代码就可以了。

splitfile.py

 

import sys
import os

# Running counter used to number the split parts (<name>.part1, .part2, ...).
fileind = 0

# Operation to perform: 's' to split, 'c' to combine.
# NOTE: this shadows the builtin `type`; the name is kept because main()
# reads the module-level variable under this name.
type = ''

# Path of the file to split, or of the file to rebuild when combining.
filename = ''

# Size of each split part in MB (defaults to 1). When given on the command
# line it is stored as the raw string; splitFile() converts it with int().
# NOTE: shadows the builtin `slice`, kept for the same reason as `type`.
slice = 1

# Parse the command line by hand: every recognised option consumes the
# argument that follows it; unrecognised arguments are ignored.
i = 1
while i < len(sys.argv):
    arg = sys.argv[i]
    if arg in ('--type', '-t', '--filename', '-f', '--slice', '-s'):
        if i + 1 >= len(sys.argv):
            # The option is the last argument and has no value. Warn and
            # keep the default instead of crashing with an IndexError.
            sys.stderr.write("option " + arg + " requires a value\n")
            break
        i += 1
        if arg in ('--type', '-t'):
            type = sys.argv[i]
        elif arg in ('--filename', '-f'):
            filename = sys.argv[i]
        else:
            slice = sys.argv[i]
    i += 1

def splitFile(filename, slice):
    """Split *filename* into consecutive part files of at most *slice* MB.

    The source file is read in binary mode, chunk by chunk; each chunk is
    written to the name returned by getSplitFileName(filename). Every part
    except possibly the last one is exactly ``slice`` MB in size. The source
    file itself is left untouched.

    Args:
        filename: path of the file to split.
        slice: part size in MB; anything accepted by ``int()`` (the command
            line passes it as a string).

    Raises:
        ValueError: if *slice* is not an integer or is not positive.
    """
    global fileind

    chunk = 1048576 * int(slice)      # 1048576 bytes == 1 MB
    if chunk <= 0:
        # read(0) would report success without writing anything and a
        # negative size would read the whole file into a single part.
        raise ValueError("slice must be a positive number of MB")

    # Restart the numbering so every split begins at .part1, which is the
    # first name combine() looks for.
    fileind = 0

    with open(filename, "rb") as file_ori:
        while True:
            content = file_ori.read(chunk)
            # An empty read means end of file. Testing truthiness works for
            # both str (Python 2) and bytes (Python 3); comparing with ""
            # never matches bytes and would loop forever.
            if not content:
                break
            newfilename = getSplitFileName(filename)
            with open(newfilename, "wb") as file_split:
                file_split.write(content)
    print("split file " + filename + " succeed")
  

def getSplitFileName(sourcefile):
    """Return the name of the next part file for *sourcefile*.

    Increments the module-level ``fileind`` counter and returns
    ``sourcefile + '.part' + str(fileind)``.

    The name is only accepted when it contains a '.' somewhere after its
    first character; otherwise a message is printed and the process exits
    with status 1.

    Args:
        sourcefile: path of the file being split.

    Returns:
        The part file name, e.g. ``data.bin.part3``.
    """
    global fileind
    fileind = fileind + 1
    if sourcefile.rfind('.') > 0:
        return sourcefile + '.part' + str(fileind)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("check the suffix of the file")
    sys.exit(1)

def combine(filename):
    """Rebuild *filename* by concatenating its numbered part files.

    Parts are read in order from ``filename + '.part1'``,
    ``filename + '.part2'``, ... and appended to *filename* until the next
    numbered part does not exist. The part files are left in place.

    If not even the first part exists, an error is written to standard
    error and the process exits with status 1 without touching *filename*,
    so an existing file is never truncated by mistake.

    Args:
        filename: path of the file to (re)create.
    """
    # Local import so the function does not rely on the file's import block.
    import shutil

    split_file = filename + '.part1'
    if not os.path.exists(split_file):
        sys.stderr.write("no split parts found for " + filename + "\n")
        sys.exit(1)

    count = 1
    with open(filename, "wb") as file_ori:
        while os.path.exists(split_file):
            with open(split_file, "rb") as file_split:
                # Stream the part in buffered chunks rather than loading a
                # whole (possibly very large) part into memory.
                shutil.copyfileobj(file_split, file_ori)
            count += 1
            split_file = filename + '.part' + str(count)
    print("combine file " + filename + " succeed")

def usage():
    """Print a short command-line usage message to standard output."""
    print("usage:")
    # "\t" is a real tab; the original "/t " printed the characters literally.
    print("\tpython splitfile.py --type('s' or 'c') --filename --slice")
   
def main():
    """Dispatch to split or combine based on the parsed module-level options.

    Reads the module-level ``type``, ``filename`` and ``slice`` values:
    ``type == 's'`` splits ``filename`` into parts of ``slice`` MB,
    ``type == 'c'`` rebuilds ``filename`` from its parts. If the type or the
    file name is missing, or the type is not recognised, the usage message
    is printed once and nothing else happens.
    """
    if not type or not filename:
        usage()
        # Stop here: falling through would print the usage a second time or
        # try to open an empty file name.
        return
    if type == 's':
        splitFile(filename, slice)
    elif type == 'c':
        combine(filename)
    else:
        usage()


# Run only when executed as a script, not when the file is imported.
if __name__ == "__main__":
    main()

 

下面是运行情况

dwapp@pttest1:/home/dwapp/joe.wangh/test>ll -h
总计 126M
-rw-r--r-- 1 dwapp supergroup 126M 11-13 19:14 haha.txt2
-rw-r--r-- 1 dwapp supergroup 2.3K 11-13 19:20 p1.py

#进行拆分
dwapp@pttest1:/home/dwapp/joe.wangh/test>python p1.py -t s -f haha.txt2 -s 10
split file haha.txt2 succeed
dwapp@pttest1:/home/dwapp/joe.wangh/test>ll -h
总计 251M
-rw-r--r-- 1 dwapp supergroup 126M 11-13 19:14 haha.txt2
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part1
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part10
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part11
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part12
-rw-r--r-- 1 dwapp supergroup 5.1M 11-13 19:20 haha.txt2.part13
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part2
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part3
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part4
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part5
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part6
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part7
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part8
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part9
-rw-r--r-- 1 dwapp supergroup 2.3K 11-13 19:20 p1.py

#重命名源文件
dwapp@pttest1:/home/dwapp/joe.wangh/test>mv haha.txt2 haha.txt2.bak

#对前面拆分的文件进行组合
dwapp@pttest1:/home/dwapp/joe.wangh/test>python p1.py -t c -f haha.txt2 -s 10
combine file haha.txt2 succeed
dwapp@pttest1:/home/dwapp/joe.wangh/test>ll -h
总计 376M
-rw-r--r-- 1 dwapp supergroup 126M 11-13 19:25 haha.txt2
-rw-r--r-- 1 dwapp supergroup 126M 11-13 19:14 haha.txt2.bak
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part1
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part10
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part11
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part12
-rw-r--r-- 1 dwapp supergroup 5.1M 11-13 19:20 haha.txt2.part13
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part2
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part3
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part4
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part5
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part6
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part7
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part8
-rw-r--r-- 1 dwapp supergroup  10M 11-13 19:20 haha.txt2.part9
-rw-r--r-- 1 dwapp supergroup 2.3K 11-13 19:25 p1.py

你可能感兴趣的:(python,File,脚本,ext,equals,each)