虽然在 Linux 下已经有了 split 命令可以用来切分文件,不过它仍然有一些限制,不能完全按照你的需求随心所欲地切分,比如切分后文件的命名规则。刚才试着用 Python 写了一个切分文件的脚本,它是按照大小来切分的;当然,如果你需要按照行数或者其他标准来切分,修改脚本中相应的代码就可以了。
splitfile.py
import os
import shutil
import sys
# Running counter appended to each part name ("<file>.part<N>");
# getSplitFileName() increments it through a global statement.
fileind = 0


def parseArgs(argv):
    """Extract the script options from an argv-style list.

    Recognized options (each takes the following element as its value):
        --type / -t       's' to split, 'c' to combine
        --filename / -f   file to split, or base name of the parts to combine
        --slice / -s      size of each part in MB

    Unknown arguments are skipped.  An option that appears as the very last
    element, with no value after it, is ignored so its default is kept
    (indexing past the end of argv used to raise IndexError).

    Returns a (type, filename, slice) tuple.  Missing options default to
    '', '' and 1; a supplied slice is returned as the raw string and is
    converted with int() by splitFile().
    """
    # every accepted spelling mapped to the setting it controls
    targets = {
        '--type': 'type',
        '-t': 'type',
        '--filename': 'filename',
        '-f': 'filename',
        '--slice': 'slice',
        '-s': 'slice',
    }
    settings = {'type': '', 'filename': '', 'slice': 1}
    pos = 1  # argv[0] is the script name
    while pos < len(argv):
        key = targets.get(argv[pos])
        if key is not None and pos + 1 < len(argv):
            pos += 1  # consume the option's value
            settings[key] = argv[pos]
        pos += 1
    return settings['type'], settings['filename'], settings['slice']


# main() reads these three module-level names, so they keep their original
# spelling even though `type` and `slice` shadow builtins.
type, filename, slice = parseArgs(sys.argv)
def splitFile(filename,slice):
    """Split *filename* into consecutive parts of at most *slice* MB each.

    Part names come from getSplitFileName() ("<filename>.part1", ...); only
    the last part may be smaller than the requested size.

    filename -- path of the file to split
    slice    -- part size in MB; anything int() accepts (e.g. 10 or "10")

    Raises ValueError when slice cannot be converted by int() or is not
    positive.
    """
    global fileind
    chunk = 1048576 * int(slice)  # 1048576 bytes == 1 MB
    if chunk <= 0:
        # read(0) yields nothing and a negative size reads the whole file,
        # so neither would produce the requested parts.
        raise ValueError("slice must be a positive number of MB")
    # Restart the numbering so the parts of this file begin at .part1, which
    # is where combine() starts looking.
    fileind = 0
    with open(filename, "rb") as file_ori:
        while True:
            content = file_ori.read(chunk)
            # An empty read marks end of file.  Truthiness covers both str
            # (Python 2) and bytes (Python 3), where b"" != "".
            if not content:
                break
            with open(getSplitFileName(filename), "wb") as file_split:
                file_split.write(content)
    print("split file " + filename + " succeed")
def getSplitFileName(sourcefile):
    """Return the name of the next part file for *sourcefile*.

    Advances the module-level part counter ``fileind`` and appends
    ".part<counter>" to the source name.  When the name has no '.' beyond
    its first character, a hint is printed and the process exits with
    status 1.
    """
    global fileind
    fileind += 1
    # rfind gives -1 for "no dot" and 0 for a leading dot; both are rejected
    if sourcefile.rfind('.') <= 0:
        print("check the suffix of the file")
        sys.exit(1)
    return ''.join([sourcefile, '.part', str(fileind)])
def combine(filename):
    """Rebuild *filename* from its parts "<filename>.part1", ".part2", ...

    Parts are appended in numeric order until the next expected part does
    not exist.  If not even ".part1" exists, nothing is written: an existing
    *filename* is left untouched instead of being truncated to zero bytes.

    filename -- path of the file to (re)create; also the base name of the
                part files
    """
    def part_name(index):
        # same naming scheme the split side produces
        return filename + '.part' + str(index)

    # Check before opening the target: opening with "wb" truncates it.
    if not os.path.exists(part_name(1)):
        print("no part files found for " + filename)
        return
    with open(filename, "wb") as file_ori:
        count = 1
        while os.path.exists(part_name(count)):
            with open(part_name(count), "rb") as file_split:
                # stream the part so a large part is never held in memory whole
                shutil.copyfileobj(file_split, file_ori)
            count += 1
    print("combine file " + filename + " succeed")
def usage():
    """Print a short command-line synopsis to standard output."""
    # Show the name the script was actually started under; fall back to a
    # fixed name when argv is empty or argv[0] is blank.
    invoked = sys.argv[0] if sys.argv else ''
    script = os.path.basename(invoked) or "splitfile.py"
    print("usage:")
    # "\t" indents the synopsis; the earlier text printed a literal "/t".
    print("\tpython " + script + " --type('s' or 'c') --filename --slice")
def main():
    """Dispatch to splitFile() or combine() based on the parsed options.

    Reads the module-level ``type``, ``filename`` and ``slice`` values.
    Prints the usage text once and returns when the type or the file name
    is missing, or when the type is neither 's' nor 'c'.
    """
    if type == '' or filename == '':
        # Stop here: falling through used to print the usage a second time,
        # or call splitFile()/combine() with an empty file name.
        usage()
        return
    if type == 's':
        splitFile(filename, slice)
    elif type == 'c':
        combine(filename)
    else:
        usage()


# Run only when executed as a script, so importing the module does not
# start a split or combine.
if __name__ == "__main__":
    main()
下面是运行情况
dwapp@pttest1:/home/dwapp/joe.wangh/test>ll -h
总计 126M
-rw-r--r-- 1 dwapp supergroup 126M 11-13 19:14 haha.txt2
-rw-r--r-- 1 dwapp supergroup 2.3K 11-13 19:20 p1.py
#进行拆分
dwapp@pttest1:/home/dwapp/joe.wangh/test>python p1.py -t s -f haha.txt2 -s 10
split file haha.txt2 succeed
dwapp@pttest1:/home/dwapp/joe.wangh/test>ll -h
总计 251M
-rw-r--r-- 1 dwapp supergroup 126M 11-13 19:14 haha.txt2
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part1
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part10
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part11
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part12
-rw-r--r-- 1 dwapp supergroup 5.1M 11-13 19:20 haha.txt2.part13
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part2
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part3
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part4
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part5
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part6
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part7
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part8
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part9
-rw-r--r-- 1 dwapp supergroup 2.3K 11-13 19:20 p1.py
#重命名源文件
dwapp@pttest1:/home/dwapp/joe.wangh/test>mv haha.txt2 haha.txt2.bak
#对前面拆分的文件进行组合
dwapp@pttest1:/home/dwapp/joe.wangh/test>python p1.py -t c -f haha.txt2 -s 10
combine file haha.txt2 succeed
dwapp@pttest1:/home/dwapp/joe.wangh/test>ll -h
总计 376M
-rw-r--r-- 1 dwapp supergroup 126M 11-13 19:25 haha.txt2
-rw-r--r-- 1 dwapp supergroup 126M 11-13 19:14 haha.txt2.bak
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part1
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part10
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part11
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part12
-rw-r--r-- 1 dwapp supergroup 5.1M 11-13 19:20 haha.txt2.part13
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part2
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part3
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part4
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part5
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part6
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part7
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part8
-rw-r--r-- 1 dwapp supergroup 10M 11-13 19:20 haha.txt2.part9
-rw-r--r-- 1 dwapp supergroup 2.3K 11-13 19:25 p1.py