将PDF文本中的硬换行合并,转化为不换行的txt

#! /usr/bin/python
#coding=utf8
"""pdftype2txttype.py: re-join hard-wrapped lines of text files.

Every ``*.txt`` file in ``from_folder`` is read line by line. Lines are
stripped and concatenated until one ends with a break mark (see
``_BREAK_MARKS``); the accumulated text is then written as a single line to
a file of the same name in ``to_folder``.

Usage::

    pdftype2txttype.py from_folder to_folder

Files are read and written as UTF-8, the encoding this script's own
punctuation literals are declared in.
"""
import io
import itertools
import os
import sys

# Punctuation that ends a re-joined line.
# NOTE(review): the full-width comma/semicolon/colon are included because the
# text being processed is Chinese; confirm this matches the intended set.
_BREAK_MARKS = (
    u"\u3002",  # ideographic full stop
    u",", u"\uff0c",
    u";", u"\uff1b",
    u":", u"\uff1a",
)

# Terminator written after every re-joined line.
_LINE_END = u"\r\n"


def anyTrue(predicate, sequence):
    """Return True if ``predicate(item)`` is truthy for any item of ``sequence``.

    ``sequence`` is iterated element by element, so passing a bare string
    tests each of its characters, not the string as a whole.
    """
    return any(predicate(item) for item in sequence)


def _merge_wrapped_lines(lines):
    """Yield re-joined lines: stripped input lines glued up to a break mark."""
    pending = []
    for raw in lines:
        text = raw.strip()
        pending.append(text)
        # str.endswith accepts a tuple, so one call tests every mark.
        if text.endswith(_BREAK_MARKS):
            yield u"".join(pending)
            pending = []
    # Text after the last break mark must not be dropped.
    tail = u"".join(pending)
    if tail:
        yield tail


def _convert_file(src_path, dst_path):
    """Write the re-joined content of ``src_path`` to ``dst_path``."""
    with io.open(src_path, "r", encoding="utf-8") as src:
        # newline="" disables newline translation so the explicit CRLF is
        # written verbatim on every platform.
        with io.open(dst_path, "w", encoding="utf-8", newline="") as dst:
            for merged in _merge_wrapped_lines(src):
                dst.write(merged + _LINE_END)


def main(argv=None):
    """Convert every ``.txt`` file of ``argv[1]`` into folder ``argv[2]``.

    ``argv`` defaults to ``sys.argv``. When the argument count is wrong a
    usage line is printed and nothing is converted.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        print("usage: %s  from_folder to_folder" % argv[0])
        return
    from_dir, to_dir = argv[1], argv[2]
    for file_name in sorted(os.listdir(from_dir)):
        if not file_name.endswith(".txt"):
            continue
        _convert_file(
            os.path.join(from_dir, file_name),
            os.path.join(to_dir, file_name),
        )


if __name__ == '__main__':
    main()

 

你可能感兴趣的:(File,OS,Path,import,output)