python批量转换word和excel格式

python批量转换word和excel格式

  • 实现的目标
  • 用到的python模块
  • 脚本内容

实现的目标

批量将指定目录(包括子目录)下的所有 .doc 和 .xls 文件转换为 .docx 和 .xlsx 格式;注意:每个文件转换成功后,脚本会删除对应的原文件。

用到的python模块

pip install pywin32

脚本内容

import os
import os.path
import win32com.client as win32
import threading
#解决pywintypes.com_error报错
import pythoncom

# Root directory that is scanned recursively for .doc / .xls files.
# Raw string: in a normal literal "\旧" is an invalid escape sequence, which
# raises a DeprecationWarning (SyntaxWarning on Python 3.12+).
rootdir = r'D:\旧版文档'

def xls2xlsx():
    """Convert every ``.xls`` file under ``rootdir`` to ``.xlsx`` via Excel.

    Walks ``rootdir`` recursively. Each matching workbook is opened in Excel,
    saved next to the original with an ``.xlsx`` extension and, only if the
    save and close both succeed, the original ``.xls`` file is deleted.
    Errors on individual files are printed and the walk continues.
    """
    # Initialise COM for this thread; the original author added this to
    # avoid pywintypes.com_error.
    pythoncom.CoInitialize()
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    try:
        for parent, dirnames, filenames in os.walk(rootdir):
            for fn in filenames:
                if not fn.endswith('.xls'):
                    continue
                filedir = os.path.join(parent, fn)
                # Print the file being processed.
                print(filedir)
                try:
                    wb = excel.Workbooks.Open(filedir)
                except Exception as e:
                    print(e)
                    continue
                try:
                    try:
                        # Append "x" to the path instead of str.replace():
                        # replace('xls', 'xlsx') would also rewrite any "xls"
                        # occurring in a directory name or file stem.
                        # FileFormat: 51 = xlsx, 56 = xls.
                        wb.SaveAs(filedir + 'x', FileFormat=51)
                    finally:
                        # Always release the workbook, even when SaveAs
                        # failed; False = do not save changes, so Excel does
                        # not stop on a save prompt.
                        wb.Close(False)
                    # Reached only when both SaveAs and Close succeeded.
                    os.remove(filedir)
                except Exception as e:
                    print(e)
    finally:
        # Shut Excel down even if the walk itself raised, so no orphaned
        # Excel process is left behind.
        excel.Application.Quit()
    print('xls end')

def doc2docx():
    """Convert every ``.doc`` file under ``rootdir`` to ``.docx`` via Word.

    Walks ``rootdir`` recursively. Each matching document is opened in Word,
    saved next to the original with an ``.docx`` extension and, only if the
    save and close both succeed, the original ``.doc`` file is deleted.
    Errors on individual files are printed and the walk continues.
    """
    # Initialise COM for this thread before talking to Word.
    pythoncom.CoInitialize()
    word = win32.Dispatch("Word.Application")
    try:
        # os.walk yields: parent directory, sub-directory names (no path),
        # file names.
        for parent, dirnames, filenames in os.walk(rootdir):
            for fn in filenames:
                if not fn.endswith('.doc'):
                    continue
                filedir = os.path.join(parent, fn)
                # Print the file being processed.
                print(filedir)
                try:
                    doc = word.Documents.Open(filedir)
                except Exception as e:
                    print(e)
                    continue
                try:
                    try:
                        # Save as "<name>.docx"; format code 12 is docx.
                        doc.SaveAs("{}x".format(filedir), 12)
                    finally:
                        # Always release the document, even when SaveAs
                        # failed; 0 = do not save changes, so Word does not
                        # stop on a save prompt.
                        doc.Close(0)
                    # Reached only when both SaveAs and Close succeeded.
                    os.remove(filedir)
                except Exception as e:
                    print(e)
    finally:
        # Shut Word down even if the walk itself raised, so no orphaned
        # Word process is left behind.
        word.Quit()
    print('doc end')

def main():
    """Run the Excel and Word conversions in two threads and wait for both."""
    threads = [
        threading.Thread(target=xls2xlsx, args=()),
        threading.Thread(target=doc2docx, args=()),
    ]
    for t in threads:
        # Thread.setDaemon() is deprecated since Python 3.10; assign the
        # ``daemon`` attribute instead (must be done before start()).
        t.daemon = True
        t.start()
    for t in threads:
        t.join()
    print("所有任务完成")


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

你可能感兴趣的:(python,文档处理)