python-docx自动替换关键字

原本打算是能自动将旧的docx文件中的关键字替换,生成新的docx文件,格式、线段、图片、页眉页脚等都不变
结果目前有以下几点问题(后半部分的更新已解决):
1. python-docx只能新增图片,不能保留原有的图片,也不能保留线段
2.有的文字格式可以保留,有的不行
3. paragraph.runs会将"编制:郭锋"切割为"编"、"制:郭"、"锋"三个run(这什么鬼?)
目前只能做到提示功能,提示哪个文件需要修改,需要修改什么。
求大神指导怎么解决以上几个问题!!!
下面附代码(有一部分代码是我抄的)

import copy
import docx
import os
def _report_matches(text, dic):
    """Print ``key->value`` for each key of ``dic`` found in ``text``.

    Returns the number of keys that were found (each key counts at most once
    per call, no matter how often it occurs in ``text``).
    """
    hits = 0
    for name in dic:
        if name in text:
            # str() so that non-string replacement values do not raise.
            print(name + "->" + str(dic[name]))
            hits += 1
    return hits


def change_key(doc, new_doc, dic):
    """Report which keywords from ``dic`` occur in a .docx file (dry run).

    The document is opened and scanned; every match is printed together with
    a per-file summary. Nothing is modified and nothing is saved.

    Args:
        doc: Path of the .docx file to scan.
        new_doc: Path the rewritten file would be written to. It is only
            checked for truthiness here because saving is not performed.
        dic: Mapping of keyword -> replacement value.

    Returns:
        None. If any of the three arguments is falsy the function returns
        immediately without opening the document.
    """
    if not (dic and doc and new_doc):
        return

    count = 0
    doc_file = docx.Document(doc)

    # Body paragraphs.
    for paragraph in doc_file.paragraphs:
        count += _report_matches(paragraph.text, dic)

    # Table cells.
    for table in doc_file.tables:
        for row in table.rows:
            for cell in row.cells:
                count += _report_matches(cell.text, dic)

    if count != 0:
        print("需修改文件"+str(doc))
        print("需修改次数:"+str(count))
    else:
        print(str(doc)+"  无需修改")
    print("------------------------------------------------")

# Keyword -> replacement pairs looked up in every document.
value_dic = {
    "2016": "2018",
    "2015": "2017",
    "TyCloud": "MIMS",
}
rootdir = 'C:\\Users\\Admin\\Desktop\\python\\old'
rootdir2 = 'C:\\Users\\Admin\\Desktop\\python\\new'
list1 = os.listdir(rootdir)  # every entry (files and sub-directories) in rootdir
for file_name in list1:
    # splitext looks only at the final extension, so names without a dot no
    # longer raise IndexError and names such as "a.b.docx" are not skipped.
    if os.path.splitext(file_name)[1].lower() != '.docx':
        continue
    path = os.path.join(rootdir, file_name)
    path2 = os.path.join(rootdir2, file_name)
    change_key(path, path2, value_dic)

以下是新版本(感谢胡大仙的支持)

from docx import Document
import os

# Folder that is scanned for the original .docx files.
OLDPATH = r"C:\Users\Admin\Desktop\python\old"
# Folder that receives the rewritten copies.
PATH = r"C:\Users\Admin\Desktop\python\new"

# Text to search for -> text to put in its place.
DICT = {"想要被替换的字符串": "新的字符串"}

def main():
    """Rewrite every .docx file found in OLDPATH and save the result in PATH.

    Each document is loaded, passed through ``check`` for keyword
    replacement, and saved under the same file name in the output folder.
    Entries whose extension is not ``.docx`` are skipped.
    """
    for fileName in os.listdir(OLDPATH):
        # Test the extension of the file name itself: splitting the whole
        # path on "." breaks on dots in directory names, on names without an
        # extension (IndexError) and on names with several dots.
        if os.path.splitext(fileName)[1].lower() != ".docx":
            continue
        oldFile = os.path.join(OLDPATH, fileName)
        newFile = os.path.join(PATH, fileName)
        document = check(Document(oldFile))
        document.save(newFile)

def _replace_in_cell(cell, key, value):
    """Replace ``key`` with ``value`` in one table cell.

    The replacement is done run by run, which leaves each run's formatting
    untouched, whenever that yields exactly the text a whole-cell replacement
    would yield. Otherwise (for example when ``key`` straddles two runs) the
    cell text is overwritten as a whole.
    """
    replaced_text = cell.text.replace(key, value)
    paragraphs = cell.paragraphs
    # Assumes cell.text is the paragraph texts joined by "\n" and each
    # paragraph text is the concatenation of its runs; if that does not hold
    # the comparison simply fails and the whole-cell path below is taken.
    per_run_text = "\n".join(
        "".join(run.text.replace(key, value) for run in paragraph.runs)
        for paragraph in paragraphs
    )
    if per_run_text == replaced_text:
        for paragraph in paragraphs:
            for run in paragraph.runs:
                if key in run.text:
                    run.text = run.text.replace(key, value)
    else:
        cell.text = replaced_text


def check(document):
    """Replace every DICT key with its value throughout ``document``.

    Table cells are visited by (row, column) index. Body paragraphs are
    processed run by run, so a key that is split across several runs of a
    body paragraph is not replaced there.

    Args:
        document: The opened document object to modify in place.

    Returns:
        The same ``document`` object, after modification.
    """
    # tables
    for table in document.tables:
        column_count = len(table.columns)
        for row in range(len(table.rows)):
            for col in range(column_count):
                # Look the cell up once instead of once per access.
                cell = table.cell(row, col)
                for key, value in DICT.items():
                    if key in cell.text:
                        print(key+"->"+value)
                        _replace_in_cell(cell, key, value)

    # paragraphs
    for para in document.paragraphs:
        for run in para.runs:
            for key, value in DICT.items():
                if key in run.text:
                    print(key+"->"+value)
                    run.text = run.text.replace(key, value)

    return document
    
# Run the batch replacement only when executed as a script.
if __name__ == "__main__":
    main()

python-docx库不能直接操作.doc文件,需要先将.doc文件转换为.docx文件,再利用python-docx库进行操作。以下代码用于批量将.doc转换为.docx(通过win32com调用Word完成转换)。

import os
from win32com import client
'''
将对应文件夹下的doc文件转为docx文件
'''
def doc_to_docx(path):
    """Convert one ``.doc`` file to ``.docx`` through Word automation.

    The converted file is written next to the original, with the same base
    name and a ``.docx`` extension. Paths whose extension is not exactly
    ``.doc`` are ignored.

    Args:
        path: Path of the file to convert.

    Returns:
        None. Errors raised by Word propagate to the caller, but the
        document and the Word instance are closed first.
    """
    base, extension = os.path.splitext(path)
    if extension != ".doc":
        return

    word = client.Dispatch('Word.Application')
    try:
        doc = word.Documents.Open(path)  # source document
        try:
            doc.SaveAs(base + ".docx", 16)  # 16 is the format code passed to Word for the .docx output
        finally:
            doc.Close()
    finally:
        # Always shut Word down so a failed conversion does not leave a
        # Word process running in the background.
        word.Quit()
    print("转换完成")

def find_file(path, ext, file_list=None):
    """Recursively collect the files under ``path`` whose extension is ``ext``.

    Args:
        path: Directory to search.
        ext: Extension to match, including the leading dot (e.g. ``".doc"``).
            The comparison is exact and case-sensitive.
        file_list: Optional list to append matches to. A fresh list is
            created when omitted, so separate calls never share results.

    Returns:
        The list of matching file paths (``file_list`` itself when given).
    """
    if file_list is None:
        file_list = []
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if os.path.isdir(full_path):
            find_file(full_path, ext, file_list)
        elif os.path.splitext(full_path)[1] == ext:
            file_list.append(full_path)
    return file_list
 
 
# Folder whose .doc files are converted in bulk.
dir_path = r"C:\Users\Admin\Desktop\python\old"
ext = ".doc"
# Collect every matching file first, then convert them one at a time.
file_list = find_file(dir_path, ext)
for file in file_list:
    doc_to_docx(file)

你可能感兴趣的:(python,自用)