编辑docx中字符的一个方法

# -*- coding: utf-8 -*-
"""
filename : net_csdn_bbs_topics392218239.py
author: [email protected] QQ: 443089607 weixin: huzhenghui weibo: http://weibo.com/443089607
category : python-docx
original url : http://bbs.csdn.net/topics/392218239
original title : python_docx 读取文档段落中的单个字符
title : 编辑docx中字符的一个方法
csdn blog url :
weibo article url :
weibo message url :
为了清晰直观展现python严格要求的缩进,请访问博客链接
详细说明见源代码中的注释
"""

# standard import
import copy
import logging
import random
import os

import docx

# Configure root logging first so that every later step can trace its progress.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
logging.debug('start')
# Directory part and file-name part of this script's own path.
STR_SCRIPT_DIR = os.path.dirname(__file__)
STR_SCRIPT_FILE = os.path.basename(__file__)
logging.debug('STR_SCRIPT_DIR : %s', STR_SCRIPT_DIR)
logging.debug('STR_SCRIPT_FILE : %s', STR_SCRIPT_FILE)
# Script file name with its extension stripped.
STR_SCRIPT_PREFIX = os.path.splitext(STR_SCRIPT_FILE)[0]
logging.debug('STR_SCRIPT_PREFIX : %s', STR_SCRIPT_PREFIX)
# Keyword that is searched for and rendered in bold later on.
STR_KEYWORD = 'boldme'
# Characters the random filler text is drawn from.
STR_CHARSET = '1234567890abcdefghijklmnopqrstuvwxyz'


def _random_chars():
    """Return 10 to 20 characters picked at random from STR_CHARSET."""
    return ''.join(
        random.choice(STR_CHARSET) for _ in range(random.randint(10, 20))
    )


# Build a blank test document: 10-20 paragraphs of 10-20 runs of random text,
# with the keyword occasionally embedded in the middle of a run.
DOCUMENT_ORIGINAL = docx.Document()
for int_paragraph_index in range(random.randint(10, 20)):
    paragraph = DOCUMENT_ORIGINAL.add_paragraph()
    for int_run_index in range(random.randint(10, 20)):
        list_pieces = [_random_chars()]
        # Insert the keyword into roughly 5% of the runs.
        if random.randint(0, 100) < 5:
            list_pieces.append(STR_KEYWORD)
        list_pieces.append(_random_chars())
        paragraph.add_run(''.join(list_pieces))
# Dump the generated text so the result can be compared against the log.
for paragraph in DOCUMENT_ORIGINAL.paragraphs:
    logging.debug('generated paragraph : %s', paragraph.text)
# The test document is written next to this script.
STR_ORIGINAL_FILE = os.path.join(
    STR_SCRIPT_DIR, STR_SCRIPT_PREFIX + '.original.docx'
)
logging.debug('STR_ORIGINAL_FILE : %s', STR_ORIGINAL_FILE)
DOCUMENT_ORIGINAL.save(STR_ORIGINAL_FILE)
# Re-open the test document that was just written to disk.
DOCUMENT_NEW = docx.Document(STR_ORIGINAL_FILE)


def _apply_direct_formatting(run_target, bold, italic, underline):
    """Apply captured bold/italic/underline values to *run_target*.

    A value of ``None`` (python-docx's "inherit" state) is skipped so that
    no formatting is written to the run for that property.
    """
    if bold is not None:
        run_target.bold = bold
    if italic is not None:
        run_target.italic = italic
    if underline is not None:
        run_target.underline = underline


def _bold_keyword_in_paragraph(paragraph, str_keyword):
    """Rebuild *paragraph* so each occurrence of *str_keyword* is a bold run.

    Every run whose text contains the keyword is split around it; the
    keyword pieces are re-added as separate bold runs and the remaining
    pieces are re-added unchanged. Each rebuilt run keeps the character
    style and the bold/italic/underline settings of the run it came from.

    Only keywords that lie completely inside a single run are detected.

    NOTE(review): ``paragraph.clear()`` removes all paragraph content, so
    anything not re-created from ``paragraph.runs`` is presumably lost;
    confirm before using this on documents with richer content.

    Returns True when the paragraph was rebuilt, False when it was left
    untouched (keyword absent, or *str_keyword* empty).
    """
    # An empty keyword would match everywhere and make str.split() raise.
    if not str_keyword:
        return False
    list_runs = paragraph.runs
    bool_found = False
    for run in list_runs:
        if str_keyword in run.text:
            logging.debug('Found keyword : %s', run.text)
            bool_found = True
    if not bool_found:
        return False
    # Capture plain values before clearing instead of deep-copying the run
    # proxies: a deep copy also copies everything reachable through the
    # proxies' parent references, while only these values are needed.
    list_snapshots = [
        (run.text, run.style, run.bold, run.italic, run.underline)
        for run in list_runs
    ]
    paragraph.clear()
    for str_text, style, bold, italic, underline in list_snapshots:
        # A text without the keyword splits into exactly one part, so both
        # kinds of run go through the same loop.
        for int_index, str_part in enumerate(str_text.split(str_keyword)):
            if int_index > 0:
                # Every part after the first is preceded by one keyword.
                run_keyword = paragraph.add_run(str_keyword, style)
                _apply_direct_formatting(run_keyword, True, italic, underline)
            # Skip empty parts (keyword at a run boundary or repeated back
            # to back) so that no empty runs are added to the paragraph.
            if str_part:
                run_part = paragraph.add_run(str_part, style)
                _apply_direct_formatting(run_part, bold, italic, underline)
    return True


# Process every paragraph of the document.
for paragraph in DOCUMENT_NEW.paragraphs:
    _bold_keyword_in_paragraph(paragraph, STR_KEYWORD)
# Save the result to a new file next to this script.
STR_NEW_FILE = os.path.join(STR_SCRIPT_DIR, STR_SCRIPT_PREFIX + '.new.docx')
logging.debug('STR_NEW_FILE : %s', STR_NEW_FILE)
DOCUMENT_NEW.save(STR_NEW_FILE)
#end of file

你可能感兴趣的:(python)