python docx寻找到文中图片下载并反相处理

查找文档中的图片有两种方法(见下面的代码):一是遍历各段落中 run 的 drawing 元素,可以同时找到内联图和非内联图;二是遍历 inline_shapes(仅内联图片),找到图片的 rId 后,用 document_part.related_parts[rID] 获取图片,再把图片的 _blob(二进制内容)写入文件保存。

from docx import Document    #pip3 install python-docx 
from docx.shared import Inches #英寸
import os  

#from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import Cm #厘米
from docx.shared import RGBColor


from docx.enum.table import WD_TABLE_ALIGNMENT  #表格排列方式
from docx.enum.table import WD_ALIGN_VERTICAL
from docx.enum.text import WD_ALIGN_PARAGRAPH

from natsort import natsorted   #自然排序



from docx.oxml import OxmlElement
from docx.oxml.ns import qn

def set_cell_border(cell, **kwargs):
    """
    Apply border attributes to a single table cell.

    Each keyword argument names one edge of the cell and maps it to a dict
    of border attributes. Recognised edges are ``left``, ``top``, ``right``,
    ``bottom``, ``insideH`` and ``insideV``; recognised attribute keys are
    ``sz``, ``val``, ``color``, ``space`` and ``shadow``. Edges that are
    missing or empty, and attribute keys outside that list, are ignored.
    Attribute values are converted with ``str()`` before being stored.

    Usage:
    set_cell_border(
        cell,
        top={"sz": 12, "val": "single", "color": "#FF0000", "space": "0"},
        bottom={"sz": 12, "color": "#00FF00", "val": "single"},
        left={"sz": 24, "val": "dashed", "shadow": "true"},
        right={"sz": 12, "val": "dashed"},
    )
    """
    cell_props = cell._tc.get_or_add_tcPr()

    # Reuse the cell's <w:tcBorders> container when present, else create it.
    borders = cell_props.first_child_found_in("w:tcBorders")
    if borders is None:
        borders = OxmlElement('w:tcBorders')
        cell_props.append(borders)

    for side in ('left', 'top', 'right', 'bottom', 'insideH', 'insideV'):
        spec = kwargs.get(side)
        if not spec:
            # Edge not requested (or given as an empty dict): leave untouched.
            continue

        tag_name = 'w:{}'.format(side)

        # Update the existing edge element in place, or add a new one.
        node = borders.find(qn(tag_name))
        if node is None:
            node = OxmlElement(tag_name)
            borders.append(node)

        # Attributes are written in this fixed sequence; the original author
        # noted that the order of attributes appears to matter.
        for attr in ("sz", "val", "color", "space", "shadow"):
            if attr in spec:
                node.set(qn('w:{}'.format(attr)), str(spec[attr]))


import re
import cv2  # OpenCV; imported once up front instead of on every loop iteration

# Document to read, and where the copy with inverted pictures is written.
# The result goes to a separate file so the source document is not overwritten.
SOURCE_DOCX = r'd:\\2\\观澜石皮1.docx'
OUTPUT_DOCX = r'd:\\2\\观澜石皮1_inverted.docx'

doc = Document(SOURCE_DOCX)


# Printing the drawing elements of every run finds both inline and
# non-inline pictures.
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        print(run.element.drawing_lst)


document_part = doc.part

# partname -> (image part, inverted image bytes). Replacements are collected
# here and applied only after the loop, so a picture that is used by several
# inline shapes is always read in its original state and inverted once.
replacements = {}
windows_shown = False

for num, shape in enumerate(doc.inline_shapes):
    print(shape)

    pic = shape._inline.graphic.graphicData.pic
    if pic is None:
        # Inline shape without a picture element (e.g. a chart): skip it.
        continue

    rID = pic.blipFill.blip.embed
    print(rID)
    if rID is None:
        # No embedded relationship id (e.g. a linked picture), so there is
        # no image part in the package to extract.
        continue

    image_part = document_part.related_parts[rID]

    # Dump the picture bytes to disk. The ".png" name is kept for every
    # picture regardless of the bytes' real format.
    extracted_name = "test" + str(num) + ".png"
    with open(extracted_name, "wb") as fr:
        fr.write(image_part.blob)

    # Read the picture back with OpenCV as a 3-channel colour image.
    img = cv2.imread(extracted_name, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread returns None when it cannot decode the file.
        print("Skipping " + extracted_name + ": OpenCV could not decode it")
        continue

    print(img.shape)  # (height, width, channels)
    cv2.imshow('img', img)

    # Subtracting the image from the maximum grey value yields the negative.
    dst = 255 - img

    cv2.imwrite("test_Opp" + str(num) + ".png", dst)
    cv2.imshow('dst', dst)
    windows_shown = True

    # Re-encode the negative in the format named by the part's extension so
    # the bytes stored in the package still match the part's name.
    ext = os.path.splitext(image_part.partname)[1]
    try:
        encoded_ok, encoded = cv2.imencode(ext, dst)
    except cv2.error:
        # OpenCV has no encoder for this extension.
        encoded_ok = False

    if encoded_ok:
        replacements[str(image_part.partname)] = (image_part, encoded.tobytes())
    else:
        print("Not replacing " + str(image_part.partname)
              + ": cannot encode as '" + ext + "'")


# Swap the inverted bytes into the document. The image part exposes no
# public setter for its content, so the private _blob attribute is assigned.
for image_part, inverted_bytes in replacements.values():
    image_part._blob = inverted_bytes

if windows_shown:
    # Keep the preview windows open until a key is pressed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()

doc.save(OUTPUT_DOCX)

input('按任意键退出。。。。。。。')

你可能感兴趣的:(python docx寻找到文中图片下载并反相处理)