Python调用Word:使用python-docx操作Word文档

章节三:python使用python-docx操作word

1、python-docx库介绍

该模块儿可以创建、修改Word(.docx)文件;

此模块儿不属于python标准库,需要单独安装;

python-docx官方文档: https://python-docx.readthedocs.io/

我们在安装此模块儿使用的是pip install python-docx,但是在导入的时候是import docx;

2、Python读取Word文档内容

注意:每次对文档做了修改(写入内容或调整样式)之后,必须调用doc.save()保存一下,否则修改不会写入文件,等于白做;只是读取内容时不需要保存;

1)word文档结构介绍

20200509225516945.png

2)python-docx提取文字和文字块儿

① python-docx提取文字

有一个这样的docx文件,我们想要提取其中的文字,应该怎么做?

20200509225559904.png

代码如下:

from docx import Document

# Open the existing Word document that the text is read from.
doc = Document(r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx")

# Show the paragraph objects themselves before printing their contents.
print(doc.paragraphs)

# Print the text of every paragraph in document order.
for paragraph in doc.paragraphs:
    print(paragraph.text)

结果如下:

20200509225611992.png

② python-docx提取文字块儿

from docx import Document

# Open the existing Word document that the runs are read from.
doc = Document(r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx")

print(doc.paragraphs)

# Inspect the first two paragraphs: print each paragraph's run objects and
# then the text carried by every run.  Slicing (instead of indexing [0] and
# [1] separately) keeps the script working on documents that contain fewer
# than two paragraphs.
for paragraph in doc.paragraphs[:2]:
    runs = paragraph.runs
    print(runs)
    for run in runs:
        print(run.text)

结果如下:

20200509225625103.png

3)利用Python向Word文档写入内容

① 添加段落

from docx import Document

# The document is modified in place: it is loaded from and saved back to
# the same path.
DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"

doc = Document(DOCX_PATH)

# NOTE: print(doc.add_heading("一级标题", level=1)) raised an error for the
# original author when adding a level-1 heading; the cause was not resolved,
# so the call is left disabled.

# Keep a reference to each new paragraph so that its formatting can be
# adjusted later on.
paragraph1 = doc.add_paragraph("这是一个段落")
paragraph2 = doc.add_paragraph("这是第二个段落")

doc.save(DOCX_PATH)

结果如下:

20200509225636745.png

② 添加文字块儿

from docx import Document

# The document is modified in place: it is loaded from and saved back to
# the same path.
DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"

doc = Document(DOCX_PATH)

# Start a paragraph without any text; its content is supplied run by run.
paragraph3 = doc.add_paragraph()

# Each run carries its own character formatting.
bold_run = paragraph3.add_run("我被加粗了文字块儿")
bold_run.bold = True

paragraph3.add_run(",我是普通文字块儿,")

italic_run = paragraph3.add_run("我是斜体文字块儿")
italic_run.italic = True

doc.save(DOCX_PATH)

结果如下:

2020050922564864.png

③ 添加一个分页

from docx import Document

# The document is modified in place: it is loaded from and saved back to
# the same path.
DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"

doc = Document(DOCX_PATH)

# Append a page break at the end of the document.
doc.add_page_break()

doc.save(DOCX_PATH)

结果如下:

20200509225700611.png

④ 添加图片

from docx import Document
from docx.shared import Cm

# The document is modified in place: it is loaded from and saved back to
# the same path.
DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"
PICTURE_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\sun_wu_kong.png"

doc = Document(DOCX_PATH)

# Cm (imported from docx.shared) is used to specify the picture size;
# here both width and height are set to 5 centimetres.
picture_side = Cm(5)
doc.add_picture(PICTURE_PATH, width=picture_side, height=picture_side)

doc.save(DOCX_PATH)

结果如下:

20200509225711864.png

⑤ 添加表格

from docx import Document

# The document is modified in place: it is loaded from and saved back to
# the same path.
DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"

doc = Document(DOCX_PATH)

# Table contents: the first inner list is the header row.
list1 = [
    ["姓名","性别","家庭地址"],
    ["唐僧","男","湖北省"],
    ["孙悟空","男","北京市"],
    ["猪八戒","男","广东省"],
    ["沙和尚","男","湖南省"],
]

list2 = [
    ["姓名","性别","家庭地址"],
    ["貂蝉","女","河北省"],
    ["杨贵妃","女","贵州省"],
    ["西施","女","山东省"],
]


def add_filled_table(document, data):
    """Append a table to *document* and fill it with *data*.

    The table dimensions are derived from *data* instead of being
    hard-coded, so rows can be added to or removed from the lists above
    without touching the loops.

    Args:
        document: The python-docx document the table is appended to.
        data: A non-empty list of equally long rows; every value is
            converted with ``str`` before being written to its cell.

    Returns:
        The newly created table.
    """
    table = document.add_table(rows=len(data), cols=len(data[0]))
    for table_row, values in zip(table.rows, data):
        for cell, value in zip(table_row.cells, values):
            cell.text = str(value)
    return table


table1 = add_filled_table(doc, list1)

# A paragraph of dashes visually separates the two tables.
doc.add_paragraph("-----------------------------------------------------------")

table2 = add_filled_table(doc, list2)

doc.save(DOCX_PATH)

结果如下:

20200509225725903.png

⑥ 提取word表格,并保存在excel中(很重要)

from docx import Document
from openpyxl import Workbook

# Source document; its first table is the one that gets exported.
doc = Document(r"G:\6Tipdm\7python办公自动化\concat_word\test2.docx")
t0 = doc.tables[0]

# A new workbook; rows are written to its active worksheet.
workbook = Workbook()
sheet = workbook.active

# Copy the table row by row: each Word table row becomes one worksheet row
# holding the text of its cells.
for row in t0.rows:
    sheet.append([cell.text for cell in row.cells])

workbook.save(filename = r"G:\6Tipdm\7python办公自动化\concat_word\来自word中的表.xlsx")

结果如下:

20200509225737680.png

3、利用Python调整Word文档样式

1)修改文字字体样式

from docx import Document
from docx.shared import Pt,RGBColor
from docx.oxml.ns import qn

# Source document; the styled result is saved under a different file name.
doc = Document(r"G:\6Tipdm\7python办公自动化\concat_word\test2.docx")

# Apply the same character formatting to every run of every paragraph.
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        font = run.font
        font.bold = True
        font.italic = True
        font.underline = True
        font.strike = True
        font.shadow = True
        font.size = Pt(18)
        font.color.rgb = RGBColor(255,255,0)
        font.name = "宋体"

        # For a Chinese font such as 宋体, setting font.name is not enough:
        # the w:eastAsia attribute of the run's rFonts element has to be
        # set as well.
        r = run._element.rPr.rFonts
        r.set(qn("w:eastAsia"),"宋体")

doc.save(r"G:\6Tipdm\7python办公自动化\concat_word\_test1.docx")

结果如下:

20200509225752345.png

2)修改段落样式

① 对齐样式

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH

# Alignment options listed by the original author:
# LEFT, CENTER, RIGHT, JUSTIFY, DISTRIBUTE, JUSTIFY_MED, JUSTIFY_HI,
# JUSTIFY_LOW, THAI_JUSTIFY

doc = Document(r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx")

first_paragraph = doc.paragraphs[0]
print(first_paragraph.text)

# Centre the first paragraph.
first_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

# The result is written to a separate file, leaving test1.docx untouched.
doc.save(r"G:\6Tipdm\7python办公自动化\concat_word\对齐样式.docx")

结果如下:

20200509225806154.png

② 行间距调整

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH

doc = Document(r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx")

# Give every paragraph the same line spacing.  Per the python-docx
# documentation a float value is a multiple of the line height.
for paragraph in doc.paragraphs:
    paragraph.paragraph_format.line_spacing = 5.0

# The result is written to a separate file, leaving test1.docx untouched.
doc.save(r"G:\6Tipdm\7python办公自动化\concat_word\行间距.docx")

结果如下:

20200509225818146.png

③ 段前与段后间距

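这里提供代码(见下图),请自行运行检验:段前、段后间距分别通过paragraph.paragraph_format.space_before和paragraph.paragraph_format.space_after设置,例如赋值为Pt(12);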

20200509225832201.png

你可能感兴趣的:(python调用word)