import re

from docx import Document
# Open the Word document that the rest of the script inspects.
source_path = "E:\\File\\大一\\大一下学期/马克思.docx"
file = Document(source_path)
# 直接打印 Document 对象并不能输出文章内容:
# Print the Document object itself; this does not print the document's text.
print(file)
# 可以通过循环遍历各个段落,逐段输出其 text 文本:
# Print the text of every paragraph in the document, in order.
# Each item of file.paragraphs is a paragraph, so the loop variable is
# named accordingly.
for paragraph in file.paragraphs:
    print(paragraph.text)
# Print only the paragraphs whose style is named "Heading 1".
# Use "Heading 2" for second-level headings, and so on.
for paragraph in file.paragraphs:
    if paragraph.style.name == "Heading 1":
        print(paragraph.text)
# Print every heading regardless of level: keep the paragraphs whose style
# name has the form "Heading <number>".
# The pattern is a raw string so that \d reaches the regex engine untouched,
# and it is compiled once rather than on every iteration.
heading_style = re.compile(r"^Heading \d+$")
for paragraph in file.paragraphs:
    if heading_style.match(paragraph.style.name):
        print(paragraph.text)
# Print the body text only: paragraphs whose style is named "Normal".
for paragraph in file.paragraphs:
    if paragraph.style.name == "Normal":
        print(paragraph.text)
# Show the text of the first paragraph. A bare expression is only echoed in
# an interactive session, so print it explicitly; skip it when the document
# has no paragraphs instead of raising IndexError.
paragraphs = file.paragraphs
if paragraphs:
    print(paragraphs[0].text)
# Print each paragraph's index next to its own text. A paragraph with no
# text still produces a line (the index alone), which makes blank
# paragraphs easy to spot.
for index, paragraph in enumerate(file.paragraphs):
    print(index, paragraph.text)
# Save the document to the output path.
output_path = "E:/aa.docx"
file.save(output_path)