Python之word文档内容读取

#读取docx中的文本代码
import docx
import re

# Open the source document and report how many paragraphs it contains.
file = docx.Document("D:\\file\\wenjian.docx")
paragraph_count = len(file.paragraphs)
print("段落数:" + str(paragraph_count))
# Separate, initially empty document that the script later fills and saves.
file_word = docx.Document()

# Print every row of every table in the document as a list of cell texts.
tables = file.tables

# Iterate over the tables and rows directly: the index-based version reused
# the name `i` for both the table index and the row index.
for table in tables:
    for row in table.rows:
        print([cell.text for cell in row.cells])
        
        
# Print the text of each paragraph, one paragraph per print call.
for paragraph in file.paragraphs:
    paragraph_text = paragraph.text
    print(paragraph_text)
    
# Collect the digits found in every space-separated token of every paragraph
# and write them, each followed by a space, as one paragraph of a new document.
para_data = []
for para in file.paragraphs:
    # Splitting on a single space yields '' for runs of spaces; drop those
    # empty pieces in one pass instead of repeatedly calling list.remove().
    tokens = [token for token in para.text.split(' ') if token]
    for token in tokens:
        # Keep only the digit characters of the token, in their original order.
        digits = ''.join(re.findall(r"\d", token))
        # A token without digits still contributes a single space, matching
        # the output of the previous implementation.
        para_data.append(digits + ' ')
# add_paragraph() takes the paragraph text as a string, so join the pieces
# rather than handing it the list itself.
file_word.add_paragraph(''.join(para_data))
file_word.save("D:\\file\\jieguo.docx")

你可能感兴趣的:(Python,python,word)