# Python: collect the tables found in Word (doc/docx) files into one summary document
"""
Created on Sun May 7 14:22:49 2023
@author: ypzhao
"""
import docx
from docx.shared import Cm
from openpyxl import Workbook
from docx import Document
from docx.shared import Pt, RGBColor
from docx.oxml.ns import qn
import os
import time
def handleFile(filename):
    """Demonstrate reading and editing a Word document with python-docx.

    Opens ``filename``, prints its paragraphs and table rows, appends sample
    paragraphs, a picture, headings and two tables, copies the second table
    of the document into ``table1.xlsx`` and saves the result as
    ``test.docx``.

    Args:
        filename: Path of the .docx file to open.
    """
    # Bind the document to its own name: assigning to ``docx`` here would
    # make the module name local to the function and raise
    # UnboundLocalError before the file is even opened.
    document = Document(filename)
    paragraphs = document.paragraphs

    # Paragraph count and the first paragraph (if there is one).
    print(len(paragraphs))
    if paragraphs:
        print(paragraphs[0].text)

    # Third to fifth paragraphs; slicing tolerates shorter documents.
    for paragraph in paragraphs[2:5]:
        print(paragraph.text)

    # Every paragraph in the document.
    for paragraph in paragraphs:
        print(paragraph.text)

    # Every table, printed one row (list of cell texts) at a time.
    for table in document.tables:
        for row in table.rows:
            print([cell.text for cell in row.cells])

    # One paragraph made of a plain run, a bold run and an italic run.
    styled = document.add_paragraph('这是一个段落')
    styled.add_run('加粗的一句话').bold = True
    styled.add_run("这句是斜体文字块").italic = True
    document.add_paragraph('这是第二个段落')
    document.add_paragraph('这是一个段落,后面带图片')

    # Cm expresses the picture size in centimetres; add_picture also accepts
    # just one of width/height.
    document.add_picture('1.png', width=Cm(14), height=Cm(7))
    document.add_paragraph('这是第二个段落')
    document.add_page_break()
    document.add_paragraph("这是新增的一页")

    # Headings must be added to the document being edited; creating them on
    # a fresh Document() would discard them immediately.
    document.add_heading('正文', 1).add_run("前言")
    document.add_heading('标题', 2)

    # Four spacer paragraphs before the tables.
    for _ in range(4):
        document.add_paragraph().add_run('\n')

    scores_a = [
        ["语文", "数学", "英语"],
        ["100", "100", "100"],
        ["100", "100", "100"],
        ["100", "100", "100"],
        ["100", "100", "100"],
    ]
    scores_b = [
        ["政治", "历史", "地理"],
        ["100", "100", "100"],
        ["100", "100", "100"],
        ["100", "100", "100"],
        ["100", "100", "100"],
    ]
    _add_filled_table(document, scores_a)
    document.add_paragraph("---------------------------------------------------------")
    # Sized from the data so the last row of scores_b is not dropped.
    _add_filled_table(document, scores_b)

    # Export the second table of the document (index 1). Which table that is
    # depends on how many tables the input file already contained.
    exported = document.tables[1]
    column_count = len(exported.columns)
    workbook = Workbook()
    sheet = workbook.active
    for row_index in range(len(exported.rows)):
        sheet.append(
            [exported.cell(row_index, col_index).text
             for col_index in range(column_count)]
        )
    workbook.save("table1.xlsx")
    document.save('test.docx')


def _add_filled_table(document, rows):
    """Append a table sized to ``rows`` to ``document`` and fill its cells."""
    table = document.add_table(rows=len(rows), cols=len(rows[0]))
    for table_row, values in zip(table.rows, rows):
        for cell, value in zip(table_row.cells, values):
            cell.text = str(value)
    return table
def getfilelist(dirname, ls=None):
    """Collect the paths of all files under ``dirname``, recursively.

    Args:
        dirname: Root directory to walk.
        ls: Optional list the paths are appended to. A new list is created
            when it is omitted.

    Returns:
        The list holding the collected paths (``ls`` itself when given).
    """
    if ls is None:
        ls = []
    for root, _dirs, files in os.walk(dirname):
        ls.extend(os.path.join(root, name) for name in files)
    return ls
def test(path):
    """Append a copy of every table in the document at ``path`` to ``result.doc``.

    A bold red paragraph with the source file name is written first, then
    one numbered label paragraph followed by the copied table for each table
    in the source document. ``result.doc`` must already exist.

    Args:
        path: Path of the Word document whose tables are collected.
    """
    from copy import deepcopy

    # The source file is parsed once; a second Document(path) is not needed.
    source = Document(path)
    result = Document("result.doc")

    title = result.add_paragraph('文件名:'+path)
    title.runs[0].font.bold = True
    title.runs[0].font.color.rgb = RGBColor(255, 0, 0)

    for number, table in enumerate(source.tables, start=1):
        result.add_paragraph('表格:' + str(number))
        # Copy the table so the source document is left untouched, then
        # insert the copied XML element right after an empty anchor
        # paragraph in the result document.
        anchor = result.add_paragraph()
        anchor._p.addnext(deepcopy(table)._element)

    result.save("result.doc")
def init():
    """Reset ``result.doc`` to an empty document before a new run."""
    # On the first run there is no result file to open yet, so start from a
    # new document instead of failing.
    if os.path.exists("result.doc"):
        doc1 = Document("result.doc")
    else:
        doc1 = Document()
    # Drop all body content left over from a previous run.
    doc1._body.clear_content()
    doc1.save("result.doc")
if __name__ == '__main__':
    # Start from an empty result document, then add the tables of every
    # Word document found under the input directory.
    init()
    ls = []
    getfilelist('D:/test/', ls)
    for tep in ls:
        name = os.path.basename(tep)
        # python-docx can only open .docx packages; "~$" files are the lock
        # files Word creates next to an open document. Skipping both keeps
        # one stray file from aborting the whole run.
        if name.startswith('~$') or not name.lower().endswith('.docx'):
            continue
        print("解析"+tep)
        test(tep)
    print("完成 , 结果输出在result.doc里")
    # Pause before exiting (presumably so the console output stays visible).
    time.sleep(10)