# 关注 https://space.bilibili.com/187492698 代码演示
# -*- coding: utf-8 -*-
# Version: Python 3.9.7
# Author: TRIX
# Date: 2022-09-08 20:40:11
# Use:
# intro: 批量把word表格数据提取到Excel
import re
from win32com.client import Dispatch#pip install pypiwin32
# Obtain the Word.Application COM automation object that getInfo() uses to open documents.
word=Dispatch('Word.Application')
# Visible=False keeps the Word window hidden while the script runs.
word.Visible=False
# Non-greedy ".+?" with re.S: as written, findall() returns one match per character.
# NOTE(review): the pattern looks truncated - presumably it was meant to capture a whole
# table cell (getInfo() indexes the matches by fixed position); confirm against the original script.
pat=re.compile(r'.+?',re.S)
def getInfo(f):
    """Extract one student's record from the first table of a Word document.

    The document is opened through the module-level ``word`` COM object and is
    closed again (without saving) as soon as the table text has been read, so
    a batch run does not pile up open documents in the hidden Word instance.

    Args:
        f: Path of the .doc/.docx file, handed to ``word.Documents.Open``.

    Returns:
        An ``infoC`` instance filled from fixed positions of the table text.
        Its ``getDetailInfo()`` text is also printed.
    """
    doc=word.Documents.Open(f)
    try:
        tableinfo=doc.Tables(1).Range.Text
    finally:
        # 0 == wdDoNotSaveChanges: the file is only read, never modified.
        doc.Close(0)
    # '\r' and '\x07' are removed from every match of ``pat``.
    # NOTE(review): the fixed indices below presumably expect one ``infoL``
    # entry per table cell - confirm that ``pat`` splits the text that way.
    infoL=[text.replace('\r','').replace('\x07','') for text in pat.findall(tableinfo)]
    # Argument order follows infoC.__init__:
    # name, sex, dorm, native place, ID number, phone, address, parents' contacts.
    info=infoC(
        infoL[1],
        infoL[3],
        infoL[13],
        infoL[9],
        infoL[25],
        # Keep only the text before 'QQ' and drop the phone label.
        infoL[29].split('QQ')[0].replace('电话:',''),
        infoL[33],
        ' '.join(infoL[45:48]),
    )
    # The first contact row (45:48) was taken above; the remaining five rows
    # start 6 entries apart and are appended only when their first cell is filled.
    for n in range(5):
        starti=51+n*6
        endi=starti+3
        if infoL[starti]!='':
            info.ptels+='|'
            info.ptels+=' '.join(infoL[starti:endi])
    info.ptels=info.ptels.strip()
    print(info.getDetailInfo())
    return info
def getNids():
    """Return all lines of ``221.txt`` (read as UTF-8), line endings included."""
    with open('221.txt', mode='r', encoding='utf-8') as roster:
        return roster.readlines()
# Loaded once when the module runs; infoC.__init__ scans these lines to look up a student's id by name.
nids=getNids()
class infoC(object):
    """One student's record as extracted from a Word form.

    The student id (``nid``) is not supplied by the caller; it is looked up
    by name in the module-level ``nids`` lines.
    """

    def __init__(self, name, sex, dorm, nat, pid, tel, loc, ptels):
        """Store the given fields and resolve ``nid`` from ``nids``.

        ``nid`` becomes the text before the first tab of the first ``nids``
        line that contains ``name`` as a substring, or ``'NoValue'`` when no
        line matches.
        """
        self.name = name
        self.nid = next(
            (row.split('\t')[0] for row in nids if name in row),
            'NoValue',
        )
        self.sex = sex
        self.dorm = dorm
        self.nat = nat
        self.pid = pid
        self.tel = tel
        self.loc = loc
        self.ptels = ptels

    def getDetailInfo(self):
        """Return all fields as ``label:value`` pairs, one per line."""
        return f'name:{self.name}\nnid:{self.nid}\nsex:{self.sex}\ndorm:{self.dorm}\nnative:{self.nat}\nid:{self.pid}\ntel:{self.tel}\nloc:{self.loc}\nptels:{self.ptels}'

    def getAttrL(self):
        """Return the field values as a list, in output-column order."""
        columns = ('name', 'nid', 'sex', 'dorm', 'nat', 'pid', 'tel', 'loc', 'ptels')
        return [getattr(self, column) for column in columns]
from pandas import DataFrame,ExcelWriter
def saveTable(outXlsx,tables=[],sheet_names=[]):
    """Write each table in ``tables`` to its own sheet of the workbook ``outXlsx``.

    Args:
        outXlsx: Target handed to ``pandas.ExcelWriter``.
        tables: Tables to save; each one is converted with ``DataFrame(table)``.
            The default list is only read, never mutated.
        sheet_names: Optional sheet names, matched to ``tables`` by position.
            When empty, the sheets are named ``'1'``, ``'2'``, ... instead.

    Raises:
        ValueError: If ``tables`` is empty.
    """
    if not tables:
        # Raising a bare string is itself a TypeError in Python 3, which hid
        # the intended message; raise a real exception instead.
        raise ValueError('no data')
    with ExcelWriter(outXlsx) as writer:
        for i,table in enumerate(tables):
            sheet=sheet_names[i] if sheet_names else str(i+1)
            # header/index are switched off so only the table's own cells are written.
            DataFrame(table).to_excel(writer,sheet_name=sheet,header=False,index=False)
from guietta import QFileDialog
# Show an "Open File" dialog filtered to *.doc / *.docx and keep element [0] of its
# return value as the list of files that saveXLsx() will process.
# NOTE(review): the filter label reads "Images" although the patterns match Word documents.
files = QFileDialog.getOpenFileNames(None, "Open File",r"","Images (*.doc *.docx)")[0]
def saveXLsx():
    """Extract every file in ``files`` and save the rows as one sheet in ``outpath``.

    The sheet starts with a header row; each following row is a 1-based
    sequence number followed by the values from ``getInfo(f).getAttrL()``.
    """
    header=['序号','姓名','学号','性别','寝室号','籍贯','身份证号','电话','家庭住址','父母联系方式']
    rows=[
        [str(seq),*getInfo(path).getAttrL()]
        for seq,path in enumerate(files,start=1)
    ]
    saveTable(outpath,tables=[[header,*rows]])
# Workbook that saveXLsx() writes its result to (relative to the working directory).
outpath='res.xlsx'
try:
    saveXLsx()
finally:
    # The Word instance was made invisible at startup, so nothing else would
    # ever close it; quit it here (0 == wdDoNotSaveChanges) even if the
    # extraction fails, instead of leaving a hidden Word process behind.
    word.Quit(0)