# 将指定目录下所有的docx文件读取到一个Excel中进行展示,便于统一浏览
# 1. 遍历目录,获取其中所有的word文件
# 2. 读取每个word文件,转化为字典
# 3. 构建Excel,将字典全部写入到Excel
from docx import Document
import xlsxwriter
import os,sys,shutil
'''
需求分析:遍历目录下所有的word,读取word,然后将读取的数据写入到Excel中,这样便于通过一个Excel表格,就可以浏览所有的word内容
'''
# Module-level list for collected Word file paths.
full_file_list = list()
# Directory that contains this script; Word documents are looked up in the
# 'word' folder next to it.
base_name = os.path.split(os.path.abspath(__file__))[0]
word_path = os.path.join(base_name, 'word')
# The output workbook is placed inside the 'word' folder itself.
excel_name = 'test.xlsx'
excel_name_file = os.path.join(base_name, 'word', excel_name)
def word_path_list(word_path):
    """Recursively collect the paths of all ``.docx`` files under a directory.

    Each call builds and returns its own list, so calling the function more
    than once never yields duplicated entries.

    :param word_path: directory to scan.
    :return: list of full paths of the ``.docx`` files found (entries appear
        in ``os.listdir`` order, with sub-directories expanded in place), or
        ``None`` when ``word_path`` is not a directory.
    """
    if not os.path.isdir(word_path):
        return None

    found = []
    for entry in os.listdir(word_path):
        full_path = os.path.join(word_path, entry)
        if os.path.isdir(full_path):
            # ``or []`` guards against a sub-directory that disappears between
            # the isdir() check above and the recursive scan.
            found.extend(word_path_list(full_path) or [])
        elif entry.lower().endswith('.docx'):
            # Match the real extension (dot included, any letter case) rather
            # than any name that merely ends in the letters "docx".
            found.append(full_path)
    return found
def get_word_content(word_path):
    """Read the paragraph text of every Word document in ``word_path``.

    Resulting structure::

        {
            'filename': ['file_content', 'file_content']
        }

    :param word_path: iterable of ``.docx`` file paths. ``None`` (the value
        ``word_path_list`` returns for a non-directory) is treated as "no
        files" instead of raising ``TypeError``.
    :return: dict mapping each file path to the list of its paragraph texts,
        each text prefixed with a single space.
    """
    data_json = {}
    if word_path is None:
        return data_json

    for doc_path in word_path:
        document = Document(doc_path)
        # One list entry per paragraph of the document.
        data_json[doc_path] = [
            ' ' + paragraph.text for paragraph in document.paragraphs
        ]
    return data_json
def create_excel(data, excel_path):
    """Write the collected Word contents into a single-sheet Excel workbook.

    The sheet is named '数据统计' and has three columns: a sequence number
    (starting at 1000), the dictionary key (file name) and the dictionary
    value (file content). Keys and values are written through ``str()``, so a
    list of paragraphs appears as its Python list representation.

    :param data: dict of file name -> content, as built by get_word_content.
    :param excel_path: path of the ``.xlsx`` file to create.
    :return: None
    """
    # Header row format.
    header_props = {
        'bold': True,
        'font_name': 'SimHei',
        'font_size': 12,
        'border': True,
        'align': 'center',     # horizontal centring
        'valign': 'vcenter',   # vertical centring
        'color': '#232323',    # font colour
        'bg_color': '#D9E1F2'  # background colour
    }
    # Body cell format. Vertical centring is spelled 'vcenter' in XlsxWriter;
    # 'center' is a horizontal value and would override the 'left' alignment.
    body_props = {
        'font_name': 'SimHei',
        'font_size': 10,
        'border': True,
        'align': 'left',
        'color': '#232323',
        'valign': 'vcenter'
    }

    # The context manager closes the workbook even if a write raises.
    with xlsxwriter.Workbook(excel_path) as workbook:
        worksheet = workbook.add_worksheet('数据统计')
        header = workbook.add_format(header_props)
        text_header = workbook.add_format(body_props)

        # Header row and basic layout.
        worksheet.write('A1', '序号', header)
        worksheet.write('B1', '文件名称', header)
        worksheet.write('C1', '文件内容', header)
        worksheet.set_row(0, 30)
        worksheet.set_column('C:C', 180)

        # Data rows start right below the header; numbering starts at 1000.
        for offset, (k, v) in enumerate(data.items()):
            row = offset + 1
            # Apply the body format, which was previously defined but unused.
            worksheet.write(row, 0, str(1000 + offset), text_header)
            worksheet.write(row, 1, str(k), text_header)
            worksheet.write(row, 2, str(v), text_header)

    print('表格创建完成', excel_path)
if __name__ == '__main__':
    # word_path and excel_name_file are the module-level paths defined near
    # the top of this file, so they are not recomputed here.
    p_list = word_path_list(word_path)
    if p_list is None:
        # word_path_list returns None when the folder is missing. The output
        # workbook lives in that same folder, so nothing useful can be done:
        # stop with a clear message instead of a TypeError further down.
        sys.exit('word目录不存在: ' + word_path)
    data_json = get_word_content(p_list)
    create_excel(data_json, excel_name_file)