python操作word文档,表格中插入文字及照片

需求描述:

本文需求来自美女同事:一个excel里面存储着部门员工姓名,一个文件夹存储着各模块子文件夹,再下面存储着员工照片。目标是把员工姓名及入职日期从excel里面提取出来插入word表格,再找到对应照片插入相应位置。

解题思路:

一般来讲,操作word首选docx库(代码里写 import docx,但安装时的包名是 python-docx,即 pip install python-docx)。因为涉及到插入图片,不得不考虑有些图片格式不合法,
因此用到了PIL对不合法的图片进行格式处理。
因为涉及到了批量操作,因此os库必不可少,迭代某文件夹下的内容。
还有一点就是excel里面的组织名称是正常的,但是照片存放的文件夹虽然也是以组织名称命名,却多了一个数字序号,因此涉及到了一个简单的正则表达式匹配。

代码:

代码不算难,具体细节我不展开了,可以为有相似需求的同学做个参考。懂的都懂,不懂的欢迎讨论,上代码:

import os, sys
import re
import pandas as pd
from docx import Document
from docx.shared import Inches, Cm, Pt, RGBColor

import tkinter.messagebox
import tkinter.filedialog
import tkinter

from pathlib import Path
from PIL import Image


def image_to_jpg(image_path):
    """Re-encode an image as an RGB JPEG stored next to the original file.

    The output file keeps the directory and stem of ``image_path`` and gets a
    ``.jpg`` extension; when the input already ends in ``.jpg`` it is
    overwritten in place.

    Args:
        image_path: Path of the image to convert (str or path-like).

    Returns:
        The path of the written JPEG file, as a string.
    """
    path = Path(image_path)
    jpg_image_path = f'{path.parent / path.stem}.jpg'
    # The context manager releases the source file handle once the pixels
    # have been converted and written out.
    with Image.open(image_path) as img:
        img.convert('RGB').save(jpg_image_path)
    # Return the converted file, not the input, so callers can use the result.
    return jpg_image_path


def _folder_map(pic_path):
    """Map each organisation name to its (number-decorated) folder name."""
    non_digits = re.compile(r'\D+')  # first run of non-digit characters
    mapping = {}
    for folder in os.listdir(pic_path):
        match = non_digits.search(folder)
        if match is None:
            # A purely numeric entry carries no organisation name; skip it.
            continue
        mapping[match.group(0)] = folder
    return mapping


def _find_photo(folder, name):
    """Return the path of the first file in *folder* whose name contains *name* as a word, else None."""
    words = re.compile(r'\w+')
    for filename in os.listdir(folder):
        if name in words.findall(filename):
            return os.path.join(folder, filename)
    return None


def _insert_photo(run, photo_path):
    """Add the photo to *run* at 3.0cm x 4.2cm, converting it to JPEG if the first attempt fails."""
    try:
        run.add_picture(photo_path, width=Cm(3.0), height=Cm(4.2))
    except Exception:
        # The image could not be embedded as-is: re-encode it as JPEG and
        # retry with the converted file (same naming rule as image_to_jpg).
        image_to_jpg(photo_path)
        source = Path(photo_path)
        jpg_path = f'{source.parent / source.stem}.jpg'
        run.add_picture(jpg_path, width=Cm(3.0), height=Cm(4.2))


def _style_run(run):
    """Apply the caption font used for names and dates."""
    run.font.name = '宋体'
    run.font.size = Pt(6)
    run.font.color.rgb = RGBColor(255, 255, 255)


def main(xls_path, pic_path, docx_path, save_path=None):
    """Fill the Word template with employee names, join dates and photos.

    Employees are read from the Excel sheet (columns '姓名', '入职日期',
    '三级组织') in row order. Every table of the template holds four
    employees: for the k-th employee of a table, the photo goes into cell
    (0, 2k) and the name / join date into cells (1, 2k+1) / (2, 2k+1).

    Args:
        xls_path: Excel file with the employee list.
        pic_path: Directory containing one photo sub-folder per organisation.
        docx_path: Word template to fill.
        save_path: Where to save the result. When omitted, the module-level
            ``save_path`` set by the script entry point is used.

    Returns:
        A list with the names of employees whose photo was not found or
        could not be inserted.

    Raises:
        ValueError: If no output path is available.
        KeyError: If an employee's organisation has no matching folder.
        IndexError: If the template has fewer tables than required.
    """
    if save_path is None:
        # Backward compatibility: older callers set a module-level save_path.
        save_path = globals().get('save_path')
    if not save_path:
        raise ValueError('save_path is required to store the generated document')

    per_row = 4  # employees per table

    df = pd.read_excel(xls_path, index_col=None)
    path_dict = _folder_map(pic_path)

    docx = Document(docx_path)
    tables = docx.tables
    error_list = []

    rows = zip(df['姓名'], df['入职日期'], df['三级组织'])
    for i, (name, joined, org) in enumerate(rows):
        table = tables[i // per_row]
        slot = i % per_row
        text_col = 2 * slot + 1

        # NOTE(review): year and month are concatenated without a separator
        # (e.g. '20213月'); confirm whether '年' was intended after the year.
        yymm = f'{joined.year}{joined.month}月'
        who = table.cell(1, text_col).paragraphs[0].add_run(name)
        date = table.cell(2, text_col).paragraphs[0].add_run(yymm)
        _style_run(who)
        _style_run(date)

        module_name = path_dict[org]
        # Looked up afresh for every employee so that a missing photo can
        # never reuse the previous employee's picture.
        person_path = _find_photo(os.path.join(pic_path, module_name), name)
        if person_path is None:
            error_list.append(name)
            continue

        run = table.cell(0, 2 * slot).paragraphs[0].add_run()
        try:
            _insert_photo(run, person_path)
        except Exception:
            # Neither the original nor the converted image was usable;
            # report it and keep processing the remaining employees.
            error_list.append(name)

    docx.save(save_path)
    return error_list

if __name__ == '__main__':
    # Script entry point: ask for the inputs with file dialogs, run main(),
    # then confirm where the result was written. Cancelling any dialog exits.

    # The root window is only needed as the owner of the dialogs; hide it.
    root = tkinter.Tk()
    root.withdraw()

    pic_path = tkinter.filedialog.askdirectory(title='选择照片存放的文件夹')
    if not pic_path:
        sys.exit(0)

    docx_path = tkinter.filedialog.askopenfilename(title='选择word模板', filetypes=[('word', '*.doc;*.docx')])
    if not docx_path:
        sys.exit(0)

    xls_path = tkinter.filedialog.askopenfilename(title='选择员工列表excel', filetypes=[('excel', '*.xlsx;*.xls')])
    if not xls_path:
        sys.exit(0)

    # Must remain a module-level name: main() reads it when it is not given
    # an explicit output path.
    save_path = tkinter.filedialog.asksaveasfilename(title='word文件保存为...', filetypes=[('word', '*.doc;*.docx')])
    if not save_path:
        sys.exit(0)

    main(xls_path, pic_path, docx_path)

    tkinter.messagebox.showinfo('搞定', f'文件已存为 {save_path}')
    # destroy() ends the Tk application; there is nothing left for an event
    # loop to serve, so no mainloop() call follows.
    root.destroy()

# Pyinstaller -F -w -i logo.ico insert_pic2word.py

你可能感兴趣的:(代码库,自动化,python,pandas,机器学习)