工作内容:
1.读取pdf文档内容
2.分页显示
3.每个数据后设置下拉框供手动打标签
4.数据录入txt文档
代码:
import math
import os
import tkinter as tk
from tkinter import *
import tkinter.ttk as ttk
from tkinter.messagebox import *
import time
import pdfplumber as pp
# Requires: pip install pdfplumber
class GUI(object):
    """Tkinter window for hand-labelling the text lines of PDF resumes.

    The window offers a drop-down of the ``.pdf`` files found in
    ``resume_dir``.  Pressing '提取' extracts the text of the selected file
    with pdfplumber and shows it in a notebook, ``LINES_PER_PAGE`` lines per
    tab, with one read-only combobox of ``resume_kinds`` next to the lines.
    Pressing '录入' asks for confirmation and then appends one
    "<label> <line without spaces>" row per line to ``OUTPUT_FILE``.

    Constructing the object builds the window and enters the Tk main loop, so
    the constructor only returns once that loop has ended.
    """

    # Directory scanned for resumes when no ``resume_dir`` is passed.
    DEFAULT_RESUME_DIR = r'D:\hk\微信资料\OA测试简历\OA测试简历'
    # Number of text lines (and label comboboxes) shown on one notebook tab.
    LINES_PER_PAGE = 21
    # Text file that confirmed labels are appended to.
    OUTPUT_FILE = 'data.txt'

    def __init__(self, resume_dir=None):
        """Build the widgets and run the Tk main loop.

        Args:
            resume_dir: Directory to scan for ``.pdf`` files.  Defaults to
                ``DEFAULT_RESUME_DIR``.

        Raises:
            OSError: If ``resume_dir`` cannot be listed.
        """
        print('begin time:', time.strftime('%Y-%m-%d %H:%M:%S'))
        self.resume_dir = (
            self.DEFAULT_RESUME_DIR if resume_dir is None else resume_dir
        )
        self.resume_kinds = ('基础资料', '教育经历', '校园经历', '项目经历', '工作经历',
                             '专业技能', '自我评价', '求职意向', '其他')
        self.file_list = [
            name for name in os.listdir(self.resume_dir)
            if name.endswith('.pdf')
        ]
        # content: extracted text lines; page_num: number of notebook tabs.
        self.content, self.page_num = [], 0
        # lb_text[k] is the space-stripped line labelled by combobox cmb[k].
        self.lb_text, self.cmb = [], []

        # Main window.
        self.root = Tk()
        self.root.title('简历打标签')
        self.root.geometry("680x620")

        # Notebook holding one tab per page of extracted lines.
        self.tab_main = ttk.Notebook(self.root)
        self.tab_main.place(relx=0.05, rely=0.1, relwidth=0.9, relheight=0.8)

        # Drop-down listing the available PDF files.
        self.cmb_files = ttk.Combobox(self.root, state='readonly')
        self.cmb_files['value'] = self.file_list
        if self.file_list:
            # current(0) raises TclError on an empty value list, so only
            # preselect an entry when the directory actually contains PDFs.
            self.cmb_files.current(0)
        self.cmb_files.place(relx=0.3, rely=0, relwidth=0.3, relheight=0.05)

        # Button that extracts the file currently selected in the drop-down.
        self.extract_butt = Button(
            self.root, text='提取',
            command=lambda: self.extract_file(self.cmb_files.get()))
        self.extract_butt.place(relx=0.61, rely=0, relwidth=0.05, relheight=0.05)

        # Button that writes the chosen labels to the output file.
        self.writein_butt = Button(self.root, text='录入', command=self.write_in)
        self.writein_butt.place(relx=0.5, rely=0.91, relwidth=0.05, relheight=0.05)

        self.root.mainloop()

    @staticmethod
    def _read_pdf_lines(path):
        """Return every text line of the PDF at ``path``, in page order."""
        lines = []
        # The context manager closes the underlying file even if a page
        # fails to parse.
        with pp.open(path) as pdf:
            for page in pdf.pages:
                # A page without extractable text may yield None or '';
                # treat both as "no lines" instead of failing on None.
                lines.extend((page.extract_text() or '').splitlines())
        return lines

    def extract_file(self, file):
        """Load ``file`` from ``resume_dir`` and rebuild the labelling tabs.

        Does nothing when ``file`` does not end in '.pdf' (for example when
        the drop-down is empty).  If the PDF cannot be read, an error dialog
        is shown and the tabs and labels currently on screen are left as
        they were.

        Args:
            file: File name (not a full path) of the PDF to load.
        """
        if not file.endswith('.pdf'):
            return
        path = os.path.join(self.resume_dir, file)
        print('path:', path)
        try:
            lines = self._read_pdf_lines(path)
        except Exception as exc:  # UI boundary: report instead of failing silently
            showerror('错误', '无法读取文件: %s\n%s' % (path, exc))
            return

        # Replace the state only after a successful read so that a failure
        # cannot leave the label lists out of sync with the visible tabs.
        self.content = lines
        self.lb_text, self.cmb = [], []
        print(len(self.content))
        per_page = self.LINES_PER_PAGE
        self.page_num = math.ceil(len(self.content) / per_page)
        print('page_num:', self.page_num)

        # Throw away the old notebook and build a fresh one for this file.
        self.tab_main.destroy()
        self.tab_main = ttk.Notebook(self.root)
        self.tab_main.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.85)
        for page_index in range(self.page_num):
            tab = Frame(self.tab_main)
            # Left 90% of the tab shows the text, right 10% the comboboxes.
            labels_area = Frame(tab)
            cmbs_area = Frame(tab)
            labels_area.place(relx=0, rely=0, relwidth=0.9, relheight=1)
            cmbs_area.place(relx=0.9, rely=0, relwidth=0.1, relheight=1)
            self.tab_main.add(tab, text='%i' % (page_index + 1))

            start = page_index * per_page
            for line in self.content[start:start + per_page]:
                Label(labels_area, text=line).pack(anchor=E)
                # Spaces are stripped from the stored text because the output
                # row uses a single space to separate label and text.
                self.lb_text.append(line.replace(' ', ''))
                kind_box = ttk.Combobox(cmbs_area, state='readonly')
                kind_box['value'] = self.resume_kinds
                kind_box.current(0)
                kind_box.pack()
                self.cmb.append(kind_box)

    def write_in(self):
        """Append the chosen label and text of every line to ``OUTPUT_FILE``.

        Asks for confirmation first; nothing is written if the user declines.
        Each row has the form "<label> <line without spaces>".
        """
        if not askyesno('提示框', '是否录入数据?(此操作会影响文本录入信息)'):
            return
        with open(self.OUTPUT_FILE, 'a', encoding='utf-8') as f:
            f.writelines(
                '%s %s\n' % (kind_box.get(), text)
                for kind_box, text in zip(self.cmb, self.lb_text)
            )
        print('录入成功')
        print('finish time:', time.strftime('%Y-%m-%d %H:%M:%S'))
# Open the labelling window only when this file is run as a script;
# GUI() blocks in the Tk main loop until that loop ends.
if __name__ == "__main__":
    gui = GUI()
界面展示: