功能:根据关键词批量从doc、docx、pdf文件中筛选出包含所输入关键词的文件
那么开始上代码。本人不是专业 Python 程序猿,代码写得不好请勿喷,哈哈
import os
import shutil
import sys

import docx
from docx import Document
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams
from pdfminer.pdfdevice import PDFDevice
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfparser import PDFParser, PDFDocument
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *


class Window(QDialog):
    """Dialog that copies Word/PDF files containing a keyword to a folder.

    The user picks a source folder, an output folder and a keyword. Every
    ``.doc``/``.docx``/``.pdf`` file found under the source folder is
    scanned, and each file whose text contains the keyword is copied
    (flat, by base name) into the output folder.
    """

    def __init__(self, parent=None):
        super(Window, self).__init__(parent)
        self.path = ''
        self.initUI()
        self.setWindowTitle("文件小助手")
        self.resize(240, 200)

    def initUI(self):
        """Build the form: two folder pickers, a keyword box and a start button."""
        grid = QGridLayout()

        # Row 0: source folder.
        grid.addWidget(QLabel("源路径:"), 0, 0)
        self.pathLineEdit = QLineEdit()
        self.pathLineEdit.setFixedWidth(200)
        self.pathLineEdit.setText(self.path)
        grid.addWidget(self.pathLineEdit, 0, 1)
        source_button = QPushButton("选择文件夹")
        grid.addWidget(source_button, 0, 3)
        source_button.clicked.connect(self.msg)

        # Row 1: output folder.
        grid.addWidget(QLabel("输出路径:"), 1, 0)
        self.pathLineEdit1 = QLineEdit()
        self.pathLineEdit1.setFixedWidth(200)
        self.pathLineEdit1.setText(self.path)
        grid.addWidget(self.pathLineEdit1, 1, 1)
        target_button = QPushButton("选择文件夹")
        grid.addWidget(target_button, 1, 3)
        target_button.clicked.connect(self.msg1)

        # Row 2: keyword. The widget's geometry is governed by the grid
        # layout, so no explicit move()/resize() is applied.
        grid.addWidget(QLabel("关键字:"), 2, 0)
        self.textbox = QLineEdit(self)
        grid.addWidget(self.textbox, 2, 1)

        # Row 3: start button.
        self.button1 = QPushButton('点我开始干活儿', self)
        grid.addWidget(self.button1, 3, 1)

        self.setLayout(grid)

        # The widgets (not their current text) are passed so that the values
        # are read at click time rather than at construction time.
        self.button1.clicked.connect(
            lambda: self.working(self.pathLineEdit, self.pathLineEdit1, self.textbox))

    def _choose_dir(self, line_edit):
        """Ask for a folder and store it in *line_edit*; keep the old value on cancel."""
        chosen = QFileDialog.getExistingDirectory(self, "选取文件夹", "./")  # start directory
        if not chosen:
            # An empty string means the dialog was cancelled.
            return
        line_edit.setText(chosen)
        print(chosen)

    def msg(self):
        """Pick the source folder."""
        self._choose_dir(self.pathLineEdit)

    def msg1(self):
        """Pick the output folder."""
        self._choose_dir(self.pathLineEdit1)

    @staticmethod
    def _docx_contains(document, key):
        """Return True if *key* occurs in any paragraph or table cell of *document*."""
        if any(key in paragraph.text for paragraph in document.paragraphs):
            return True
        return any(
            key in cell.text
            for table in document.tables
            for row in table.rows
            for cell in row.cells
        )

    # Word parser
    def readDoc(self, root, path, target, key):
        """Copy the Word file at *path* into *target* if it contains *key*.

        Args:
            root: Directory containing the file. Unused; kept so existing
                callers keep working.
            path: Full path of the ``.doc``/``.docx`` file.
            target: Output directory.
            key: Keyword to look for.

        The file is opened in place and is never renamed: python-docx
        identifies a document by its content, not by its extension, so
        renaming ``.doc`` to ``.docx`` only altered the user's source files.
        Files that python-docx cannot open (e.g. legacy binary ``.doc``)
        are skipped.
        """
        try:
            document = Document(path)
        except Exception as exc:
            # Best effort: one unreadable file must not abort the batch.
            print('skip unreadable Word file: %s (%s)' % (path, exc))
            return
        if self._docx_contains(document, key):
            # Copy exactly once, no matter how many places matched.
            self.copyFile(target, path)

    @staticmethod
    def _pdf_contains(fp, key):
        """Return True if *key* occurs in any text item of the PDF open as *fp*."""
        parser = PDFParser(fp)
        doc = PDFDocument()
        # Link the parser and the document object to each other.
        parser.set_document(doc)
        doc.set_parser(parser)
        # No password is supplied.
        doc.initialize("")
        resource = PDFResourceManager()
        device = PDFPageAggregator(resource, laparams=LAParams())
        interpreter = PDFPageInterpreter(resource, device)
        for page in doc.get_pages():
            interpreter.process_page(page)
            for item in device.get_result():
                if hasattr(item, "get_text") and key in item.get_text():
                    return True
        return False

    # PDF parser
    def readPdf(self, root, path, target, key):
        """Copy the PDF at *path* into *target* if its text contains *key*.

        Args:
            root: Directory containing the file. Unused; kept so existing
                callers keep working.
            path: Full path of the ``.pdf`` file.
            target: Output directory.
            key: Keyword to look for.

        Scanning stops at the first match and the file is copied once.
        PDFs that cannot be opened or parsed are skipped.
        """
        try:
            with open(path, "rb") as fp:
                found = self._pdf_contains(fp, key)
        except Exception as exc:
            # Best effort: one unreadable file must not abort the batch.
            print('skip unreadable PDF file: %s (%s)' % (path, exc))
            return
        if found:
            self.copyFile(target, path)

    # Copy file
    def copyFile(self, path, oldname):
        """Copy file *oldname* into directory *path*, creating it if needed.

        Args:
            path: Destination directory (intermediate directories are created).
            oldname: Full path of the file to copy.

        The copy keeps only the base name, so a same-named file already in
        *path* is overwritten.
        """
        os.makedirs(path, exist_ok=True)
        destination = os.path.join(path, os.path.basename(oldname))
        if os.path.abspath(destination) == os.path.abspath(oldname):
            # Happens when the output folder lies inside the source tree and
            # the walk reaches a file that is already in place.
            return
        shutil.copyfile(oldname, destination)

    # Start working
    @pyqtSlot()
    def working(self, pathLineEdit1, pathLineEdit2, textbox):
        """Validate the form, scan the source tree and report the outcome.

        Args:
            pathLineEdit1: Line edit holding the source directory.
            pathLineEdit2: Line edit holding the output directory.
            textbox: Line edit holding the keyword.
        """
        sourcedir = pathLineEdit1.text()
        targetdir = pathLineEdit2.text()
        key = textbox.text()

        required = (
            (sourcedir, '源路径不能为空'),
            (targetdir, '输出路径不能为空'),
            (key, '关键字不能为空'),
        )
        for value, complaint in required:
            if not value.strip():
                QMessageBox.question(self, "Message", complaint, QMessageBox.Ok, QMessageBox.Ok)
                return

        # Process files
        found_any = False
        for root, dirs, files in os.walk(sourcedir):
            for file in files:
                full_path = os.path.join(root, file)
                # Compare the real extension, case-insensitively.
                extension = os.path.splitext(file)[1].lower()
                if extension in ('.doc', '.docx'):
                    self.readDoc(root, full_path, targetdir, key)
                    found_any = True
                elif extension == '.pdf':
                    self.readPdf(root, full_path, targetdir, key)
                    found_any = True

        msg = '处理好了' if found_any else '源路径中没有word和pdf文件'
        QMessageBox.question(self, "Message", msg, QMessageBox.Ok, QMessageBox.Ok)


if __name__ == '__main__':
    app = QApplication(sys.argv)
    dialog = Window()
    dialog.exec_()
工具演示效果图如下
工具下载链接: https://pan.baidu.com/s/1w7CQUAowSgR_d6V2h5OlwA 密码:kyuy
文末小福利免费视频资源网站:www.sousuohou.com