目录
前置:
代码:
视频:
1 本系列将以 “PyQt6实例_批量下载pdf工具”开头,放在 【PyQt6实例】 专栏
2 本系列涉及到的PyQt6知识点:
线程池:QThreadPool,QRunnable;
信号与槽:pyqtSignal,pyqtSlot;
界面:QTextEdit,QLabel,QLineEdit,QPushButton,QMainWindow,QWidget;
布局:QHBoxLayout,QVBoxLayout;
弹框:QFileDialog,QMessageBox。
3 本系列后续会在B站录制视频,到时会在文末贴出链接。本人还是建议先看博文,不懂的再看视频,这样效率高,节约时间。
import os
import sys,json
import traceback
from datetime import datetime
from pathlib import Path
import requests
from PyQt6.QtCore import (
QSize,Qt,QRunnable,QThreadPool,pyqtSignal,pyqtSlot,QObject
)
from PyQt6.QtWidgets import(
QApplication,
QMainWindow,
QLabel,
QLineEdit,
QPushButton,
QFileDialog,
QTextEdit,
QHBoxLayout,
QVBoxLayout,
QWidget,
QMessageBox
)
# Absolute directory of this script; the persistent "data" folder lives beside it.
# abspath() guards against a relative __file__ (script launched by relative path),
# which would otherwise make every data path depend on the current working directory.
basedir = os.path.dirname(os.path.abspath(__file__))
class WorkerSignals(QObject):
    """Signal container for a Worker.

    QRunnable is not a QObject and cannot declare signals itself, so each
    Worker owns one instance of this class and emits through it.
    """

    # Emitted with the outcome tuple when run() completes without raising.
    result = pyqtSignal(tuple)
    # Emitted with (thread_num, exception type, exception value, traceback text).
    error = pyqtSignal(tuple)
    # Always emitted last, carrying the worker's thread number.
    finished = pyqtSignal(int)
class Worker(QRunnable):
    """Thread-pool task that downloads the PDFs for one share of the ticker files.

    ``task_data`` keys read by this class:
        temp_dict: ``{ticker: [url, ...]}`` of URLs already attempted in an
            interrupted run, or ``None``.
        txt_list:  file names to process; the first 6 characters of each name
            are used as the ticker.
        txt_dir:   directory containing the txt files.
        pdf_dir:   directory under which one sub-directory per ticker is created.

    The tuple emitted on ``signals.result`` is
    ``(thread_num, status, ticker, attempted_urls, executed_tickers)`` where
    ``status`` is ``'stoped'`` or ``'success'``; ``ticker`` / ``attempted_urls``
    describe the ticker that was interrupted (``None`` otherwise).
    """

    # Seconds requests waits for the server (connect / between bytes). Without a
    # timeout a hung connection would keep the worker from ever seeing is_stop.
    REQUEST_TIMEOUT = 30

    def __init__(self, thread_num: int, task_data: dict):
        super().__init__()
        self.thread_num = thread_num
        self.task_data = task_data
        self.signals = WorkerSignals()
        self.is_stop = False

    @pyqtSlot()
    def run(self):
        """Run the download job and report through ``self.signals``.

        Emits ``result`` on success or cooperative stop, ``error`` if an
        unexpected exception escapes, and ``finished`` in every case.
        """
        try:
            result = self._download_all()
        except Exception:
            traceback.print_exc()
            exctype, value = sys.exc_info()[:2]
            self.signals.error.emit(
                (self.thread_num, exctype, value, traceback.format_exc())
            )
        else:
            self.signals.result.emit(result)
        finally:
            self.signals.finished.emit(self.thread_num)

    def _download_all(self) -> tuple:
        """Process every assigned txt file and return the result tuple."""
        temp_dict = self.task_data['temp_dict'] or {}
        txt_dir = self.task_data['txt_dir']
        pdf_dir = self.task_data['pdf_dir']
        executed_ticker_list = []

        for txt_file in self.task_data['txt_list']:
            if self.is_stop:
                # Stopped between two tickers: nothing is half-done.
                return (self.thread_num, 'stoped', None, None, executed_ticker_list)

            ticker = txt_file[0:6]
            with open(os.path.join(txt_dir, txt_file), 'r', encoding='utf-8') as fr:
                # Skip blank lines (e.g. a trailing newline); an empty "URL"
                # would otherwise crash the file-name derivation.
                url_list = [line.strip() for line in fr.read().split('\n') if line.strip()]

            # Copy the resumed list so the shared temp_dict is never mutated
            # from a worker thread.
            attempted_urls = list(temp_dict.get(ticker) or [])
            already_attempted = set(attempted_urls)
            pending_urls = [u for u in url_list if u not in already_attempted]

            tar_pdf_dir = os.path.join(pdf_dir, ticker)
            os.makedirs(tar_pdf_dir, exist_ok=True)

            for one_url in pending_urls:
                if self.is_stop:
                    # Stopped in the middle of a ticker: hand back what was
                    # attempted so the next run can skip it.
                    return (
                        self.thread_num,
                        'stoped',
                        ticker,
                        attempted_urls,
                        executed_ticker_list,
                    )
                self._download_one(one_url, tar_pdf_dir)
                # A URL counts as attempted whether or not the download worked,
                # matching the original resume semantics.
                attempted_urls.append(one_url)

            executed_ticker_list.append(ticker)

        # Everything assigned to this worker was processed, so report success
        # even if a stop request arrived after the last URL.
        return (self.thread_num, 'success', None, None, executed_ticker_list)

    def _download_one(self, one_url: str, tar_pdf_dir: str) -> None:
        """Fetch one URL into ``tar_pdf_dir``; failures are logged, not raised."""
        try:
            url_parts = one_url.split('/')
            tar_file_name = f"{url_parts[-2]}_{url_parts[-1]}.pdf"
            res = requests.get(one_url, timeout=self.REQUEST_TIMEOUT)
            if res.status_code == 200:
                with open(os.path.join(tar_pdf_dir, tar_file_name), 'wb') as fw:
                    fw.write(res.content)
        except Exception:
            # Best effort: one bad URL must not abort the rest of the batch,
            # but leave a trace instead of swallowing the error silently.
            traceback.print_exc()

    def stop_run(self):
        """Ask the worker to stop; it is honoured before the next URL/ticker."""
        self.is_stop = True
class MainWindow(QMainWindow):
    """Main window of the batch PDF downloader.

    Lets the user pick the directory of txt files (one per ticker, each listing
    PDF URLs) and the output directory, fans the work out to a QThreadPool and
    persists resume information under ``<basedir>/data``:

        params.txt    ``txt_dir;pdf_dir;status`` of the previous run
        temp.json     ``{ticker: [attempted urls]}`` for interrupted tickers
        executed.txt  one fully processed ticker per line
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle('PDF Downloader')
        self.setMinimumSize(QSize(600, 400))

        label1 = QLabel('存储pdf网址txt文件所在目录:')
        self.txtdir_lineedit = QLineEdit()
        self.choicedir_btn = QPushButton('打开目录')
        self.choicedir_btn.clicked.connect(self.choicedir_btn_clicked)

        label2 = QLabel('设置下载后pdf存储目录:')
        self.savedir_lineedit = QLineEdit()
        self.savedir_btn = QPushButton('设置目录')
        self.savedir_btn.clicked.connect(self.savedir_btn_clicked)

        self.execute_btn = QPushButton('执行')
        self.execute_btn.clicked.connect(self.execute_btn_clicked)
        self.stop_btn = QPushButton('停止')
        self.stop_btn.clicked.connect(self.stop_btn_clicked)

        label3 = QLabel('运行日志:')
        label4 = QLabel('其他信息:')
        self.excutelog_textedit = QTextEdit()
        self.excutelog_textedit.setReadOnly(True)
        self.otherlog_textedit = QTextEdit()
        self.otherlog_textedit.setReadOnly(True)

        layout0 = QHBoxLayout()
        layout0.addWidget(label1)
        layout0.addWidget(self.txtdir_lineedit)
        layout0.addWidget(self.choicedir_btn)

        layout1 = QHBoxLayout()
        layout1.addWidget(label2)
        layout1.addWidget(self.savedir_lineedit)
        layout1.addWidget(self.savedir_btn)

        layout2 = QHBoxLayout()
        layout2.addWidget(self.execute_btn)
        layout2.addWidget(self.stop_btn)

        layout3 = QVBoxLayout()
        layout3.addWidget(label3)
        layout3.addWidget(self.excutelog_textedit)

        layout4 = QVBoxLayout()
        layout4.addWidget(label4)
        layout4.addWidget(self.otherlog_textedit)

        layout5 = QHBoxLayout()
        layout5.addLayout(layout3)
        layout5.addLayout(layout4)

        layout = QVBoxLayout()
        layout.addLayout(layout0)
        layout.addLayout(layout1)
        layout.addLayout(layout2)
        layout.addLayout(layout5)

        widget = QWidget()
        widget.setLayout(layout)
        self.setCentralWidget(widget)

        self.threadpool = QThreadPool()
        # Upper bound offered by the pool; never modified after this point.
        self.max_thread_count = max(1, self.threadpool.maxThreadCount())

        # Per-run bookkeeping (reset at the start of every run).
        self.pre_last_time_data = {}
        self.pre_last_time_executed_tickers = []
        self.thread_finished_count = 0
        # Number of workers actually started for the current run.
        self.run_thread_count = 0
        # True once any worker of the current run reports a forced stop.
        self.any_thread_stopped = False
        self.waitting_close = False
        # Whether a run has been started during this session.
        self.this_time_start_yeah = False
        self.runner_list = []

        # Restore the previous session's parameters and resume data.
        self.open_init()

    @staticmethod
    def _data_path(name: str) -> str:
        """Return the path of ``name`` inside the persistent data directory."""
        return os.path.join(basedir, 'data', name)

    def _set_inputs_disabled(self, disabled: bool) -> None:
        """Enable/disable every control that must not change during a run."""
        self.txtdir_lineedit.setDisabled(disabled)
        self.savedir_lineedit.setDisabled(disabled)
        self.choicedir_btn.setDisabled(disabled)
        self.savedir_btn.setDisabled(disabled)
        self.execute_btn.setDisabled(disabled)

    def _run_in_progress(self) -> bool:
        """True while workers of the current run have not all finished."""
        return (
            self.this_time_start_yeah
            and self.thread_finished_count != self.run_thread_count
        )

    def open_init(self):
        """Load params.txt, temp.json and executed.txt written by an earlier run.

        Sets ``last_time_data`` and ``last_time_executed_tickers`` (both ``None``
        when there is nothing to resume) and pre-fills the line edits.
        Malformed state files are tolerated instead of crashing at start-up.
        """
        self.create_empty_file()
        self.last_time_data = None
        self.last_time_executed_tickers = None

        with open(self._data_path('params.txt'), 'r', encoding='utf-8') as fr:
            res_str = fr.read().strip()
        if not res_str:
            return

        res_list = res_str.split(';')
        # Pad so a truncated params.txt cannot raise IndexError.
        res_list += [''] * (3 - len(res_list))
        self.txtdir_lineedit.setText(res_list[0])
        self.savedir_lineedit.setText(res_list[1])
        self.otherlog_textedit.setText(res_list[2])

        try:
            with open(self._data_path('temp.json'), 'r', encoding='utf-8') as fr:
                loaded = json.load(fr)
        except (OSError, json.JSONDecodeError):
            loaded = {}
        self.last_time_data = loaded if isinstance(loaded, dict) else {}

        with open(self._data_path('executed.txt'), 'r', encoding='utf-8') as fr:
            tickers = [line.strip() for line in fr.read().split('\n') if line.strip()]
        self.last_time_executed_tickers = tickers or None

    def create_empty_file(self):
        """Create the data directory and empty state files when they are missing."""
        os.makedirs(os.path.join(basedir, 'data'), exist_ok=True)
        if not os.path.exists(self._data_path('params.txt')):
            with open(self._data_path('params.txt'), 'w', encoding='utf-8') as fw:
                fw.write('')
        if not os.path.exists(self._data_path('temp.json')):
            with open(self._data_path('temp.json'), 'w', encoding='utf-8') as fw:
                json.dump({}, fw)
        if not os.path.exists(self._data_path('executed.txt')):
            with open(self._data_path('executed.txt'), 'w', encoding='utf-8') as fw:
                fw.write('')

    def choicedir_btn_clicked(self):
        """Let the user choose the directory that holds the txt files."""
        dir_name = QFileDialog.getExistingDirectory(self, "打开存储pdf网址txt文件所在目录")
        if dir_name:
            self.txtdir_lineedit.setText(dir_name)

    def savedir_btn_clicked(self):
        """Let the user choose the directory the PDFs are saved to."""
        dir_name = QFileDialog.getExistingDirectory(self, "指定下载后pdf存储的目录")
        if dir_name:
            self.savedir_lineedit.setText(dir_name)

    def execute_btn_clicked(self):
        """Validate the inputs, split the txt files across workers and start them."""
        txt_dir = self.txtdir_lineedit.text()
        if txt_dir is None or not txt_dir.strip():
            self.information_dialog('请先选择txt所在目录')
            return
        if not os.path.isdir(txt_dir):
            # os.listdir would raise on a path that is not a directory.
            self.information_dialog('txt所在目录不存在')
            return
        # Only regular files are handed to workers; a sub-directory would make
        # open() fail and abort that worker's whole share.
        txt_list = [
            name for name in os.listdir(txt_dir)
            if os.path.isfile(os.path.join(txt_dir, name))
        ]
        if not txt_list:
            self.information_dialog('txt所在目录为空')
            return
        pdf_dir = self.savedir_lineedit.text()
        if pdf_dir is None or not pdf_dir.strip():
            self.information_dialog('请设置pdf存储目录')
            return

        answer = QMessageBox.question(
            self,
            '确认启动?',
            f'如果确定启动,程序将把任务分成 {self.max_thread_count} 个线程执行。执行过程将占用设备资源。',
            QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
        )
        if answer != QMessageBox.StandardButton.Yes:
            return

        # Reset per-run state so a second run in the same session starts clean.
        self.this_time_start_yeah = True
        self.thread_finished_count = 0
        self.any_thread_stopped = False
        self.runner_list = []
        # Seed with what earlier runs already recorded so that a stop during
        # this run does not overwrite (and lose) that progress.
        self.pre_last_time_data = dict(self.last_time_data or {})
        self.pre_last_time_executed_tickers = list(self.last_time_executed_tickers or [])
        self._set_inputs_disabled(True)

        # Skip tickers that were completely processed before.
        if self.last_time_executed_tickers is not None:
            for one in self.last_time_executed_tickers:
                res_txt_str = f'{one}.txt'
                if res_txt_str in txt_list:
                    txt_list.remove(res_txt_str)

        # Distribute the work; use a local count so max_thread_count is never
        # permanently reduced by one small run.
        thread_count = self.max_thread_count
        interval = len(txt_list) // thread_count
        if interval == 0:
            thread_count = 1
            self.insert_executelog('需要执行的内容很少,只开启一个线程')
        self.run_thread_count = thread_count

        for i in range(thread_count):
            if i == thread_count - 1:
                # The last worker also takes the remainder.
                node_txt_list = txt_list[i * interval:]
            else:
                node_txt_list = txt_list[i * interval:(i + 1) * interval]
            task_data = {
                'txt_dir': txt_dir,
                'pdf_dir': pdf_dir,
                'txt_list': node_txt_list,
                'temp_dict': self.last_time_data
            }
            worker = Worker(i, task_data)
            worker.signals.result.connect(self.thread_result_fn)
            worker.signals.finished.connect(self.thread_finished_fn)
            worker.signals.error.connect(self.thread_error_fn)
            self.runner_list.append(worker)
            self.insert_otherlog(f'线程 {i} 启动。')
            self.threadpool.start(worker)

    def thread_result_fn(self, res: tuple):
        """Collect one worker's outcome.

        ``res`` is ``(thread_num, 'stoped', ticker, attempted_urls, executed_tickers)``
        or ``(thread_num, 'success', None, None, executed_tickers)``.
        """
        thread_num = res[0]
        status = res[1]
        # Record completed tickers for both outcomes: if another worker is
        # stopped, the ones finished here must still be skipped on resume.
        if res[4]:
            self.pre_last_time_executed_tickers.extend(res[4])
        if status == 'stoped':
            self.any_thread_stopped = True
            self.insert_otherlog(f'线程 {thread_num} 停止.')
            if res[2] is not None:
                self.pre_last_time_data[res[2]] = res[3]
        else:
            self.insert_otherlog(f'线程 {thread_num} 正常结束。')

    def thread_finished_fn(self, res: int):
        """Count finished workers; once all are done, persist state and unlock the UI."""
        self.thread_finished_count += 1
        self.insert_otherlog(f'线程 {res} 结束.')
        if self.thread_finished_count != self.run_thread_count:
            return

        temp_path = self._data_path('temp.json')
        executed_path = self._data_path('executed.txt')
        if self.any_thread_stopped:
            temp_str = '上次被强制停止'
            # De-duplicate while keeping order.
            executed = list(dict.fromkeys(self.pre_last_time_executed_tickers))
            finished_tickers = set(executed)
            # Partial progress is only relevant for tickers not yet completed.
            partial = {
                ticker: urls
                for ticker, urls in self.pre_last_time_data.items()
                if ticker not in finished_tickers
            }
            with open(temp_path, 'w', encoding='utf-8') as fw:
                json.dump(partial, fw)
            with open(executed_path, 'w', encoding='utf-8') as fw:
                fw.write('\n'.join(executed))
            # Keep the in-memory resume data in sync for a re-run in this session.
            self.last_time_data = partial
            self.last_time_executed_tickers = executed or None
        else:
            temp_str = '上次执行正常结束'
            # Nothing to resume: drop the state files (recreated empty on next start).
            for stale_path in (executed_path, temp_path):
                if os.path.exists(stale_path):
                    os.remove(stale_path)
            self.last_time_data = None
            self.last_time_executed_tickers = None

        pre_str = f"{self.txtdir_lineedit.text()};{self.savedir_lineedit.text()};{temp_str}"
        with open(self._data_path('params.txt'), 'w', encoding='utf-8') as fw:
            fw.write(pre_str)

        if self.waitting_close:
            # The user already confirmed quitting; closeEvent accepts directly.
            self.close()
        else:
            self._set_inputs_disabled(False)
            self.information_dialog('所有工作线程停止完毕')

    def thread_error_fn(self, res: tuple):
        """Log an unexpected worker exception: (thread_num, type, value, traceback)."""
        error_str = f"线程 {res[0]} 报错。报错类型:{res[1]}。值:{res[2]}。异常栈:{res[3]}"
        self.insert_executelog(error_str)

    def stop_btn_clicked(self):
        """Ask for confirmation, then request every worker to stop."""
        answer = QMessageBox.question(
            self,
            '确认停止?',
            '确定要停止任务么?',
            QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
        )
        if answer == QMessageBox.StandardButton.Yes:
            self.force_stop()

    def force_stop(self):
        """Set the stop flag on every worker of the current run."""
        for item in self.runner_list:
            item.stop_run()

    def insert_executelog(self, msg: str):
        """Append a timestamped line to the run-log pane."""
        pre_str = f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {msg}"
        self.excutelog_textedit.append(pre_str)

    def insert_otherlog(self, msg: str):
        """Append a timestamped line to the other-info pane."""
        pre_str = f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {msg}"
        self.otherlog_textedit.append(pre_str)

    def information_dialog(self, msg: str):
        """Show a modal information box with a single OK button."""
        QMessageBox.information(
            self,
            '提示',
            msg,
            QMessageBox.StandardButton.Ok
        )

    def closeEvent(self, a0):
        """Confirm quitting; while workers run, stop them first and close afterwards."""
        if self.waitting_close:
            # Quit was already confirmed. Either we are still waiting for the
            # workers, or this is the deferred close() from thread_finished_fn;
            # do not ask the user a second time.
            if self._run_in_progress():
                a0.ignore()
            else:
                a0.accept()
            return

        answer = QMessageBox.question(
            self,
            '确认退出?',
            '退出将中断操作,确定要退出么?',
            QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
        )
        if answer != QMessageBox.StandardButton.Yes:
            a0.ignore()
            return

        if not self._run_in_progress():
            a0.accept()
            return

        QMessageBox.information(
            self,
            '提示',
            '正在存储必要信息为下次断点续传做准备,需要等几秒后关闭',
            QMessageBox.StandardButton.Ok
        )
        # Keep the window alive until every worker has reported back, so the
        # resume files are written before the process exits.
        a0.ignore()
        self.waitting_close = True
        self.force_stop()
if __name__ == '__main__':
    # Pass argv so Qt's standard command-line options are honoured, and
    # propagate the event loop's return code as the process exit status.
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec())
https://www.bilibili.com/video/BV1p8ZAYAEz3/