import os
import json
import time
import traceback
import datetime
import urllib.request
from urllib.parse import urlparse
from seleniumwire import webdriver
class Downloader():
    """Save JSON responses captured by a selenium-wire Chrome driver to disk.

    Instance attributes (all set in ``__init__``):

    * ``SAVE_QUERY_PARAMS`` - query-parameter names kept in generated file names.
    * ``DOWNLOAD_DIR`` - root output directory; one sub-directory per request
      ``Host`` header is created below it.
    * ``TARGET_TYPE_DICT`` - maps a response media type to the file extension
      used when saving; responses of any other type are skipped.
    * ``driver`` - the selenium-wire Chrome driver whose requests are saved.
    """

    def __init__(self):
        self.SAVE_QUERY_PARAMS = []
        self.DOWNLOAD_DIR = os.getcwd()
        self.TARGET_TYPE_DICT = {'application/json': '.json'}
        self.driver = webdriver.Chrome()

    def url_to_name(self, url):
        """Convert a URL into a file name without extension.

        The result is ``<timestamp><path> <query>`` where ``<path>`` is the
        URL path with every ``/`` replaced by ``_`` and ``<query>`` contains
        only the parameters whose names are listed in
        ``self.SAVE_QUERY_PARAMS`` (joined with ``&``).

        :param url: request URL; percent-escapes are decoded before parsing.
        :return: file name stem. It ends with a space when no query
            parameter is kept, so callers may want to ``strip()`` it.
        """
        url_obj = urlparse(urllib.parse.unquote(url))
        path_str = url_obj.path.replace('/', '_')

        query_dict = {}
        for item in url_obj.query.split('&'):
            # partition() keeps everything after the first '=' as the value,
            # so values that themselves contain '=' no longer raise.
            key, sep, value = item.partition('=')
            if not sep:
                continue
            query_dict[key] = value

        query_str = '&'.join(
            f'{key}={value}'
            for key, value in query_dict.items()
            if key in self.SAVE_QUERY_PARAMS
        )
        # The timestamp prefix keeps names unique across repeated requests.
        timestr = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S.%f')
        return timestr + path_str + ' ' + query_str

    def _extension_for(self, content_type):
        """Return the extension configured for a Content-Type value, or None.

        The exact header value is tried first; then the bare media type with
        parameters such as ``; charset=utf-8`` removed and lower-cased.
        """
        if not content_type:
            return None
        if content_type in self.TARGET_TYPE_DICT:
            return self.TARGET_TYPE_DICT[content_type]
        media_type = content_type.split(';', 1)[0].strip().lower()
        return self.TARGET_TYPE_DICT.get(media_type)

    def save_requests(self, requests):
        """Write the JSON body of every matching captured request to a file.

        A request is saved when it has a response whose ``Content-Type`` maps
        to an extension in ``self.TARGET_TYPE_DICT``. Files are written to
        ``<DOWNLOAD_DIR>/<Host header>/<url_to_name(url)><extension>``.
        Failures for a single request are printed and do not stop the loop.

        :param requests: iterable of captured request objects exposing
            ``response``, ``headers`` and ``url``; each response exposes
            ``headers`` and ``body``.
        :return: None
        """
        for request in requests:
            if not request.response:
                continue
            content_type = request.response.headers.get('Content-Type')
            file_extension = self._extension_for(content_type)
            if file_extension is None:
                continue

            host = dict(request.headers).get('Host', '')
            path = os.path.join(self.DOWNLOAD_DIR, host)

            file_name = self.url_to_name(request.url).strip()
            # Only a trailing extension counts; '.json' in the middle of the
            # name must not suppress the suffix.
            if not file_name.endswith(file_extension):
                file_name += file_extension
            file_path = os.path.join(path, file_name)

            try:
                # makedirs also creates a missing DOWNLOAD_DIR and tolerates
                # an already existing host directory.
                os.makedirs(path, exist_ok=True)
                # NOTE(review): the body is parsed as-is; if a captured body
                # is still content-encoded (e.g. gzip) parsing fails and the
                # request is skipped - confirm whether decoding is needed.
                rsp_json = json.loads(request.response.body)
                with open(file_path, 'w', encoding='utf-8') as f:
                    json.dump(rsp_json, f, ensure_ascii=False)
                print("Saved:", file_path)
            except Exception as e:
                # Best effort: report the failure and continue with the
                # remaining requests.
                print(e)
                traceback.print_exc()
if __name__ == "__main__":
    # Script entry point: load one page, then save the JSON responses the
    # browser received while the page was open.
    downloader = Downloader()
    try:
        print(downloader.DOWNLOAD_DIR)
        downloader.driver.get('http://zjj.sz.gov.cn/ztfw/zfbz/grfw/lhgs/index.html')
        # Fixed 20 s wait before reading the captured requests (presumably to
        # let the page finish its background requests).
        time.sleep(20)
        downloader.save_requests(downloader.driver.requests)
    finally:
        # quit() ends the whole driver session, not just the current window,
        # and runs even when loading or saving fails.
        downloader.driver.quit()
import os
from PyQt6.QtWidgets import QApplication, QMainWindow, QWidget, QVBoxLayout, QTableWidget, QTableWidgetItem, \
QPushButton, QLineEdit, QFileDialog, QCheckBox
from PyQt6.QtCore import Qt
from download import Downloader
class MainWindow(QMainWindow):
    """Main window of the request-capture tool.

    Shows start/end buttons, a folder path field with a browse button, and a
    table. "Start" lists the files of the chosen folder in the table (one row
    per file, with a checkbox in column 0); "End" prints the rows whose
    checkbox is ticked.
    """

    def __init__(self):
        super().__init__()
        self.downloader = Downloader()
        self.setWindowTitle("请求抓取程序")
        self.setGeometry(100, 100, 600, 400)

        # Central widget and its vertical layout.
        self.central_widget = QWidget(self)
        self.main_layout = QVBoxLayout(self.central_widget)

        # Buttons and the folder path field.
        self.start_button = QPushButton("开始", self.central_widget)
        self.end_button = QPushButton("结束", self.central_widget)
        self.folder_path_input = QLineEdit(self.central_widget)
        self.browse_button = QPushButton("浏览", self.central_widget)

        # Table: the column count is derived from the labels so that every
        # header (the checkbox column plus three fields) gets a column.
        header_labels = ["选择", "字段1", "字段2", "字段3"]
        self.table_widget = QTableWidget(self.central_widget)
        self.table_widget.setColumnCount(len(header_labels))
        self.table_widget.setHorizontalHeaderLabels(header_labels)

        # Add the widgets to the layout, top to bottom.
        for widget in (
            self.start_button,
            self.end_button,
            self.folder_path_input,
            self.browse_button,
            self.table_widget,
        ):
            self.main_layout.addWidget(widget)

        self.setCentralWidget(self.central_widget)

        # Wire the button click handlers.
        self.start_button.clicked.connect(self.start_button_clicked)
        self.end_button.clicked.connect(self.end_button_clicked)
        self.browse_button.clicked.connect(self.browse_button_clicked)

    def browse_button_clicked(self):
        """Open a folder picker and put the chosen path into the text field."""
        folder_path = QFileDialog.getExistingDirectory(self, "选择文件夹")
        self.folder_path_input.setText(folder_path)

    def start_button_clicked(self):
        """Fill the table with the entries of the folder named in the text field.

        Each entry becomes one row: a checkbox in column 0 and the file name
        in every other column. If the folder cannot be listed (empty or
        invalid path, no permission) the error is printed and the table is
        left empty.
        """
        folder_path = self.folder_path_input.text()
        self.table_widget.clearContents()

        try:
            files = os.listdir(folder_path)
        except OSError as e:
            # An exception escaping a slot would take the application down;
            # report it and show an empty table instead.
            print(e)
            files = []

        self.table_widget.setRowCount(len(files))
        col_count = self.table_widget.columnCount()
        for row, file_name in enumerate(files):
            checkbox = QCheckBox(self.table_widget)
            self.table_widget.setCellWidget(row, 0, checkbox)
            for col in range(1, col_count):
                self.table_widget.setItem(row, col, QTableWidgetItem(file_name))

    def end_button_clicked(self):
        """Print the text of columns 1.. for every row whose checkbox is ticked.

        Nothing is printed when the table is empty or no row is selected.
        """
        rows = self.table_widget.rowCount()
        cols = self.table_widget.columnCount()
        if rows == 0 or cols == 0:
            return

        selected_rows = []
        for row in range(rows):
            checkbox = self.table_widget.cellWidget(row, 0)
            # Rows without a checkbox widget count as not selected.
            if checkbox is None or not checkbox.isChecked():
                continue
            values = []
            for col in range(1, cols):
                item = self.table_widget.item(row, col)
                # Cells without an item contribute an empty string.
                values.append(item.text() if item is not None else "")
            selected_rows.append(values)

        if not selected_rows:
            return
        print(selected_rows)
if __name__ == "__main__":
    # GUI entry point: create the application object, then the main window,
    # show it and run the event loop.
    qt_app = QApplication([])
    main_window = MainWindow()
    main_window.show()
    qt_app.exec()