PyQt6+seleniumwire

import os
import json
import time
import traceback
import datetime
import urllib.request
from urllib.parse import urlparse

from seleniumwire import webdriver


class Downloader():
    """Capture browser traffic through selenium-wire and save JSON responses.

    Configuration lives in plain instance attributes so callers can adjust it
    after construction:

    * ``SAVE_QUERY_PARAMS`` -- query-parameter names kept in file names.
    * ``DOWNLOAD_DIR`` -- root directory; one sub-directory per host is
      created below it.
    * ``TARGET_TYPE_DICT`` -- maps a response content type to the file
      extension used when saving it; other content types are ignored.
    """

    def __init__(self):
        self.SAVE_QUERY_PARAMS = []
        self.DOWNLOAD_DIR = os.getcwd()
        self.TARGET_TYPE_DICT = {'application/json': '.json'}

        # Constructing the driver opens the browser session immediately.
        self.driver = webdriver.Chrome()

    def url_to_name(self, url):
        """Convert a URL into a file name without extension.

        The result is ``<timestamp><path> <query>`` where ``<path>`` is the
        URL path with every ``/`` replaced by ``_`` and ``<query>`` contains
        only the parameters whose names are listed in
        ``self.SAVE_QUERY_PARAMS``. When no parameter is kept, the name ends
        with the separating space.

        :param url: request URL; percent-escapes are decoded before parsing
        :return: the generated file name, without extension
        """
        unencode_url = urllib.parse.unquote(url)
        url_obj = urlparse(unencode_url)
        path_str = url_obj.path.replace('/', '_')

        query_dict = {}
        for item in url_obj.query.split('&'):
            # partition() splits on the first '=' only, so a value that itself
            # contains '=' no longer breaks the unpacking.
            key, sep, value = item.partition('=')
            if not sep:
                continue
            query_dict[key] = value

        query_str = '&'.join(
            '{}={}'.format(key, value)
            for key, value in query_dict.items()
            if key in self.SAVE_QUERY_PARAMS
        )
        # The timestamp prefix keeps names unique across repeated requests.
        timestr = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S.%f')
        return timestr + path_str + ' ' + query_str

    def save_requests(self, requests):
        """Save the JSON body of every matching captured request to disk.

        A request is saved when it has a response whose ``Content-Type`` (with
        or without parameters such as ``; charset=utf-8``) is a key of
        ``self.TARGET_TYPE_DICT``. Files go to ``DOWNLOAD_DIR/<host>/``.
        Saving is best-effort: a failure on one request is reported and the
        remaining requests are still processed.

        :param requests: iterable of captured requests, each exposing
            ``url``, ``headers`` and ``response`` (with ``headers``, ``body``)
        :return: None
        """
        for request in requests:
            response = request.response
            if not response:
                continue

            content_type = response.headers.get('Content-Type') or ''
            file_extension = self.TARGET_TYPE_DICT.get(content_type)
            if file_extension is None:
                # Retry with the bare media type so that parameters appended
                # by the server do not hide an otherwise matching response.
                media_type = content_type.split(';', 1)[0].strip().lower()
                file_extension = self.TARGET_TYPE_DICT.get(media_type)
            if file_extension is None:
                continue

            # Fall back to the URL's host when no Host header was captured, so
            # files are still grouped per host.
            host = request.headers.get('Host') or urlparse(request.url).netloc
            path = os.path.join(self.DOWNLOAD_DIR, host)

            file_name = self.url_to_name(request.url).strip()
            if not file_name.endswith(file_extension):
                file_name += file_extension
            file_path = os.path.join(path, file_name)

            try:
                os.makedirs(path, exist_ok=True)
                # NOTE(review): the body is parsed as-is; if the capture layer
                # hands over a compressed body it must be decoded first --
                # confirm against the selenium-wire documentation.
                rsp_json = json.loads(response.body)
                with open(file_path, 'w', encoding='utf-8') as f:
                    json.dump(rsp_json, f, ensure_ascii=False)
                print("Saved:", file_path)
            except Exception:
                # Deliberately broad: one bad response must not stop the rest.
                traceback.print_exc()


if __name__ == "__main__":
    # Demo run: load one page, wait for its background requests, then save
    # every captured JSON response below the download directory.
    downloader = Downloader()
    print(downloader.DOWNLOAD_DIR)

    try:
        downloader.driver.get('http://zjj.sz.gov.cn/ztfw/zfbz/grfw/lhgs/index.html')
        # Fixed pause so the page has time to issue its requests before the
        # captured traffic is read.
        time.sleep(20)
        downloader.save_requests(downloader.driver.requests)
    finally:
        # quit() ends the whole WebDriver session, whereas close() only closes
        # the current window; running it in `finally` also releases the
        # browser when loading or saving fails.
        downloader.driver.quit()

import os
from PyQt6.QtWidgets import QApplication, QMainWindow, QWidget, QVBoxLayout, QTableWidget, QTableWidgetItem, \
    QPushButton, QLineEdit, QFileDialog, QCheckBox
from PyQt6.QtCore import Qt
from download import Downloader


class MainWindow(QMainWindow):
    """Main window: choose a folder, list its files, print the checked rows.

    The table's first column holds one checkbox per row; every remaining
    column shows the file name of that row.
    """

    def __init__(self):
        super().__init__()

        self.downloader = Downloader()

        self.setWindowTitle("请求抓取程序")
        self.setGeometry(100, 100, 600, 400)

        # Central widget and its vertical layout.
        self.central_widget = QWidget(self)
        self.main_layout = QVBoxLayout(self.central_widget)

        # Buttons and the folder-path text box.
        self.start_button = QPushButton("开始", self.central_widget)
        self.end_button = QPushButton("结束", self.central_widget)
        self.folder_path_input = QLineEdit(self.central_widget)
        self.browse_button = QPushButton("浏览", self.central_widget)

        # Table. The column count is derived from the labels so the two can
        # never disagree (a fixed count of 3 used to drop the fourth label).
        header_labels = ["选择", "字段1", "字段2", "字段3"]
        self.table_widget = QTableWidget(self.central_widget)
        self.table_widget.setColumnCount(len(header_labels))
        self.table_widget.setHorizontalHeaderLabels(header_labels)

        # Add the widgets to the layout, top to bottom.
        self.main_layout.addWidget(self.start_button)
        self.main_layout.addWidget(self.end_button)
        self.main_layout.addWidget(self.folder_path_input)
        self.main_layout.addWidget(self.browse_button)
        self.main_layout.addWidget(self.table_widget)

        self.setCentralWidget(self.central_widget)

        # Wire the buttons to their click handlers.
        self.start_button.clicked.connect(self.start_button_clicked)
        self.end_button.clicked.connect(self.end_button_clicked)
        self.browse_button.clicked.connect(self.browse_button_clicked)

    def browse_button_clicked(self):
        """Open a folder-selection dialog and show the chosen path."""
        folder_path = QFileDialog.getExistingDirectory(self, "选择文件夹")
        # An empty result means nothing was chosen; keep the current text
        # instead of wiping it.
        if folder_path:
            self.folder_path_input.setText(folder_path)

    def start_button_clicked(self):
        """List the files of the entered folder in the table.

        Each row gets a checkbox in column 0 and the file name in every other
        column. If the folder cannot be listed, the error is printed and the
        table is left empty.
        """
        folder_path = self.folder_path_input.text()

        self.table_widget.clearContents()

        try:
            files = os.listdir(folder_path)
        except OSError as e:
            # Covers an empty, missing or unreadable path; handled here so the
            # exception does not escape from the click handler.
            print(e)
            self.table_widget.setRowCount(0)
            return

        self.table_widget.setRowCount(len(files))
        col_count = self.table_widget.columnCount()
        for row, file_name in enumerate(files):
            checkbox = QCheckBox(self.table_widget)
            self.table_widget.setCellWidget(row, 0, checkbox)
            for col in range(1, col_count):
                self.table_widget.setItem(row, col, QTableWidgetItem(file_name))

    def end_button_clicked(self):
        """Print the text of columns 1..n for every row whose box is checked."""
        rows = self.table_widget.rowCount()
        cols = self.table_widget.columnCount()

        if rows == 0 or cols == 0:
            return

        selected_rows = []
        for row in range(rows):
            checkbox = self.table_widget.cellWidget(row, 0)
            # Rows without a checkbox widget are treated as unchecked.
            if checkbox is None or not checkbox.isChecked():
                continue
            values = []
            for col in range(1, cols):
                item = self.table_widget.item(row, col)
                # A cell that was never populated contributes an empty string.
                values.append(item.text() if item is not None else '')
            selected_rows.append(values)

        if not selected_rows:
            return
        print(selected_rows)

if __name__ == "__main__":
    # Create the application object first, then build and show the window.
    qt_app = QApplication([])
    main_window = MainWindow()
    main_window.show()
    # Hand control to the Qt event loop.
    qt_app.exec()

你可能感兴趣的:(python,开发语言)