【自制实用小工具】——1、Xpath解析器

【自制实用小工具】——1、Xpath解析器

由于js脚本的影响,我们请求得到的数据常常与网页显示的数据不一样。而chrome插件xpath helper不能调试本地网页,于是有了制造一个xpath解析器的想法。(粗略尝试了一下,没有问题,大家要是发现bug的话记得评论告诉我啊~)
工具:

  1. PyQt5 库
  2. Qt designer
  3. sys 库
  4. requests 库
  5. lxml 库

步骤:

(一)用Qt designer设计界面

(二)将.ui文件转换为.py文件

有关(一)、(二)部分的教程可以参考:https://www.jb51.net/article/...

(三)链接按钮

将以下代码添加到 setupUi 方法体的末尾(仍在方法内部,位于 connectSlotsByName 之后):

 # Wire each button's clicked signal to its handler method
        self.button_Get_html.clicked.connect(self.Button_Get_Html)
        self.button_Xpath_Parse.clicked.connect(self.Button_Xpath_Parse)

(四)按钮事件

以下分别是按钮==Get Html==和按钮 ==Xpath Parse== 的代码:

    def Button_Get_Html(self):
        """Download the page named in the "Web Site" box into the HTML box.

        The address is read from ``text_Web_Site``. On success the decoded
        response body is written to ``text_HTML_Code``; on any failure the
        exception text is shown there instead, and an empty address produces
        a fixed hint. Nothing is returned and no exception escapes, because
        this method runs as a Qt slot.

        Notes:
            * The request runs on the GUI thread, so the window does not
              repaint until the server answers or the timeout expires.
            * TLS certificate verification is switched off (``verify=False``),
              so HTTPS responses are not authenticated.
        """
        url = self.text_Web_Site.toPlainText().strip()
        if not url:
            self.text_HTML_Code.setPlainText('网址不能为空!')
            return

        # Accept bare hosts ("www.example.com", "example.com", "localhost:80")
        # by defaulting to plain HTTP when no http/https scheme was typed.
        if not url.lower().startswith(('http://', 'https://')):
            url = 'http://' + url

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36'
        }
        try:
            # The context manager closes the session's connections afterwards.
            with requests.session() as session:
                # Without a timeout a stalled server would hang the GUI forever.
                response = session.get(url=url, headers=headers, verify=False, timeout=15)
                raw = response.content
            try:
                text = raw.decode('utf-8')
            except UnicodeDecodeError:
                # Not valid UTF-8 (e.g. a GBK page): use the encoding that
                # requests detects from the bytes, dropping undecodable bytes.
                try:
                    text = raw.decode(response.apparent_encoding or 'utf-8', 'ignore')
                except LookupError:
                    text = raw.decode('utf-8', 'ignore')
        except Exception as e:
            # Slot boundary: report the problem in the UI instead of raising.
            self.text_HTML_Code.setPlainText(str(e))
        else:
            # Show the downloaded source in text_HTML_Code.
            self.text_HTML_Code.setPlainText(text)

    def Button_Xpath_Parse(self):
        """Evaluate the XPath box against the HTML box and list every match.

        The result pane is cleared first. Each match is written as a
        separator line carrying its zero-based index followed by its text:
        string results (``text()``, ``@attr``) are shown as-is, element
        results show their ``.text`` (an empty line when the element has no
        leading text), and any other value (number, boolean, ...) is shown via
        ``str()``. Parse or XPath errors are displayed in the result pane;
        nothing is returned and no exception escapes, because this method runs
        as a Qt slot.
        """
        self.text_Result.document().clear()

        xpath_syntax = self.text_Xpath_Syntax.toPlainText()
        html_code = self.text_HTML_Code.toPlainText()
        if not html_code.strip():
            # Parsing an empty document fails inside lxml; give a clear hint.
            self.text_Result.setPlainText('HTML代码不能为空!')
            return

        try:
            html = etree.HTML(html_code)
            if html is None:
                self.text_Result.setPlainText('无法解析HTML代码!')
                return
            results = html.xpath(xpath_syntax)
        except Exception as e:
            self.text_Result.setPlainText(str(e))
            return

        # Scalar expressions such as count(...), string(...) or boolean(...)
        # return a single value rather than a list of nodes.
        if not isinstance(results, list):
            results = [results]

        for num, result in enumerate(results):
            self.text_Result.append('-'*60+'这里是第 '+str(num)+' 个')
            if isinstance(result, str):
                text = result
            elif hasattr(result, 'text'):
                # Element node: .text is None when it has no leading text.
                text = result.text or ''
            else:
                text = str(result)
            self.text_Result.append(text)

(五)初始化界面

if __name__ == '__main__':
    # A PyQt5 program must create an application object; sys.argv passes the
    # command-line arguments through to it.
    application = QtWidgets.QApplication(sys.argv)
    # A parentless QWidget acts as the top-level window that the form fills in.
    window = QtWidgets.QWidget()
    form = Ui_Asyu17_Xpath_Helper()
    form.setupUi(window)
    window.show()

    # Run the event loop and hand its return code to sys.exit() for a clean
    # shutdown. The method is spelled exec_() because "exec" was a reserved
    # word in Python 2.
    exit_code = application.exec_()
    sys.exit(exit_code)

结果展示:

测试无问题后,可使用 pyinstaller 将代码打包成可执行文件~


代码:

from PyQt5 import QtCore, QtGui, QtWidgets
import sys
import requests
from lxml import etree

requests.packages.urllib3.disable_warnings()

class Ui_Asyu17_Xpath_Helper(object):
    """Form class for the XPath tester window, including its button handlers.

    ``setupUi`` builds the widgets on a caller-supplied parent widget,
    ``Button_Get_Html`` downloads a page into the HTML box, and
    ``Button_Xpath_Parse`` evaluates an XPath expression against whatever
    that box currently contains.
    """

    def setupUi(self, Asyu17_Xpath_Helper):
        """Create and position all child widgets on ``Asyu17_Xpath_Helper``.

        Args:
            Asyu17_Xpath_Helper: the parent widget that becomes the window.
        """
        Asyu17_Xpath_Helper.setObjectName("Asyu17_Xpath_Helper")
        Asyu17_Xpath_Helper.resize(969, 905)

        # "Xpath Parse" button (bottom right).
        self.button_Xpath_Parse = QtWidgets.QPushButton(Asyu17_Xpath_Helper)
        self.button_Xpath_Parse.setGeometry(QtCore.QRect(830, 860, 75, 31))
        self.button_Xpath_Parse.setObjectName("button_Xpath_Parse")

        # Caption labels above the two large panes.
        self.label = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label.setGeometry(QtCore.QRect(10, 10, 71, 16))
        self.label.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.label.setScaledContents(False)
        self.label.setObjectName("label")
        self.label_2 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_2.setGeometry(QtCore.QRect(490, 10, 51, 16))
        self.label_2.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.label_2.setScaledContents(False)
        self.label_2.setObjectName("label_2")

        # XPath expression row: caption plus an editable text box.
        self.label_3 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_3.setGeometry(QtCore.QRect(20, 860, 91, 31))
        self.label_3.setObjectName("label_3")
        self.text_Xpath_Syntax = QtWidgets.QTextBrowser(Asyu17_Xpath_Helper)
        self.text_Xpath_Syntax.setGeometry(QtCore.QRect(110, 860, 681, 31))
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(13)
        self.text_Xpath_Syntax.setFont(font)
        # Read-only is turned off so the user can type into the box.
        self.text_Xpath_Syntax.setReadOnly(False)
        self.text_Xpath_Syntax.setObjectName("text_Xpath_Syntax")

        # Web-site row: "Get Html" button, editable address box, caption.
        self.button_Get_html = QtWidgets.QPushButton(Asyu17_Xpath_Helper)
        self.button_Get_html.setGeometry(QtCore.QRect(830, 820, 75, 31))
        self.button_Get_html.setObjectName("button_Get_html")
        self.text_Web_Site = QtWidgets.QTextBrowser(Asyu17_Xpath_Helper)
        self.text_Web_Site.setGeometry(QtCore.QRect(110, 820, 681, 31))
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(13)
        self.text_Web_Site.setFont(font)
        self.text_Web_Site.setReadOnly(False)
        self.text_Web_Site.setObjectName("text_Web_Site")
        self.label_4 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_4.setGeometry(QtCore.QRect(20, 820, 91, 31))
        self.label_4.setObjectName("label_4")

        # Container with a horizontal layout holding the HTML pane and the
        # result pane, added in that order.
        self.layoutWidget = QtWidgets.QWidget(Asyu17_Xpath_Helper)
        self.layoutWidget.setGeometry(QtCore.QRect(10, 30, 951, 781))
        self.layoutWidget.setObjectName("layoutWidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.text_HTML_Code = QtWidgets.QTextBrowser(self.layoutWidget)
        self.text_HTML_Code.setEnabled(True)
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(12)
        self.text_HTML_Code.setFont(font)
        self.text_HTML_Code.setMouseTracking(False)
        self.text_HTML_Code.setTabletTracking(False)
        self.text_HTML_Code.setReadOnly(False)

        self.text_HTML_Code.setObjectName("text_HTML_Code")
        self.horizontalLayout.addWidget(self.text_HTML_Code)
        self.text_Result = QtWidgets.QTextBrowser(self.layoutWidget)
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(12)
        self.text_Result.setFont(font)
        self.text_Result.setReadOnly(False)
        # Named like every other widget on the form.
        self.text_Result.setObjectName("text_Result")

        self.horizontalLayout.addWidget(self.text_Result)

        self.retranslateUi(Asyu17_Xpath_Helper)
        QtCore.QMetaObject.connectSlotsByName(Asyu17_Xpath_Helper)

        # Wire each button's clicked signal to its handler method.
        self.button_Get_html.clicked.connect(self.Button_Get_Html)
        self.button_Xpath_Parse.clicked.connect(self.Button_Xpath_Parse)

    def retranslateUi(self, Asyu17_Xpath_Helper):
        """Set the window title and the visible text of labels and buttons.

        Args:
            Asyu17_Xpath_Helper: the window widget whose title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        Asyu17_Xpath_Helper.setWindowTitle(_translate("Asyu17_Xpath_Helper", "Asyu17 Xpath Helper"))
        self.button_Xpath_Parse.setText(_translate("Asyu17_Xpath_Helper", "Xpath Parse"))
        self.label.setText(_translate("Asyu17_Xpath_Helper", "HTML Code:"))
        self.label_2.setText(_translate("Asyu17_Xpath_Helper", "Result:"))
        self.label_3.setText(_translate("Asyu17_Xpath_Helper", "Xpath Syntax:"))
        self.button_Get_html.setText(_translate("Asyu17_Xpath_Helper", "Get Html"))
        self.label_4.setText(_translate("Asyu17_Xpath_Helper", "Web Site:"))

    def Button_Get_Html(self):
        """Download the page named in the "Web Site" box into the HTML box.

        The address is read from ``text_Web_Site``. On success the decoded
        response body is written to ``text_HTML_Code``; on any failure the
        exception text is shown there instead, and an empty address produces
        a fixed hint. Nothing is returned and no exception escapes, because
        this method runs as a Qt slot.

        Notes:
            * The request runs on the GUI thread, so the window does not
              repaint until the server answers or the timeout expires.
            * TLS certificate verification is switched off (``verify=False``),
              so HTTPS responses are not authenticated.
        """
        url = self.text_Web_Site.toPlainText().strip()
        if not url:
            self.text_HTML_Code.setPlainText('网址不能为空!')
            return

        # Accept bare hosts ("www.example.com", "example.com", "localhost:80")
        # by defaulting to plain HTTP when no http/https scheme was typed.
        if not url.lower().startswith(('http://', 'https://')):
            url = 'http://' + url

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36'
        }
        try:
            # The context manager closes the session's connections afterwards.
            with requests.session() as session:
                # Without a timeout a stalled server would hang the GUI forever.
                response = session.get(url=url, headers=headers, verify=False, timeout=15)
                raw = response.content
            try:
                text = raw.decode('utf-8')
            except UnicodeDecodeError:
                # Not valid UTF-8 (e.g. a GBK page): use the encoding that
                # requests detects from the bytes, dropping undecodable bytes.
                try:
                    text = raw.decode(response.apparent_encoding or 'utf-8', 'ignore')
                except LookupError:
                    text = raw.decode('utf-8', 'ignore')
        except Exception as e:
            # Slot boundary: report the problem in the UI instead of raising.
            self.text_HTML_Code.setPlainText(str(e))
        else:
            # Show the downloaded source in text_HTML_Code.
            self.text_HTML_Code.setPlainText(text)

    def Button_Xpath_Parse(self):
        """Evaluate the XPath box against the HTML box and list every match.

        The result pane is cleared first. Each match is written as a
        separator line carrying its zero-based index followed by its text:
        string results (``text()``, ``@attr``) are shown as-is, element
        results show their ``.text`` (an empty line when the element has no
        leading text), and any other value (number, boolean, ...) is shown via
        ``str()``. Parse or XPath errors are displayed in the result pane;
        nothing is returned and no exception escapes, because this method runs
        as a Qt slot.
        """
        self.text_Result.document().clear()

        xpath_syntax = self.text_Xpath_Syntax.toPlainText()
        html_code = self.text_HTML_Code.toPlainText()
        if not html_code.strip():
            # Parsing an empty document fails inside lxml; give a clear hint.
            self.text_Result.setPlainText('HTML代码不能为空!')
            return

        try:
            html = etree.HTML(html_code)
            if html is None:
                self.text_Result.setPlainText('无法解析HTML代码!')
                return
            results = html.xpath(xpath_syntax)
        except Exception as e:
            self.text_Result.setPlainText(str(e))
            return

        # Scalar expressions such as count(...), string(...) or boolean(...)
        # return a single value rather than a list of nodes.
        if not isinstance(results, list):
            results = [results]

        for num, result in enumerate(results):
            self.text_Result.append('-'*60+'这里是第 '+str(num)+' 个')
            if isinstance(result, str):
                text = result
            elif hasattr(result, 'text'):
                # Element node: .text is None when it has no leading text.
                text = result.text or ''
            else:
                text = str(result)
            self.text_Result.append(text)

if __name__ == '__main__':
    # A PyQt5 program must create an application object; sys.argv passes the
    # command-line arguments through to it.
    application = QtWidgets.QApplication(sys.argv)
    # A parentless QWidget acts as the top-level window that the form fills in.
    window = QtWidgets.QWidget()
    form = Ui_Asyu17_Xpath_Helper()
    form.setupUi(window)
    window.show()

    # Run the event loop and hand its return code to sys.exit() for a clean
    # shutdown. The method is spelled exec_() because "exec" was a reserved
    # word in Python 2.
    exit_code = application.exec_()
    sys.exit(exit_code)

==微信公众号:==

你可能感兴趣的:(python)