【自制实用小工具】——1、Xpath解析器
由于js脚本的影响,我们请求得到的数据常常与网页显示的数据不一样。而chrome插件xpath helper不能调试本地网页,于是有了制造一个xpath解析器的想法。(粗略尝试了一下,没有问题,大家要是发现bug的话记得评论告诉我啊~)
工具:
- PyQt5 库
- Qt designer
- sys 库
- requests 库
- lxml 库
步骤:
(一)用Qt designer设计界面
(二)将.ui文件转换为.py文件
有关(一)、(二)部分的教程可以参考:https://www.jb51.net/article/...
(三)链接按钮
将以下代码添加到 setupUi 方法体的末尾(仍在该方法内部):
# Hook up the buttons: clicking "Get Html" runs Button_Get_Html and clicking
# "Xpath Parse" runs Button_Xpath_Parse.
self.button_Get_html.clicked.connect(self.Button_Get_Html)
self.button_Xpath_Parse.clicked.connect(self.Button_Xpath_Parse)
(四)按钮事件
以下分别是按钮==Get Html==和按钮 ==Xpath Parse== 的代码:
def Button_Get_Html(self):
    """Download the page named in the "Web Site" box into the HTML pane.

    Slot for the "Get Html" button.  The URL is read from
    ``self.text_Web_Site``; the decoded page source, or the text of any
    error raised while fetching it, is written to ``self.text_HTML_Code``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36'
    }
    url = self._normalize_url(self.text_Web_Site.toPlainText())
    if not url:
        self.text_HTML_Code.setPlainText('网址不能为空!')
        return
    try:
        # The context manager closes the session's connections when done.
        with requests.session() as session:
            # NOTE(review): verify=False turns off TLS certificate checks.
            # Kept as in the original; confirm this is acceptable.
            # The timeout stops an unresponsive server from freezing the
            # GUI forever (this slot runs on the GUI thread).
            response = session.get(url=url, headers=headers, verify=False, timeout=10)
            page = self._decode_body(
                response.content, lambda: response.apparent_encoding)
    except Exception as e:
        # Top-level slot boundary: show the failure instead of raising.
        self.text_HTML_Code.setPlainText(str(e))
    else:
        # Show the returned page source in text_HTML_Code.
        self.text_HTML_Code.setPlainText(page)

@staticmethod
def _normalize_url(raw_url):
    """Return *raw_url* stripped, with ``http://`` added if it has no scheme.

    An empty or whitespace-only input yields ``''``.
    """
    url = raw_url.strip()
    scheme, separator, _ = url.partition('://')
    # Only a purely alphanumeric prefix counts as a scheme, so a '://' that
    # appears later (for example inside a query string) does not.
    has_scheme = bool(separator) and scheme.isalnum()
    if url and not has_scheme:
        url = 'http://' + url
    return url

@staticmethod
def _decode_body(content, guess_encoding=None):
    """Decode response bytes to text, preferring UTF-8.

    Args:
        content: raw response body (``bytes``).
        guess_encoding: optional zero-argument callable returning an
            encoding name (or ``None``); it is only called when *content*
            is not valid UTF-8, because detection can be slow.

    Returns:
        The decoded text.  Bytes that cannot be decoded are dropped.
    """
    try:
        return content.decode('utf-8')
    except UnicodeDecodeError:
        pass
    encoding = guess_encoding() if guess_encoding is not None else None
    if encoding:
        try:
            return content.decode(encoding, 'ignore')
        except LookupError:
            # Python does not know the detected encoding name.
            pass
    return content.decode('utf-8', 'ignore')
def Button_Xpath_Parse(self):
    """Evaluate the XPath expression against the HTML pane's content.

    Slot for the "Xpath Parse" button.  Reads the expression from
    ``self.text_Xpath_Syntax`` and the markup from ``self.text_HTML_Code``,
    then fills ``self.text_Result`` with one numbered entry per match, or
    with the error text if parsing or evaluation fails.
    """
    self.text_Result.document().clear()
    xpath_syntax = self.text_Xpath_Syntax.toPlainText()
    html_code = self.text_HTML_Code.toPlainText()
    try:
        # Parsing is inside the try so that a parser error is displayed
        # rather than escaping from the slot.
        html = etree.HTML(html_code)
        results = None if html is None else html.xpath(xpath_syntax)
    except Exception as e:
        # Top-level slot boundary: show the failure instead of raising.
        self.text_Result.setPlainText(str(e))
        return
    if html is None:
        # No root element could be built from the input.
        self.text_Result.setPlainText('HTML 代码为空或无法解析!')
        return
    for line in self._format_results(results):
        self.text_Result.append(line)

@staticmethod
def _format_results(results):
    """Turn an XPath result into the list of lines shown in the result pane.

    Args:
        results: whatever ``xpath()`` returned: a list of matches, or a
            single scalar (string, number, boolean) for expressions such
            as ``count(...)`` or ``string(...)``.

    Returns:
        A flat list of strings: for each match a numbered separator line
        followed by the match's text.
    """
    if not isinstance(results, list):
        # A scalar result is displayed as a single match.
        results = [results]
    lines = []
    for num, result in enumerate(results):
        lines.append('-' * 60 + '这里是第 ' + str(num) + ' 个')
        if isinstance(result, str):
            # Text and attribute matches are already strings.
            text = result
        elif hasattr(result, 'text'):
            # Element-like match; .text is None when it has no leading text.
            text = result.text or ''
        else:
            text = str(result)
        lines.append(text)
    return lines
(五)初始化界面
if __name__ == '__main__':
    # A PyQt5 program needs one application object; it receives the
    # command-line arguments via sys.argv.
    application = QtWidgets.QApplication(sys.argv)
    # A plain QWidget created without a parent serves as the window that
    # the generated form populates.
    window = QtWidgets.QWidget()
    form = Ui_Asyu17_Xpath_Helper()
    form.setupUi(window)
    window.show()
    # Run the event loop (the method is spelled exec_() with a trailing
    # underscore) and hand its return value to sys.exit().
    exit_code = application.exec_()
    sys.exit(exit_code)
结果展示:
测试无问题后,可使用 pyinstaller 将代码打包成可执行文件~
代码:
from PyQt5 import QtCore, QtGui, QtWidgets
import sys
import requests
from lxml import etree
# Pages are fetched with verify=False further down; presumably this call is
# here to silence the warnings urllib3 would print for such requests.
requests.packages.urllib3.disable_warnings()
class Ui_Asyu17_Xpath_Helper(object):
    """Form for a small XPath tester: fetch a page, then query its HTML.

    ``setupUi`` builds the widgets on a host widget and connects the two
    buttons to ``Button_Get_Html`` and ``Button_Xpath_Parse``.
    """

    def setupUi(self, Asyu17_Xpath_Helper):
        """Create all child widgets on *Asyu17_Xpath_Helper* and wire the buttons."""
        Asyu17_Xpath_Helper.setObjectName("Asyu17_Xpath_Helper")
        Asyu17_Xpath_Helper.resize(969, 905)
        self.button_Xpath_Parse = QtWidgets.QPushButton(Asyu17_Xpath_Helper)
        self.button_Xpath_Parse.setGeometry(QtCore.QRect(830, 860, 75, 31))
        self.button_Xpath_Parse.setObjectName("button_Xpath_Parse")
        self.label = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label.setGeometry(QtCore.QRect(10, 10, 71, 16))
        self.label.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.label.setScaledContents(False)
        self.label.setObjectName("label")
        self.label_2 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_2.setGeometry(QtCore.QRect(490, 10, 51, 16))
        self.label_2.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.label_2.setScaledContents(False)
        self.label_2.setObjectName("label_2")
        self.label_3 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_3.setGeometry(QtCore.QRect(20, 860, 91, 31))
        self.label_3.setObjectName("label_3")
        # Input box for the XPath expression (made editable below).
        self.text_Xpath_Syntax = QtWidgets.QTextBrowser(Asyu17_Xpath_Helper)
        self.text_Xpath_Syntax.setGeometry(QtCore.QRect(110, 860, 681, 31))
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(13)
        self.text_Xpath_Syntax.setFont(font)
        self.text_Xpath_Syntax.setReadOnly(False)
        self.text_Xpath_Syntax.setObjectName("text_Xpath_Syntax")
        self.button_Get_html = QtWidgets.QPushButton(Asyu17_Xpath_Helper)
        self.button_Get_html.setGeometry(QtCore.QRect(830, 820, 75, 31))
        self.button_Get_html.setObjectName("button_Get_html")
        # Input box for the URL to download (made editable below).
        self.text_Web_Site = QtWidgets.QTextBrowser(Asyu17_Xpath_Helper)
        self.text_Web_Site.setGeometry(QtCore.QRect(110, 820, 681, 31))
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(13)
        self.text_Web_Site.setFont(font)
        self.text_Web_Site.setReadOnly(False)
        self.text_Web_Site.setObjectName("text_Web_Site")
        self.label_4 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_4.setGeometry(QtCore.QRect(20, 820, 91, 31))
        self.label_4.setObjectName("label_4")
        # Container holding the HTML pane and the result pane side by side.
        self.layoutWidget = QtWidgets.QWidget(Asyu17_Xpath_Helper)
        self.layoutWidget.setGeometry(QtCore.QRect(10, 30, 951, 781))
        self.layoutWidget.setObjectName("layoutWidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.text_HTML_Code = QtWidgets.QTextBrowser(self.layoutWidget)
        self.text_HTML_Code.setEnabled(True)
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(12)
        self.text_HTML_Code.setFont(font)
        self.text_HTML_Code.setMouseTracking(False)
        self.text_HTML_Code.setTabletTracking(False)
        self.text_HTML_Code.setReadOnly(False)
        self.text_HTML_Code.setObjectName("text_HTML_Code")
        self.horizontalLayout.addWidget(self.text_HTML_Code)
        self.text_Result = QtWidgets.QTextBrowser(self.layoutWidget)
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(12)
        self.text_Result.setFont(font)
        self.text_Result.setReadOnly(False)
        self.horizontalLayout.addWidget(self.text_Result)
        self.retranslateUi(Asyu17_Xpath_Helper)
        QtCore.QMetaObject.connectSlotsByName(Asyu17_Xpath_Helper)
        # Hook up the buttons to their handler methods.
        self.button_Get_html.clicked.connect(self.Button_Get_Html)
        self.button_Xpath_Parse.clicked.connect(self.Button_Xpath_Parse)

    def retranslateUi(self, Asyu17_Xpath_Helper):
        """Set the window title and the visible text of labels and buttons."""
        _translate = QtCore.QCoreApplication.translate
        Asyu17_Xpath_Helper.setWindowTitle(_translate("Asyu17_Xpath_Helper", "Asyu17 Xpath Helper"))
        self.button_Xpath_Parse.setText(_translate("Asyu17_Xpath_Helper", "Xpath Parse"))
        self.label.setText(_translate("Asyu17_Xpath_Helper", "HTML Code:"))
        self.label_2.setText(_translate("Asyu17_Xpath_Helper", "Result:"))
        self.label_3.setText(_translate("Asyu17_Xpath_Helper", "Xpath Syntax:"))
        self.button_Get_html.setText(_translate("Asyu17_Xpath_Helper", "Get Html"))
        self.label_4.setText(_translate("Asyu17_Xpath_Helper", "Web Site:"))

    def Button_Get_Html(self):
        """Download the page named in the "Web Site" box into the HTML pane.

        Slot for the "Get Html" button.  The URL is read from
        ``self.text_Web_Site``; the decoded page source, or the text of any
        error raised while fetching it, is written to ``self.text_HTML_Code``.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36'
        }
        url = self._normalize_url(self.text_Web_Site.toPlainText())
        if not url:
            self.text_HTML_Code.setPlainText('网址不能为空!')
            return
        try:
            # The context manager closes the session's connections when done.
            with requests.session() as session:
                # NOTE(review): verify=False turns off TLS certificate
                # checks.  Kept as in the original; confirm this is
                # acceptable.  The timeout stops an unresponsive server from
                # freezing the GUI forever (this slot runs on the GUI thread).
                response = session.get(url=url, headers=headers, verify=False, timeout=10)
                page = self._decode_body(
                    response.content, lambda: response.apparent_encoding)
        except Exception as e:
            # Top-level slot boundary: show the failure instead of raising.
            self.text_HTML_Code.setPlainText(str(e))
        else:
            # Show the returned page source in text_HTML_Code.
            self.text_HTML_Code.setPlainText(page)

    @staticmethod
    def _normalize_url(raw_url):
        """Return *raw_url* stripped, with ``http://`` added if it has no scheme.

        An empty or whitespace-only input yields ``''``.
        """
        url = raw_url.strip()
        scheme, separator, _ = url.partition('://')
        # Only a purely alphanumeric prefix counts as a scheme, so a '://'
        # that appears later (for example inside a query string) does not.
        has_scheme = bool(separator) and scheme.isalnum()
        if url and not has_scheme:
            url = 'http://' + url
        return url

    @staticmethod
    def _decode_body(content, guess_encoding=None):
        """Decode response bytes to text, preferring UTF-8.

        Args:
            content: raw response body (``bytes``).
            guess_encoding: optional zero-argument callable returning an
                encoding name (or ``None``); it is only called when
                *content* is not valid UTF-8, because detection can be slow.

        Returns:
            The decoded text.  Bytes that cannot be decoded are dropped.
        """
        try:
            return content.decode('utf-8')
        except UnicodeDecodeError:
            pass
        encoding = guess_encoding() if guess_encoding is not None else None
        if encoding:
            try:
                return content.decode(encoding, 'ignore')
            except LookupError:
                # Python does not know the detected encoding name.
                pass
        return content.decode('utf-8', 'ignore')

    def Button_Xpath_Parse(self):
        """Evaluate the XPath expression against the HTML pane's content.

        Slot for the "Xpath Parse" button.  Reads the expression from
        ``self.text_Xpath_Syntax`` and the markup from
        ``self.text_HTML_Code``, then fills ``self.text_Result`` with one
        numbered entry per match, or with the error text if parsing or
        evaluation fails.
        """
        self.text_Result.document().clear()
        xpath_syntax = self.text_Xpath_Syntax.toPlainText()
        html_code = self.text_HTML_Code.toPlainText()
        try:
            # Parsing is inside the try so that a parser error is displayed
            # rather than escaping from the slot.
            html = etree.HTML(html_code)
            results = None if html is None else html.xpath(xpath_syntax)
        except Exception as e:
            # Top-level slot boundary: show the failure instead of raising.
            self.text_Result.setPlainText(str(e))
            return
        if html is None:
            # No root element could be built from the input.
            self.text_Result.setPlainText('HTML 代码为空或无法解析!')
            return
        for line in self._format_results(results):
            self.text_Result.append(line)

    @staticmethod
    def _format_results(results):
        """Turn an XPath result into the list of lines shown in the result pane.

        Args:
            results: whatever ``xpath()`` returned: a list of matches, or a
                single scalar (string, number, boolean) for expressions such
                as ``count(...)`` or ``string(...)``.

        Returns:
            A flat list of strings: for each match a numbered separator line
            followed by the match's text.
        """
        if not isinstance(results, list):
            # A scalar result is displayed as a single match.
            results = [results]
        lines = []
        for num, result in enumerate(results):
            lines.append('-' * 60 + '这里是第 ' + str(num) + ' 个')
            if isinstance(result, str):
                # Text and attribute matches are already strings.
                text = result
            elif hasattr(result, 'text'):
                # Element-like match; .text is None when it has no leading
                # text.
                text = result.text or ''
            else:
                text = str(result)
            lines.append(text)
        return lines
if __name__ == '__main__':
    # A PyQt5 program needs one application object; it receives the
    # command-line arguments via sys.argv.
    application = QtWidgets.QApplication(sys.argv)
    # A plain QWidget created without a parent serves as the window that
    # the generated form populates.
    window = QtWidgets.QWidget()
    form = Ui_Asyu17_Xpath_Helper()
    form.setupUi(window)
    window.show()
    # Run the event loop (the method is spelled exec_() with a trailing
    # underscore) and hand its return value to sys.exit().
    exit_code = application.exec_()
    sys.exit(exit_code)
==微信公众号:==