怎样用PyQt5.6 爬取网页

从 PyQt 5.6 开始,Qt 改用基于 Chromium 的 QtWebEngine 作为新一代浏览器引擎,与之前的 WebKit 有很大的区别。经过长时间的测试,终于可以用了!


# -*- coding: utf-8 -*-


import sys
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEnginePage, QWebEngineView

class Render(QWebEngineView):
    """Load a URL in a QWebEngineView and capture the page's HTML.

    Constructing an instance blocks: it runs the Qt event loop until the
    page has finished loading and its HTML has been handed back, after
    which the markup is available as ``self.html``.
    """

    def __init__(self, url):
        """Load *url* and block until its HTML is stored in ``self.html``.

        Args:
            url: Address of the page to render, as a string.
        """
        # Qt permits only one QApplication per process, so reuse an existing
        # instance instead of constructing a second one.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        self.app = app
        QWebEngineView.__init__(self)
        # Defined up front so the attribute exists even if the event loop
        # ends before the HTML callback has run.
        self.html = ""
        self.loadFinished.connect(self._loadFinished)
        self.load(QUrl(url))
        # Blocks here until callable() asks the application to quit.
        self.app.exec_()

    def _loadFinished(self, result):
        """Request the page's HTML once loading has finished.

        Args:
            result: Value delivered by the ``loadFinished`` signal; unused.
        """
        # This is an async call, you need to wait for this
        # to be called before closing the app
        self.page().toHtml(self.callable)

    def callable(self, data):
        """Store the HTML delivered by ``toHtml`` and stop the event loop.

        Args:
            data: The page's HTML as passed to the ``toHtml`` callback.
        """
        self.html = data
        # Data has been stored, it's safe to quit the app
        self.app.quit()



import lxml.html

# Define the address of the web page to fetch (placeholder value).
url = 'https://xxxxxxxxxxxx'

# Constructing Render runs the Qt event loop and only returns once the
# page's HTML has been stored on the instance.
r = Render(url)
result = r.html
# Parse the captured HTML with lxml.
tree = lxml.html.fromstring(result)

参考下面的文章:


https://stackoverflow.com/questions/37754138/how-to-render-html-with-pyqt5s-qwebengineview



你可能感兴趣的:(PyQt)