# 使用PyQt5.9,由于H5采用延迟加载技术,所以如果直接用urlopen打开网页再用read读,读到的是JS未运行时的内容。
import sys

from PyQt5.QtCore import QEventLoop, QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView
from PyQt5.QtWidgets import QApplication
class Render(QWebEngineView):
    """Load a page in a Qt WebEngine view and capture its HTML.

    The constructor blocks, pumping the Qt event loop, until the page
    reports ``loadFinished`` and the page's HTML has been delivered by
    ``QWebEnginePage.toHtml``.  The captured markup is then available as
    the ``html`` attribute.
    """

    def __init__(self, html):
        """Load the given address and wait for its HTML.

        Args:
            html: Address of the page to load.  Despite its name (kept so
                existing callers keep working) this is a URL, not markup.
                A missing scheme is tolerated, e.g. ``"www.example.com"``.
        """
        # A QApplication must exist before any widget is constructed, and
        # Qt permits only one per process, so reuse one if it is present.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        self.app = app

        super().__init__()
        # Stays None until _callable receives the page source.
        self.html = None
        self.loadFinished.connect(self._loadFinished)

        # QUrl(str) parses a scheme-less string as a relative URL, which
        # cannot be loaded; fromUserInput fills in a scheme when needed.
        self.load(QUrl.fromUserInput(html))

        wait_flags = (
            QEventLoop.ExcludeUserInputEvents
            | QEventLoop.ExcludeSocketNotifiers
            | QEventLoop.WaitForMoreEvents
        )
        # Process events (sleeping while idle) until the asynchronous
        # toHtml callback has stored the result.
        while self.html is None:
            self.app.processEvents(wait_flags)
        self.app.quit()

    def _callable(self, data):
        """Store the HTML string handed over by ``toHtml``."""
        self.html = data

    def _loadFinished(self, result):
        """Request the page's HTML once loading has finished.

        ``result`` (the load success flag) is intentionally not checked:
        the HTML is requested either way, so the wait loop in ``__init__``
        is released even when the load failed.
        """
        self.page().toHtml(self._callable)
# Demo: render the Baidu home page and print the captured HTML.
# The address carries an explicit scheme because QUrl would otherwise
# treat a bare host name as a relative path and the load would fail.
r = Render('https://www.baidu.com')
html = r.html
print(html)