Python3读取加载并执行JS后的内容

使用PyQt5.9。由于H5采用延迟加载技术,如果直接用urlopen打开网页再用read读取,读到的是JS未运行时的内容。

import sys


from PyQt5.QtCore import QEventLoop,QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView

from PyQt5.QtWidgets import QApplication

class Render(QWebEngineView):
    """Load a URL in Qt WebEngine and capture the resulting page HTML.

    Construction is synchronous from the caller's point of view: the
    constructor starts the load, pumps the Qt event loop until the page's
    HTML has been delivered, and then returns.  The markup is available
    afterwards in the ``html`` attribute.

    Attributes:
        html: ``None`` while loading; the page HTML string once the
            ``toHtml`` callback has fired.
        app: The ``QApplication`` used to pump events.
    """

    def __init__(self, html):
        """Load *html* (a URL string) and block until its HTML is captured.

        Args:
            html: Address of the page to load.  Despite the name this is a
                URL, not markup.  A missing scheme is tolerated
                (``"www.example.com"`` is treated as an http URL).
        """
        self.html = None
        # A QApplication must exist before any widget is constructed, and
        # Qt permits only one per process, so reuse an existing instance
        # instead of unconditionally creating another.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebEngineView.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        # QUrl(str) parses a scheme-less string as a *relative* URL, which
        # the web engine cannot load; fromUserInput() fills in the scheme.
        self.load(QUrl.fromUserInput(html))
        # toHtml() is asynchronous, so keep processing events until the
        # callback has stored the result.
        while self.html is None:
            self.app.processEvents(
                QEventLoop.ExcludeUserInputEvents |
                QEventLoop.ExcludeSocketNotifiers |
                QEventLoop.WaitForMoreEvents)
        self.app.quit()

    def _callable(self, data):
        """Receive the page HTML from ``toHtml`` and store it on ``self.html``."""
        self.html = data

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: request the page's HTML asynchronously.

        Args:
            result: Success flag emitted by the signal.  It is not inspected;
                the HTML is requested whether or not the load succeeded.
        """
        self.page().toHtml(self._callable)

# Fetch the Baidu home page and print the HTML captured by Render.
# The URL needs an explicit scheme: QUrl parses a bare "www.baidu.com" as a
# relative path, so the page would never actually be loaded.
r = Render('https://www.baidu.com')
html = r.html
print(html)

你可能感兴趣的:(Python)