WebKit官网:http://www.webkit.org/
QtWebKit官网及安装:http://trac.webkit.org/wiki/QtWebKit#GettingInvolved
QtWebKit Class Reference:http://www.riverbankcomputing.co.uk/static/Docs/PyQt4/html/qtwebkit.html
QtWebKit也可以在PyQt4的安装中顺带安装(http://blog.csdn.net/xiarendeniao/article/details/6774520 46条)
1.用python的urllib库从服务端读取web页面(html+js)
#encoding=utf-8 import urllib, urlparse if __name__ == '__main__': baseUrl = 'http://s.weibo.com/weibo/' wordList = ['python','c++','钓鱼岛', '博圣云峰', '加勒比海盗', '海贼王', '2012', '世界末日', '地球'] for index in range(len(wordList)): url = urlparse.urljoin(baseUrl, urllib.quote(urllib.quote(wordList[index]))) print url conn = urllib.urlopen(url) data = conn.read() f = file('/tmp/%s' % (wordList[index]), 'w') f.write(data) f.close()2.用QtWebKit解析web页面(html+js)
#!/usr/bin/env python
#encoding=utf-8
import sys
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
import time


class Render(QWebPage):
    """Load each saved page from /tmp into a QWebPage and dump the result.

    For every keyword in ``wordList`` the file ``/tmp/<keyword>`` is fed to
    the page's main frame; when ``loadFinished`` fires, the frame's HTML is
    written as UTF-8 to ``/home/dongsong/桌面/<keyword>.html`` and the next
    keyword is loaded.  Constructing the object runs the Qt event loop and
    only returns after the last keyword has been processed.
    """

    def __init__(self):
        self.wordList = ['python','c++','钓鱼岛', '博圣云峰', '加勒比海盗', '海贼王', '2012', '世界末日', '地球']
        self.index = 0
        # Ordering kept from the original: the application object is created
        # before the QWebPage base class is initialised.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        self._loadCurrent()
        # Blocks until _loadFinished calls self.app.quit().
        self.app.exec_()

    def _loadCurrent(self):
        """Feed the saved page for the current keyword to the main frame."""
        with open('/tmp/%s' % self.wordList[self.index], 'r') as src:
            html = src.read()
        self.mainFrame().setHtml(html)

    def _loadFinished(self, result):
        """Save the current frame's HTML, then load the next page or quit.

        ``result`` is the value passed by the ``loadFinished`` signal; it is
        not inspected, so a page is saved regardless of its value.
        """
        rendered = unicode(self.mainFrame().toHtml()).encode('utf-8')
        with open('/home/dongsong/桌面/%s.html' % self.wordList[self.index], 'w') as dst:
            dst.write(rendered)
        self.index += 1
        if self.index >= len(self.wordList):
            self.app.quit()
        else:
            self._loadCurrent()


page = Render()
export DISPLAY=:0
vpython qt_load_2.py