原文地址链接: http://blog.sina.com.cn/s/blog_5e32cc130100fszx.html
2, 【用Python写爬虫】获取html的方法【二】:使用pycurl
# Pycurl参考地址:http://pycurl.sourceforge.net/
# Pycurl下载地址:http://pycurl.sourceforge.net/download/pycurl-7.18.1.tar.gz
#!/usr/bin/python
#coding:utf8

import pycurl
from io import BytesIO


def getURLContent_pycurl(url):
    """Fetch ``url`` with pycurl and return the raw response body.

    Redirects are followed, up to a maximum of 5 hops.

    Args:
        url: Address of the page to download.

    Returns:
        The response body exactly as libcurl delivered it (a byte string;
        no charset decoding is performed here).

    Raises:
        pycurl.error: Propagated from ``perform()`` when the transfer fails.
    """
    # The body arrives as raw bytes, so collect it in a bytes buffer.
    # io.BytesIO behaves like the old StringIO.StringIO on Python 2 and
    # also works on Python 3.
    buf = BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.WRITEFUNCTION, buf.write)
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.MAXREDIRS, 5)
        c.perform()
    finally:
        # Always release the curl handle, even when perform() raises.
        c.close()
    return buf.getvalue()


url = 'http://www.baidu.com'
content = getURLContent_pycurl(url)
# Single-argument print(...) outputs the same text under Python 2 and is
# valid syntax under Python 3.
print(content)
3, 【用Python写爬虫】获取html的方法【三】:使用cPAMIE
# cPAMIE下载:http://sourceforge.net/project/showfiles.php?group_id=103662
#!/usr/bin/python
#coding:utf8

import cPAMIE


def getURLContent_cPAMIE(url):
    """Open ``url`` through a cPAMIE.PAMIE instance and return its page text.

    Args:
        url: Address of the page to load.

    Returns:
        Whatever ``pageGetText()`` returns for the loaded page
        (presumably the page's text content -- confirm against the
        cPAMIE documentation).
    """
    g_ie = cPAMIE.PAMIE()
    # Fix: the original read `g_ie = showDebugging = False`, a chained
    # assignment that rebound g_ie to False and made the next line fail
    # with AttributeError. The attribute assignment is what was intended.
    g_ie.showDebugging = False
    g_ie.frameName = None
    try:
        g_ie.navigate(url)
        content = g_ie.pageGetText()
    finally:
        # Always shut the PAMIE instance down, even if navigation or
        # text extraction raises.
        g_ie.quit()
    return content


url = 'http://www.baidu.com'

content = getURLContent_cPAMIE(url)

# Single-argument print(...) outputs the same text under Python 2 and is
# valid syntax under Python 3.
print(content)