代码如下：
import urllib2,httplib
import re
# Captures the text between 'title="Permanent Link to ' and the closing '">'.
# DOTALL lets a captured title span line breaks.
_TITLE_RE = re.compile(r'title="Permanent Link to (.*?)">', re.DOTALL)


def getinfo(url):
    """Fetch one page and append every post title found on it to result.txt.

    The page is requested with a fixed set of browser-like headers, every
    ``title="Permanent Link to ..."`` attribute is extracted from the body,
    and each extracted title is appended to ``result.txt`` (one per line).
    A progress message is printed when the page is done.

    Args:
        url: Address of the page to download.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
            request fails (``HTTPError`` is a subclass).
        IOError: If ``result.txt`` cannot be opened or written.
    """
    header = {
        'Host': 'drops.wooyun.org',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36',
        'Referer': 'http://drops.wooyun.org/category/tips',
        # NOTE(review): if the server honours 'deflate' the body would arrive
        # compressed and nothing here decompresses it -- confirm this header
        # is really wanted.
        'Accept-Encoding': 'deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        # NOTE(review): hard-coded session cookie; this looks like live
        # credentials and should probably come from configuration instead.
        'Cookie': '__cfduid=d53915c99accdcad55597f1505f45d41d1437057232; wy_uid=05413VWUQtWG87uQ9rQqjQ1%2FBsoMcwzmmY6NUcmZ9GYd; wy_pwd=548chtxV21nx%2FTqMxuearSIbX%2BGf7OXFZbP8OSfT4ZXwbLGD5%2FQ5%2BYyOO%2FrVvGRxHqLDnS7rHOdfololbg; Hm_lvt_c12f88b5c1cd041a732dea597a5ec94c=1441114384,1441198545,1441638408,1441720289; PHPSESSID=2vsqsq4ta4phl8c8us6ekp1tg3; Hm_lvt_9fc41da6a2322bdd80563c9d5a4bdb1d=1441720306,1441801056,1441804034,1442054546; Hm_lpvt_9fc41da6a2322bdd80563c9d5a4bdb1d=1442055432; wordpress_logged_in_7065d11a793a3ec8482214fcc4f0a55b=%E7%8E%8B%E9%B9%8F%40HIT%7C1442228058%7C53c2ec073a1c556b783f94fcc013d4ec',
        # NOTE(review): a 304 reply to this conditional header would surface
        # as an HTTPError from urlopen -- confirm the header is intended.
        'If-Modified-Since': 'Sat, 12 Sep 2015 10:54:18 GMT'
    }
    req = urllib2.Request(url, None, header)
    response = urllib2.urlopen(req)
    try:
        htmlpage = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    # With a single capture group, findall returns just the captured titles.
    titles = _TITLE_RE.findall(htmlpage)

    # Only touch result.txt when there is something to write, and open it
    # once per page rather than once per title.
    if titles:
        with open('result.txt', 'a') as fobj:
            for title in titles:
                fobj.write(title + '\n')

    print("Finish getting " + url)
if __name__ == "__main__":
    # Ask how many pages to crawl, then fetch pages 1..N in order.
    last_page = int(raw_input("Input the PageNum:"))
    page_prefix = "http://drops.wooyun.org/category/tips/page/"
    for page in range(1, last_page + 1):
        getinfo(page_prefix + str(page))
先写这么多。