Python: batch-saving the hyperlink URLs from web pages (source code)

import urllib2
import time
import re

# Append extracted links to an output file
f1 = open('all2.txt', 'a')
for page in range(5, 194):
    # Original target URL:
    # url = "https://www.hac-ker.net/search.php?var=That%20is%20me&page=" + str(page)
    url = "http://www.example.com/archive?page=" + str(page)
    # Connect to the URL and read the HTML source
    website = urllib2.urlopen(url, timeout=10)
    html = website.read()
    # Use re.findall to collect every http/https URL that appears as link text;
    # an alternative pattern for href attributes: '"(https?://.*?)"'
    links = re.findall('>(https?://.*?)<', html)
    # Optionally prefix each batch with a timestamp:
    # f1.write(time.strftime('%y-%m-%d %H:%M:%S', time.localtime(time.time())) + "\n\n")
    for link in links:
        f1.write(link + "\n")
    # Print progress (the for loop advances page itself; no manual page += 1 is needed)
    print page
f1.close()
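
Note that urllib2 exists only in Python 2; in Python 3 it was merged into urllib.request. Below is a minimal sketch of the same scraper under that assumption, using the same hypothetical example.com URL. One extra step is required: read() returns bytes in Python 3, so the response must be decoded before the regex can match it.

import re
import urllib.request

# 'with' closes the file automatically, even if a request fails
with open('all2.txt', 'a') as f1:
    for page in range(5, 194):
        url = "http://www.example.com/archive?page=" + str(page)
        # Fetch the page and decode the raw bytes into text
        html = urllib.request.urlopen(url, timeout=10).read().decode('utf-8', 'replace')
        # Same pattern as above: http/https URLs appearing as link text
        for link in re.findall(r'>(https?://.*?)<', html):
            f1.write(link + "\n")
        print(page)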
