# Read URLs from a file and scrape each page's content with Python (a beginner's script; feedback is welcome).
# -*- coding: UTF-8 -*-
import urllib
import re
import time
#定义获取目标网页函数
def getHtml(url):
page = urllib.urlopen(url)
html = page.read()
return html
#主函数(从1.txt读取url,并从读取的url的网页,保存在“当前时间”.txt)
if __name__=='__main__':
fp = open('1.txt','r')
for line in fp:
getpage = getHtml(line)
localtime = time.strftime("%Y%m%d%H%M%S",time.localtime())
filename = localtime + ".txt"
fo = open(filename, "w")
fo.write(getpage)
print "Get Success!"
fo.close()
fp.close()