# 使用模块urllib
#coding:utf-8
import urllib.request
# 请求url,获取网页源代码
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to request; anything ``urllib.request.urlopen``
            accepts (a URL string or a ``Request`` object).

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError``, a
            subclass, for HTTP error statuses).
    """
    # Close the response deterministically instead of leaving the
    # underlying connection to the garbage collector.
    with urllib.request.urlopen(url) as response:
        return response.read()
# 保存文档
def saveHtml(file_name, file_content):
    """Write *file_content* to *file_name*, replacing any existing file.

    Args:
        file_name: Path of the file to create or overwrite.
        file_content: Bytes-like data to store. The file is opened in
            binary mode, so the data is written unmodified.
    """
    with open(file_name, "wb") as f:
        f.write(file_content)
# 循环访问并爬取网站内各网页源代码
# Request the page for every uid from 1 to 6364 and store each response
# as <uid>.html in the output directory.
base_url = 'http://www.xxxx.com/home.php?mod=space&uid='
output_pattern = 'D:/工具/py脚本/pachong/html/%s.html'
for i in range(1, 6365):
    # The uid must be appended to the query string; without it every
    # iteration requests the very same URL.
    url = '%s%s' % (base_url, i)
    try:
        h = getHtml(url)
    except OSError as err:
        # urllib's URLError/HTTPError and socket-level failures are all
        # OSError subclasses; skip this page instead of aborting the
        # remaining requests.
        print("failed to fetch %s: %s" % (url, err))
        continue
    saveHtml(output_pattern % i, h)
print("结束")