基于Python实现的爬虫源码(1)

# -*- coding: utf-8 -*-

import string,urllib2
import os

import shutil


# Directory the script was launched from; the downloads go into a
# "baidu_file" sub-directory of it.
path = os.getcwd()
Dir = os.path.join(path, 'baidu_file')

# Always start from an empty output directory: if one is left over from a
# previous run, remove it together with everything inside it.
if os.path.exists(Dir):
    shutil.rmtree(Dir)

# Create and enter the directory through the same absolute path that was
# checked above, so all three operations are guaranteed to agree.
os.mkdir(Dir)
os.chdir(Dir)  # relative file names opened later resolve inside Dir


def baidu(url, start_page, end_page):
    """Download a run of numbered pages and save each one as an HTML file.

    For every page number ``i`` from ``start_page`` to ``end_page`` (both
    inclusive) the address ``url + str(i)`` is fetched and the raw response
    body is written to a file named after ``i`` zero-padded to five digits
    plus ``.html`` (e.g. ``00001.html``).  Files are created relative to the
    current working directory and overwrite existing files of the same name.

    Args:
        url: Address prefix; the page number is appended to it verbatim.
        start_page: First page number to fetch.
        end_page: Last page number to fetch (inclusive).  When it is smaller
            than ``start_page`` nothing is downloaded.

    Returns:
        None.

    Raises:
        Nothing is caught here: any error raised while opening the URL,
        reading the response or writing the file propagates to the caller.
    """
    # Resolved locally so the function runs on both Python 2 and Python 3.
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen

    for i in range(start_page, end_page + 1):
        # str.zfill replaces the Python-2-only string.zfill helper.
        file_name = str(i).zfill(5) + ".html"

        # Fetch before touching the output file so that a failed download
        # does not leave an empty file behind.
        response = urlopen(url + str(i))
        try:
            body = response.read()
        finally:
            response.close()

        # Binary mode: the body is raw bytes and must be stored unmodified
        # (text mode would translate newlines on some platforms).
        with open(file_name, "wb") as out_file:
            out_file.write(body)


# Script entry: announce the run, then download a fixed range of pages of
# one thread.
# NOTE(review): the message below is printed but no input is read
# afterwards; the address is the hard-coded value on the next line.
print("请输入贴吧地址")

# Address prefix; the page number is appended to the trailing "pn=".
bdurl = "http://tieba.baidu.com/p/2296017831?pn="

# Inclusive page range to fetch.
start_page, end_page = 1, 5

baidu(bdurl, start_page, end_page)

你可能感兴趣的:(python)