# This code is for learning and exchange only; do not use it for any illegal purpose.
import urllib.request
import re
import time
'''
@Author:王磊
@Time :2018/11/11 19:36:25
'''
# Number used to name the first downloaded image; files are saved as
# "<number>.jpg" and the number grows by one per image.
index = 0

# Request headers sent with the listing and detail requests.
# NOTE: replace the Cookie value with one copied from your own browser
# session (visit the site, press F12, copy it from a request header).
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0",
    "Cookie": "__tins__2318434=%7B%22sid%22%3A%201540469731009%2C%20%22vd%22%3A%202%2C%20%22expires%22%3A%201540471922827%7B; __51cke__=; __51laig__=2",
    "Host": "www.plantphoto.cn",
    "Upgrade-Insecure-Requests": "1",
}

# Listing pages PAGE_START .. PAGE_STOP - 1 are crawled; adjust PAGE_STOP to
# the number of pages you actually need.
PAGE_START = 1
PAGE_STOP = 10000

# Seconds to wait after each image and after each specimen.
DELAY_SECONDS = 5

# Directory (with trailing separator) the images are written to. It must
# already exist; this script does not create it.
SAVE_DIR = "C:\\Users\\asus\\Desktop\\n\\"

# NOTE(review): the original listing-page pattern was lost -- the source line
# was truncated to `re.compile(r")`, which does not even parse.  The pattern
# below is a PLACEHOLDER ASSUMPTION, not the author's regex.  Inspect a real
# response of getphotopage.ashx and replace it with a pattern whose single
# capture group is the specimen pid.
PID_REGEX = re.compile(r'pid=(\d+)')

# Extracts every image id from the detail response of one specimen.
IMG_REGEX = re.compile(r'"pid":"(.*?)"')


def listing_url(page_number):
    """Return the URL of listing page *page_number*."""
    return ('http://www.plantphoto.cn/ashx/getphotopage.ashx?page='
            + str(page_number) + '&n=2&group=sp&cid=13253')


def detail_url(pid):
    """Return the URL that lists the images of specimen *pid*."""
    # The trailing r=... value is kept exactly as in the original script.
    return ("http://www.plantphoto.cn/ashx/getotherinfo.ashx?t=specimens&pid="
            + pid + "&r=0.3258786330122887")


def image_url(img):
    """Return the download URL of the image with id *img*."""
    return "http://img.plantphoto.cn/image2/b/" + img + ".jpg"


def save_path(number):
    """Return the local file path for the image numbered *number*."""
    return SAVE_DIR + str(number) + ".jpg"


def extract_pids(html):
    """Return all specimen pids found in a listing page (see PID_REGEX note)."""
    return PID_REGEX.findall(html)


def extract_image_ids(text):
    """Return all image ids found in a specimen detail response."""
    return IMG_REGEX.findall(text)


def fetch_text(url):
    """Request *url* with the shared headers and return the UTF-8 decoded body."""
    request = urllib.request.Request(url, headers=header)
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')


def main():
    """Crawl every listing page and download each referenced image.

    For each listing page the specimen pids are extracted; for each pid the
    detail endpoint is queried for image ids; each image is saved under
    SAVE_DIR as "<number>.jpg", numbering from the module-level ``index``.
    Network errors are not caught and abort the crawl.
    """
    counter = index
    for page_number in range(PAGE_START, PAGE_STOP):
        print("开始下载第%d页!" % page_number)
        html_index = fetch_text(listing_url(page_number))
        for pid in extract_pids(html_index):
            html_second = fetch_text(detail_url(pid))
            for img in extract_image_ids(html_second):
                urllib.request.urlretrieve(image_url(img), save_path(counter))
                print("成功下载第%d张!" % counter)
                counter += 1
                # NOTE(review): the source lost its indentation; one delay per
                # image and one per specimen is the assumed original nesting.
                time.sleep(DELAY_SECONDS)
            time.sleep(DELAY_SECONDS)
    print("over")


if __name__ == "__main__":
    main()
# ☞ Click here to discuss with me ☚
# ♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪
# ♪♪ More Python-based crawler examples will follow in this series; stay tuned. ♪♪
# ♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪