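"""Scrape girl images from the mzitu website.

Uses the requests library to request each link and obtain the response.
Uses the lxml library to parse the HTML and extract the resource links.
"""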
import os

import requests
from lxml import html
etree = html.etree
# Desktop Chrome User-Agent string, split over several literals only to keep
# the source lines short; the pieces concatenate into one header value.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/77.0.3865.120 Safari/537.36'
)

# HTTP headers passed to the requests.get calls in this script: the browser
# User-Agent above plus a Referer naming the site's home page.
headers = {
    'User-Agent': _USER_AGENT,
    'Referer': 'http://www.mzitu.com',
}
# Tag listing page whose galleries are downloaded.
url = 'http://mzitu.com/tag/meitun/'

# Directory (relative to the current working directory) that receives the
# downloaded files. It is created on start-up if it does not exist yet.
IMAGE_DIR = 'image'

# Seconds to wait on any single HTTP request, so that a stalled connection
# raises instead of hanging the script indefinitely.
REQUEST_TIMEOUT = 10

# XPath expressions tied to the site's current page layout.
GALLERY_ITEMS_XPATH = '//*[@id="pins"]/li'
GALLERY_LINK_XPATH = './a/@href'
PAGE_COUNT_XPATH = '/html/body/div[2]/div[1]/div[4]/a[5]/span/text()'
IMAGE_SRC_XPATH = '/html/body/div[2]/div[1]/div[3]/p/a/img/@src'


def _fetch_tree(page_url):
    """Download ``page_url`` and return its body parsed as an lxml HTML tree."""
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    return etree.HTML(response.text)


def _first(node, expression):
    """Return the first match of ``expression`` under ``node``, or ``None``."""
    matches = node.xpath(expression)
    return matches[0] if matches else None


def _gallery_page_count(tree):
    """Return the page count shown in a gallery's pagination bar.

    Returns 0 when the pagination entry is missing or is not a number, so
    the caller can skip the gallery instead of crashing.
    """
    text = _first(tree, PAGE_COUNT_XPATH)
    try:
        return int(text)
    except (TypeError, ValueError):
        return 0


def _download_gallery(gallery_url, next_index):
    """Save every image of one gallery and return the next free file index.

    Args:
        gallery_url: URL of the gallery's first page, as taken from the
            listing page.
        next_index: Number used to name the first file saved by this call.

    Returns:
        ``next_index`` advanced by the number of images actually saved.
    """
    page_count = _gallery_page_count(_fetch_tree(gallery_url))
    if page_count == 0:
        print('page count not found, skipping ' + gallery_url)
        return next_index

    # page_count is used as the number of the last page, so the range has to
    # include it; stopping at range(1, page_count) would drop the final image.
    for page_number in range(1, page_count + 1):
        page_url = gallery_url + '/' + str(page_number)
        img_url = _first(_fetch_tree(page_url), IMAGE_SRC_XPATH)
        if img_url is None:
            print('image not found, skipping ' + page_url)
            continue
        print('图片链接' + img_url)
        image = requests.get(img_url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Files are always named <index>.png, whatever format the server
        # actually returned; the naming scheme is kept for compatibility.
        path = os.path.join(IMAGE_DIR, str(next_index) + '.png')
        with open(path, 'wb') as f:
            f.write(image.content)
        next_index += 1
    return next_index


def main():
    """Download every image of every gallery linked from ``url``.

    Images are numbered consecutively across all galleries, starting at 0,
    and written to ``IMAGE_DIR``.
    """
    # Create the output directory up front so open() cannot fail on a
    # missing folder.
    os.makedirs(IMAGE_DIR, exist_ok=True)

    index = 0
    for item in _fetch_tree(url).xpath(GALLERY_ITEMS_XPATH):
        gallery_url = _first(item, GALLERY_LINK_XPATH)
        if gallery_url is None:
            continue
        index = _download_gallery(gallery_url, index)


# Executed at module level: running (or importing) this file starts the
# download immediately.
main()