图片爬取

记录  
爬取整页图片。注意：爬到的不是 4K 原图，4K 原图需要会员才能下载。
import os

import requests  # HTTP library
from lxml import etree  # third-party library for data extraction

# Listing page to scrape.
url = 'http://pic.netbian.com/4kdongwu/'
# Running number used to name the saved image files.
count = 1
# Request headers: send a desktop Chrome User-Agent string
# (presumably so the site serves the normal page -- not verified here).
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36'
}

# Fetch the listing page. A timeout keeps the script from hanging forever on
# a stalled connection, and raise_for_status() stops early on an HTTP error
# instead of parsing an error page as if it were the listing.
page = requests.get(url, headers=headers, timeout=10)
page.raise_for_status()
response = page.content.decode('gbk')  # the raw bytes are decoded as GBK

html = etree.HTML(response)
# Collect the src attribute of every <img> inside the "clearfix" list.
clearfix = html.xpath('//div/ul[@class="clearfix"]/li/a/img/@src')

# Download every image found on the listing page into ./img/<n>.jpg.
# Make sure the output directory exists; open() does not create it.
os.makedirs('./img', exist_ok=True)

for i in clearfix:
    # Strip the first 16 characters and the last 4 from the src value; this
    # matches the lengths of '/uploads/allimg/' and '.jpg'
    # (assumes every src has that shape -- confirm against the live page).
    ID = i[16:-4]
    urls = 'http://pic.netbian.com/uploads/allimg/'+ID+'.jpg'   # image URL
    try:
        img_response = requests.get(urls, headers=headers, timeout=10)
        # Do not save an HTTP error body as if it were a JPEG.
        img_response.raise_for_status()
    except requests.RequestException as exc:
        # One failed image should not abort the rest of the page.
        print('skipped {}: {}'.format(urls, exc))
    else:
        # 'wb' rather than 'ab': appending on a re-run would glue a second
        # copy of the data onto an existing file and corrupt the image.
        with open('./img/{}.jpg'.format(count), 'wb') as f:
            f.write(img_response.content)
    # Advance even on failure so file N always corresponds to the N-th src.
    count += 1

你可能感兴趣的:(爬虫)