# Batch-download images from Duitang (堆糖网, duitang.com) with Python

import json
import os
import re
import urllib
import urllib.parse
import urllib.request

import jsonpath  # helper library for extracting values from JSON documents
import requests  # third-party HTTP client
from bs4 import BeautifulSoup
 
# Search keyword, percent-encoded so it can be embedded in the query string.
label = 'AI'
label = urllib.parse.quote(label)
# Alternative JSON endpoint for the same search, kept for reference (unused below):
# https://www.duitang.com/napi/blog/list/by_search/?kw=%E6%A0%A1%E8%8A%B1&type=feed&include_fields=top_comments%2Cis_root%2Csource_link%2Citem%2Cbuyable%2Croot_id%2Cstatus%2Clike_count%2Clike_id%2Csender%2Calbum%2Creply_count%2Cfavorite_blog_id&_type=&start=24&_=1541772636388
# Search-page template: first slot is the keyword, second is the paging offset.
url = 'https://www.duitang.com/search/?kw={}&start={}'

SAVE_DIR = 'D:/Python/AI'  # folder the downloaded images are written to
PAGE_SIZE = 24             # step between consecutive ``start`` offsets
MAX_OFFSET = 2400          # exclusive upper bound for ``start``
REQUEST_TIMEOUT = 10       # seconds; keeps a stalled request from hanging the run


def _image_urls(html, base_url):
    """Return absolute URLs of the images wrapped in ``<a class="a">`` tags.

    Anchors without an ``<img>`` child, and images without a ``src``
    attribute, are skipped. Relative and protocol-relative ``src`` values
    are resolved against ``base_url``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    found = []
    for anchor in soup.find_all('a', class_='a'):
        image = anchor.img
        src = image.get('src') if image is not None else None
        if src:
            found.append(urllib.parse.urljoin(base_url, src))
    return found


def _image_filename(image_url):
    """Return the file name taken from the path component of ``image_url``.

    The query string and fragment are ignored so characters such as ``?``
    never end up in the local file name.
    """
    return os.path.basename(urllib.parse.urlsplit(image_url).path)


def main():
    """Walk the search result pages and save every listed image to SAVE_DIR.

    A page or image that fails to download is reported on stdout and
    skipped, so one bad request does not abort the whole run.
    """
    os.makedirs(SAVE_DIR, exist_ok=True)
    for offset in range(0, MAX_OFFSET, PAGE_SIZE):
        page_url = url.format(label, offset)
        try:
            response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print('skipping page {}: {}'.format(page_url, err))
            continue

        image_urls = _image_urls(response.text, page_url)
        print('{}: {} chars, encoding {}, {} images'.format(
            page_url, len(response.text), response.encoding, len(image_urls)))

        for image_url in image_urls:
            name = _image_filename(image_url)
            if not name:
                # URL path ends in "/": there is no usable file name.
                continue
            target = os.path.join(SAVE_DIR, name)
            try:
                urllib.request.urlretrieve(image_url, target)
            except (OSError, ValueError) as err:
                # URLError/HTTPError derive from OSError; ValueError covers
                # URLs urllib cannot open at all.
                print('failed to download {}: {}'.format(image_url, err))


if __name__ == '__main__':
    main()


 

 

# Related topics: web crawlers, scraping