爬取堆糖蜜桃猫图片并下载到本地

爬取蜜桃猫

大家可以打开堆糖网,不需要登录,然后搜索蜜桃猫,就会发现许多可爱的蜜桃猫和他的小对象,我们这次的任务就是要爬取蜜桃猫的日常生活。下面是爬取的代码:我们先提取每张图片的链接,然后逐张下载。由于搜索结果是通过Ajax加载的,接口返回的是JSON数据,所以我们直接请求这个接口,并把返回内容按JSON解析(代码里用的是resp.json())。然后用正则表达式从图片链接中提取出每张图片的文件名,最后保存到本地。

import json
import os
import re

import requests
from requests.exceptions import MissingSchema
# Module-level name `url`, initialised to an empty list.
# NOTE(review): save_image() takes its own `url` parameter; confirm whether
# this global is still needed.
url = []
def get_page():
    """Fetch the Duitang search results for the hard-coded keyword.

    Requests the search API (first 100 entries) and decodes the JSON body.

    Returns:
        The decoded JSON payload, or None when the request fails, times
        out, the server does not answer with status 200, or the body is
        not valid JSON.
    """
    search_url = (
        'https://www.duitang.com/napi/blog/list/by_search/'
        '?kw=%E8%9C%9C%E6%A1%83%E7%8C%AB&type=feed&start=0&limit=100'
    )

    try:
        # Without a timeout a stalled connection would block forever.
        resp = requests.get(search_url, timeout=10)
    except requests.RequestException:
        # Covers connection errors, timeouts and every other request failure.
        return None

    if resp.status_code != 200:
        return None

    try:
        return resp.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML error or anti-bot page).
        return None


def get_images(json):
    """Download every image referenced by a search-result payload.

    Reads ``json['data']['object_list']`` and passes each entry's
    ``photo['path']`` to save_image(). Missing or empty levels are skipped
    instead of raising.

    Args:
        json: Decoded response mapping, or None when fetching failed. The
            parameter name shadows the ``json`` module inside this function;
            it is kept so existing keyword callers continue to work.
    """
    if not json:
        return

    data = json.get('data')
    if not data:
        return

    # `object_list` may be absent or None; treat both as "no results".
    for entry in data.get('object_list') or []:
        photo = (entry or {}).get("photo") or {}
        image_url = photo.get("path")
        if image_url:
            save_image(image_url)



def save_image(url, save_dir="d:/py/duitangmitaomao/p/"):
    """Download one image and write it into *save_dir*.

    The file name is the part of the URL after its first underscore, e.g.
    ``.../20190313105834_vtvmt.jpeg`` is stored as ``vtvmt.jpeg``. URLs
    without an underscore fall back to their last path segment.

    Args:
        url: Address of the image to download.
        save_dir: Directory that receives the file; it is created when it
            does not exist yet. Defaults to the original hard-coded folder.

    Returns:
        None. Nothing is written when the server does not answer with
        status 200 or when no file name can be derived from the URL.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            URL is invalid.
        OSError: The directory or the file could not be written.
    """
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        # Do not store an error page under an image file name.
        return

    url_text = str(url)
    matched = re.match(r"^http.*?_(.*?)$", url_text)
    file_name = matched.group(1) if matched else url_text
    # Keep only the last segment so the name cannot point into a subfolder.
    file_name = file_name.rsplit("/", 1)[-1]
    if not file_name:
        return

    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, file_name), "wb") as f:
        f.write(response.content)






def main():
    """Fetch the search results and download every image they reference."""
    page = get_page()
    if page is None:
        # get_page() yields None when the request fails or is not answered
        # with status 200; there is nothing to download in that case.
        print("Could not fetch the search results; nothing was downloaded.")
        return

    # get_images() already saves each image. The former trailing
    # save_image(url) call passed the module-level empty list, which is not
    # a URL, so the script ended with an exception after the downloads.
    get_images(page)

# Script entry point. There is no __main__ guard, so this also runs when the
# file is imported as a module.
main()

你可能感兴趣的:(Python)