Python爬取瀑布流百度图片

Python爬取瀑布流百度图片

import requests
from bs4 import BeautifulSoup
import re
from urllib.parse import urlencode
import json
import os

# Search keyword typed by the user; it is also used below as the download
# folder name and as the file-name prefix of every saved picture.
name = input("请输入需要爬取的图片:")
# Target number of pictures; picture_write() stops the script once this many
# have been saved. A non-integer answer raises ValueError here.
number = int(input("请输入要爬取的张数:"))

# Query-string parameters for the Baidu image-search JSON endpoint. They were
# copied from the request the results page issues when more pictures are
# loaded (visible in the browser dev tools, F12). "queryWord" and "word" hold
# placeholders here and, like "pn", are overwritten in main().
date = {
    "tn": "resultjson_com",
    "ipn": "rj",
    "ct": 201326592,
    "fp": "result",
    "queryWord": "name",
    "cl": 2,
    "lm": -1,
    "ie": "utf-8",
    "oe": "utf-8",
    "word": "name",
    "pn": 0,
    "rn": 30,
}

def get_url(date):
    """Build the Baidu image-search JSON URL for a set of query parameters.

    Args:
        date: Mapping of query-string parameters (despite its name it holds
            request data, not a calendar date).

    Returns:
        The endpoint address followed by ``date`` encoded as a query string.
    """
    endpoint = "https://image.baidu.com/search/acjson?"
    query_string = urlencode(date)  # percent-encodes keys and values
    return endpoint + query_string

def get_html(url):
    """Fetch ``url`` with an HTTP GET and return the response object.

    Args:
        url: Address to request.

    Returns:
        The ``requests`` response object. Its status code is not checked
        here; callers decide how to interpret the body.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            server does not answer within the timeout.
    """
    # requests waits indefinitely by default; bound the wait (in seconds) so a
    # stalled connection raises instead of freezing the whole crawl.
    return requests.get(url, timeout=10)

def get_urllist(html):
    """Return the ``"data"`` entry of a JSON response.

    Args:
        html: Response object exposing a ``json()`` method that returns the
            decoded body as a dictionary.

    Returns:
        Whatever is stored under the top-level ``"data"`` key; the caller
        treats it as a list of per-picture records.

    Raises:
        KeyError: If the decoded body has no ``"data"`` key.
    """
    payload = html.json()
    return payload["data"]

def get_picture(data):
    """Collect the ``"middleURL"`` link of every record in ``data``.

    Records that are not dictionaries, that have no ``"middleURL"`` key, or
    whose link is empty are skipped, so every returned entry is a usable URL.

    Args:
        data: Iterable of per-picture records (dictionaries), or ``None``.

    Returns:
        A list of the picture URLs found, in their original order. Empty when
        ``data`` is empty or ``None``.
    """
    picture_urllist = []
    for record in data or []:
        # Explicit checks replace the former bare ``except``, which also hid
        # unrelated errors (and KeyboardInterrupt) raised inside the loop.
        if not isinstance(record, dict):
            continue
        link = record.get("middleURL")
        if link:
            picture_urllist.append(link)
    return picture_urllist

def picture_write(picture_urllist, n):
    """Download the given picture URLs and save them as numbered ``.jpg`` files.

    Reads two module-level values: ``name`` (folder name and file-name prefix)
    and ``number`` (target count). Files are written to
    ``/home/jin/life/picture/<name>/<name><index>.jpg``; the folder must
    already exist.

    Args:
        picture_urllist: Sequence of picture URLs to download.
        n: Number of pictures saved so far; also the index of the next file.

    Returns:
        The updated count of saved pictures, when the target has not been
        reached yet.

    Raises:
        SystemExit: As soon as ``number`` pictures have been saved (this ends
            the script, as before).
        OSError: If a file cannot be written.
    """
    # Target already met (e.g. a requested count of 0): stop before
    # downloading anything instead of fetching one picture too many.
    if n >= number:
        print(" 爬取成功")
        raise SystemExit

    for picture_url in picture_urllist:
        path = "/home/jin/life/picture/" + name + "/" + name + str(n) + ".jpg"
        try:
            picture = requests.get(picture_url, timeout=10)
            # Reject HTTP error pages rather than saving them as ".jpg".
            picture.raise_for_status()
        except requests.RequestException as err:
            # A single dead link must not abort the whole crawl; skip it and
            # reuse the same index for the next picture.
            print("图片下载失败,已跳过: {} ({})".format(picture_url, err))
            continue

        with open(path, "wb") as file:
            file.write(picture.content)
        n += 1
        print("成功爬去第{}张图片".format(n))

        if n >= number:
            print(" 爬取成功")
            # Same effect as the interactive-only exit() helper, without
            # relying on the site module having installed it.
            raise SystemExit
    return n



def make_file(name):
    """Create the download folder ``/home/jin/life/picture/<name>``.

    Missing parent folders are created as well. Calling this for a folder
    that already exists is a no-op.

    Args:
        name: Folder name (the search keyword).

    Raises:
        OSError: If the folder cannot be created, e.g. for lack of permission.
    """
    path = "/home/jin/life/picture/" + name
    # exist_ok=True: re-running the script with the same keyword must not
    # crash with FileExistsError.
    os.makedirs(path, exist_ok=True)

def main():
    """Run the crawl for the keyword and target count entered by the user.

    Fills the keyword into the shared ``date`` parameters, creates the
    download folder, then requests result pages with ``pn`` set to
    0, 30, 60, ... (below 10000) and saves the pictures of each page.
    picture_write() ends the script once the target count is reached.
    """
    date.update({"queryWord": name, "word": name})
    make_file(name)

    saved = 0
    for offset in range(0, 10000, 30):
        # "pn" is presumably the result offset; "rn" in ``date`` is 30, which
        # matches the step used here.
        date["pn"] = offset
        response = get_html(get_url(date))
        links = get_picture(get_urllist(response))
        saved = picture_write(links, saved)


# Script entry point: start the crawl as soon as the file is executed.
main()

你可能感兴趣的:(Python爬虫,Python,爬虫,瀑布流网页,百度图片)