python爬取百度图片搜索_Python爬取百度图片

import urllib.request as urqt

import urllib.parse as urps

from urllib.parse import quote

import requests

import os

import re

import sys

def gethtml(url):
    """Fetch ``url`` and return the response body decoded as text.

    The request is sent through a ``requests.Session`` with a Firefox
    User-Agent header, a 10 second timeout, and redirects disabled.

    Args:
        url: Address of the page to download.

    Returns:
        The ``.text`` attribute of the response.

    Exceptions raised by ``requests`` (timeouts, connection errors) are not
    caught here and propagate to the caller.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"}
    # Use the session as a context manager so it is closed even when the
    # request raises.
    with requests.Session() as session:
        # Replaces the session's default header set with this single header.
        session.headers = header
        return session.get(url, timeout=10, allow_redirects=False).text

def getbyte(url, timeout=10):
    """Download ``url`` and return its raw content as ``bytes``.

    Args:
        url: Address of the resource to download.
        timeout: Seconds to wait on blocking network operations before
            giving up. Defaults to 10, the same value ``gethtml`` uses.

    Returns:
        The complete response body.

    Errors raised by ``urllib`` (bad URL, network failure, timeout) are not
    caught here and propagate to the caller.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"}
    request = urqt.Request(url, headers=header)
    # The context manager closes the response even if read() fails.
    with urqt.urlopen(request, timeout=timeout) as response:
        return response.read()

def makejpg(url, f):
    """Download ``url`` and write the bytes to the open binary file ``f``.

    ``f`` is always closed before this function returns, whether the
    download and write succeed or raise.

    Args:
        url: Address of the image to download.
        f: Writable binary file object; ownership passes to this function.
    """
    try:
        f.write(getbyte(url))
    finally:
        # Close on the error path too, so a failed download does not leak
        # the file handle.
        f.close()

def getintofold(string, base_dir=r"D:\信息\python\一些成品\百度图片爬虫"):
    """Change the working directory to ``<base_dir>/<string>图片``.

    The target folder is created inside ``base_dir`` when it does not exist
    yet. ``base_dir`` itself must already exist.

    Args:
        string: Search keyword; the folder is named ``string + "图片"``.
        base_dir: Directory that holds the per-keyword folders. Defaults to
            the location the script originally hard-coded.

    Raises:
        OSError: If ``base_dir`` cannot be entered, or the target exists but
            is not a directory.
    """
    os.chdir(base_dir)
    want = string + "图片"
    try:
        os.mkdir(want)
    except FileExistsError:
        # Already there from an earlier run; just enter it below.
        pass
    os.chdir(want)

def getall(num, url):
    """Download up to ``num`` thumbnails found on the result pages of ``url``.

    Each page is fetched with ``gethtml``, every ``"thumbURL"`` value in it
    is extracted with a regular expression, and each image is saved into the
    current working directory as ``1.jpg``, ``2.jpg``, ... in download order.
    A failed download prints ``错误``, removes the incomplete file and moves
    on to the next link, so the numbering has no gaps.

    The function stops early when a page contains no thumbnail links, so
    fewer than ``num`` files may be produced.

    Args:
        num: Number of images to save.
        url: Search URL of the first page; ``"&pn=<offset>"`` is appended to
            request the following pages.
    """
    key = re.compile(r'"thumbURL":"(.*?)"')
    tot = 0    # images saved successfully so far
    seen = 0   # links consumed so far (successes and failures)
    page_url = url
    while tot < num:
        links = key.findall(gethtml(page_url))
        if not links:
            # No results left (or the page layout changed): stop instead of
            # requesting the same page forever.
            break
        for link in links:
            if tot >= num:
                break
            seen += 1
            name = str(tot + 1) + ".jpg"
            with open(name, "wb") as f:
                try:
                    makejpg(link, f)
                except Exception:
                    # Best effort per image. KeyboardInterrupt/SystemExit are
                    # deliberately not caught so the user can abort the run.
                    ok = False
                else:
                    ok = True
            if not ok:
                print("错误")
                try:
                    os.remove(name)
                except OSError:
                    pass
                continue
            tot += 1
            print("第 " + str(tot) + " 个已下载")
        # NOTE(review): assumes "pn" is a result offset; advancing by links
        # consumed (not by successes) avoids re-fetching links already tried.
        page_url = url + "&pn=" + str(seen)

def endd():
    """Show the closing message and terminate the program.

    The message is shown through ``g.msgbox`` when a module-level ``g`` is
    available; otherwise it is printed to the console.

    Raises:
        SystemExit: Always, via ``sys.exit()``.
    """
    try:
        g.msgbox("感谢使用")
    except NameError:
        # No name ``g`` is bound by the imports visible in this file, so
        # fall back to the console instead of crashing before the exit.
        print("感谢使用")
    sys.exit()

def init():
    """Prompt for a keyword and a count, then download that many images.

    Reads both values from standard input, enters the per-keyword folder via
    ``getintofold``, builds the Baidu image search URL for the keyword and
    passes it to ``getall``.

    Raises:
        ValueError: If the count entered is not an integer. The count is
            parsed before any folder is created or any request is made.
    """
    now = input("请输入想要的图片: ")
    num = int(input("请输入想要的数量: "))
    getintofold(now)
    now = quote(now, encoding='utf-8')
    # "&copyright=" had been turned into "©right=" by HTML-entity decoding
    # when the script was published; the query parameter is restored here.
    url = (
        "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592"
        "&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1599885698346_R&pv=&ic=0&nc=1"
        "&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0"
        "&istype=2&ie=utf-8&sid=&word="
    ) + now
    getall(num, url)

# Script entry point: init() is called at module level, so the interactive
# prompts start as soon as this file is executed (or imported).
init()

你可能感兴趣的:(python爬取百度图片搜索)