import requests
from lxml import etree
import re
import os
import threading
from multiprocessing import Pool
# Present the script to the server as an ordinary desktop browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36'
    ),
}
# Search keyword typed by the user; also used later as the output folder name.
name = input("请输入搜索的关键字:")
# 1. Build the URL of the Baidu image-search result page.
def geturl(keyword=None):
    """Return the Baidu image-search URL for a search keyword.

    Args:
        keyword: Text to search for. When omitted (``None``) the
            module-level ``name`` entered by the user is used, so existing
            ``geturl()`` calls keep working.

    Returns:
        The search URL, with the keyword percent-encoded (UTF-8) in the
        ``word`` query parameter.
    """
    # Imported locally so the function does not depend on extra file-level
    # imports.
    from urllib.parse import quote

    if keyword is None:
        keyword = name
    # Percent-encode the keyword: characters such as '&', '#', '=' or '+'
    # would otherwise be read as query-string syntax and truncate or corrupt
    # the search term.
    url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1539393278843_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word={}'.format(quote(keyword, safe=''))
    return url
# Search-page URL, built once when the module loads; getimageurl() reads this
# global.
imageurl = geturl()
# 2. Extract the image URLs embedded in the source of the result page.
def getimageurl():
    """Download the search-result page and return the image URLs found in it.

    Reads the module-level ``imageurl`` (page to fetch) and ``headers``
    (request headers). The page source is scanned for ``"objURL":"..."``
    pairs and each captured value is returned as an image URL.

    Returns:
        A list of URL strings in the order they appear in the page; empty
        when the page contains no ``objURL`` entries.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the server does not answer within the timeout.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(imageurl, headers=headers, timeout=10)
    response.encoding = 'utf-8'
    # Non-greedy capture so each match stops at the first closing quote.
    return re.findall(r'"objURL":"(.*?)"', response.text)
# Image URLs scraped from the result page, fetched once when the module loads;
# saveimage() iterates over this list.
saveimageurl = getimageurl()
# 3. Save the images to the local disk.
def _safe_image_name(url, index):
    """Build a filesystem-safe, collision-free file name for image ``url``."""
    # Keep the URL's last 8 characters as the recognisable part of the name,
    # but replace anything that is not plainly safe in a file name ('/', '?',
    # '=', ...) so the result can never point into another directory.
    tail = re.sub(r'[^0-9A-Za-z._-]', '_', url[-8:])
    # Escaped dot + case-insensitive match: only a real image extension counts.
    if re.search(r'\.(jpg|jpeg|png|gif|bmp|webp)$', tail, re.IGNORECASE) is None:
        tail += ".jpg"
    # The index prefix keeps URLs that share the same tail from overwriting
    # each other.
    return "%03d_%s" % (index, tail)


def saveimage():
    """Download every URL in ``saveimageurl`` into a folder named ``name``.

    Reads the module-level ``name`` (target folder, created if missing),
    ``saveimageurl`` (URLs to download) and ``headers`` (request headers).
    A failure on one image is reported and skipped so the remaining images
    are still downloaded.

    Returns:
        None.

    Raises:
        OSError: If the target folder cannot be created.
    """
    folder = name
    os.makedirs(folder, exist_ok=True)
    for index, image_url in enumerate(saveimageurl):
        savename = _safe_image_name(image_url, index)
        try:
            response = requests.get(image_url, headers=headers, timeout=10)
            # Skip error responses instead of saving an error page as an image.
            response.raise_for_status()
            print("正在保存图片%s"%savename)
            with open(os.path.join(folder, savename), 'wb') as filesave:
                filesave.write(response.content)
        except (requests.RequestException, OSError) as err:
            # Best effort: report the problem and carry on with the next image.
            print("保存图片%s失败: %s" % (savename, err))
# Script entry point: download the images, then tell the user where they are.
if __name__ == '__main__':
    saveimage()
    finished_message = "文件保存在当前目录下的" + name + "文件夹下面"
    print(finished_message)
# Before running this script, install the two required libraries:
#   pip install requests
#   pip install lxml