import requests
from bs4 import BeautifulSoup
import time

# Crawl every gallery page of bing.ioliu.cn and collect, for each image,
# its <h3> description and its <img src> link.
data = {}  # str(counter) -> [description, image URL]
t0 = time.time()
url = "https://bing.ioliu.cn"
counter = 0
page = 1
while page <= 102:  # site currently paginates to ~102 pages — TODO confirm
    if page == 1:
        r = requests.get(url)
    else:
        # nextpage (set at the end of the previous iteration) is a
        # site-relative href such as "/?p=2".
        r = requests.get(url + nextpage)
    # Name the parser explicitly: avoids bs4's "no parser was explicitly
    # specified" warning and keeps parsing consistent across machines.
    soup = BeautifulSoup(r.text, "html.parser")
    des = soup.select("h3")
    imgs = soup.select("img")
    # Assumes the page has one <h3> caption per <img>, in the same order.
    for idx, img in enumerate(imgs):
        img_link = img.get("src")
        description = des[idx].string
        data[str(counter)] = [description, img_link]
        counter += 1
    # The third-from-last anchor is the "next page" link — fragile,
    # depends on the site's current markup.
    nextpage = soup.find_all("a")[-3].get("href")
    page += 1
t1 = time.time()
print("Time Consumption:", (t1 - t0))
print("Records", len(data))
import os
from multiprocessing.dummy import Pool as ThreadPool

# Default destination: the user's Pictures/Wallpaper folder.
save_dir = os.path.expanduser("~/Pictures/Wallpaper")
# makedirs with exist_ok=True creates any missing parent directories and
# avoids the check-then-create race of os.path.exists() + os.mkdir().
os.makedirs(save_dir, exist_ok=True)
# data is built by the scraping loop above: values are [description, link].
img_links = list(data.values())
def process(item):
    """Download one image into save_dir.

    item is a [description, link] pair; only the link is used — the file
    name is the last path component of the URL.
    """
    des, link = item
    fname = link.split("/")[-1]
    # A timeout keeps a stalled connection from hanging a pool worker
    # forever; the original call could block indefinitely.
    resp = requests.get(link, timeout=30)
    # Fail loudly on HTTP errors instead of silently saving an error page
    # with an image filename.
    resp.raise_for_status()
    with open(os.path.join(save_dir, fname), "wb") as f:
        f.write(resp.content)
t0 = time.time()
# multiprocessing.dummy gives a *thread* pool — appropriate here because
# the work is network/disk I/O bound, so threads overlap the waits.
pool = ThreadPool()
try:
    pool.map(process, img_links)
finally:
    # Guarantee the pool is torn down even if a download raises.
    pool.close()
    pool.join()
t1 = time.time()
print("Time Consumption:", (t1 - t0))
# Concurrency (via the multiprocessing.dummy thread pool) effectively speeds
# up downloading, instead of fetching images one by one in a plain for loop.