话不多说,直接上代码:
import os
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
def download_image(url, save_directory):
    """Download the image at *url* and save it into *save_directory*.

    Args:
        url: Absolute URL of a single image.
        save_directory: Existing directory to write the file into.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on network failure or timeout.
    """
    # 发送GET请求; timeout prevents hanging forever on a dead server.
    response = requests.get(url, timeout=10)
    # Fail loudly on HTTP errors instead of silently saving an error page
    # as if it were image data.
    response.raise_for_status()
    # 获取图片文件名 — take it from the URL *path* only, so query strings
    # and fragments ("pic.jpg?size=large") don't leak into the filename.
    file_name = os.path.basename(urlparse(url).path)
    if not file_name:
        # URL ends in "/" — fall back to a generic name.
        file_name = "image"
    # 拼接保存路径
    save_path = os.path.join(save_directory, file_name)
    # 保存图片到本地
    with open(save_path, "wb") as file:
        file.write(response.content)
    print(f"图片 {file_name} 下载成功!")
def crawl_images(url, save_directory):
    """Fetch *url*, find every ``<img>`` tag, and download each image.

    Args:
        url: Page URL to crawl.
        save_directory: Existing directory the images are written into.

    Raises:
        requests.HTTPError: if the page itself cannot be fetched.
    """
    # 发送GET请求 for the page; fail early if the page is unreachable.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    # 使用BeautifulSoup解析HTML内容
    soup = BeautifulSoup(response.text, "html.parser")
    # 查找所有的 <img> 标签
    img_tags = soup.find_all("img")
    # 遍历每个标签,获取图片URL并下载保存
    for img_tag in img_tags:
        try:
            # .get() instead of [] — tags without src are skipped, not errors.
            src = img_tag.get("src")
            if not src:
                continue
            # Resolve relative URLs ("/static/a.png") against the page URL;
            # passing them to requests verbatim always fails.
            image_url = urljoin(url, src)
            download_image(image_url, save_directory)
        except Exception as e:
            # Best-effort: one broken image must not abort the whole crawl.
            print(f"图片下载失败: {e}")
# 设置要爬取的网页URL和保存图片的目录
url = "https://www.example.com"
save_directory = "images"

# Guard the side effects so importing this module doesn't hit the network.
if __name__ == "__main__":
    # 创建保存目录 (no-op if it already exists)
    os.makedirs(save_directory, exist_ok=True)
    # 开始爬取图片
    crawl_images(url, save_directory)
记得把代码末尾的 `url`(要爬取的网址)和 `save_directory`(保存目录)改成你自己的。
开头用到的两个第三方库需要先用 pip 安装:`pip install requests beautifulsoup4`(注意 bs4 的安装包名是 beautifulsoup4)。