Python爬虫:抓取单个图片/多个图片

抓取单个图片:

#pics.py
"""this is for catching pictures"""
import requests
import re
from bs4 import BeautifulSoup


def catch_onepicture(url, headers):
    """Fetch *url* and return the image URL from the first
    ``<a class="mainphoto">`` anchor on the page.

    :param url: page URL to scrape (e.g. a douban photo page)
    :param headers: dict of HTTP headers (user-agent etc.) passed to requests
    :returns: the ``src`` URL of the image inside the anchor
    :raises IndexError: if no matching anchor / no ``src`` attribute is found
    """
    response = requests.get(url=url, headers=headers)
    html = response.text
    soup = BeautifulSoup(html, 'lxml')
    links = soup.find_all('a', class_='mainphoto')
    # BUG FIX: the previous pattern was the empty string, which matches ''
    # everywhere, so link[0] was always ''.  Extract the img src attribute
    # from the anchor's HTML instead.
    pattern = r'src="(.*?)"'
    link = re.findall(string=str(links[0]), pattern=pattern)

    return link[0]
#main.py

import requests
import pics

# Browser-like user agent so the site serves the normal page instead of
# blocking the default python-requests client.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
}
url = 'https://movie.douban.com/photos/photo/2638827823/#title-anchor'

# Resolve the direct image link from the photo page, then download it.
image_link = pics.catch_onepicture(url, headers)
print(image_link)

image_bytes = requests.get(image_link).content
with open('image1.ico', 'wb') as out_file:
    out_file.write(image_bytes)

抓取多个图片:

#pics.py

"""this is for catching pictures"""
import requests
import re
from bs4 import BeautifulSoup


def catch_onepicture(url, headers, i):
    """Fetch *url* and return the image URL of the *i*-th
    ``<img width="100">`` element on the page.

    :param url: page URL to scrape (e.g. the douban top250 list)
    :param headers: dict of HTTP headers (user-agent etc.) passed to requests
    :param i: zero-based index of the image tag to extract
    :returns: the ``src`` URL of that image
    :raises IndexError: if fewer than ``i + 1`` matching tags are found
    """
    response = requests.get(url=url, headers=headers)
    html = response.text
    soup = BeautifulSoup(html, 'lxml')
    links = soup.find_all('img', width=100)
    # BUG FIX: the previous pattern '.*?' lazily matches the empty string
    # first, so link[0] was always ''.  Extract the img src attribute
    # from the tag's HTML instead.
    pattern = r'src="(.*?)"'
    link = re.findall(string=str(links[i]), pattern=pattern)

    return link[0]
#main.py
import os
import time

import requests

import pics

# Browser-like user agent so the site serves the normal page instead of
# blocking the default python-requests client.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
}
url = 'https://movie.douban.com/top250'

# ROBUSTNESS FIX: create the output directory up front — the original
# crashed with FileNotFoundError when D:/imageFromSpiders did not exist.
save_dir = 'D:/imageFromSpiders'
os.makedirs(save_dir, exist_ok=True)

for i in range(25):
    # Resolve the i-th poster image URL from the page, then download it.
    link = pics.catch_onepicture(url, headers, i=i)
    print(link)

    file_name = f"{save_dir}/image{i+1}.ico"
    with open(file_name, 'wb') as file:
        file.write(requests.get(link).content)

    # Throttle requests to avoid hammering the server / getting blocked.
    time.sleep(1)

你可能感兴趣的:(Python爬虫学习笔记,python,爬虫)