Fetching a single picture:
#pics.py
"""this is for catching pictures"""
import requests
import re
from bs4 import BeautifulSoup
def catch_onepicture(url, headers):
    """Fetch the page at *url* and return the image URL found inside the
    first ``<a class="mainphoto">`` element.

    Args:
        url: Page URL to scrape (e.g. a Douban photo page).
        headers: HTTP request headers; should carry a browser User-Agent
            so the site serves the normal page.

    Returns:
        The URL string from the ``src="..."`` attribute of the embedded tag.

    Raises:
        requests.HTTPError: If the page request returns a 4xx/5xx status.
        IndexError: If no matching element or no src URL is found.
    """
    response = requests.get(url=url, headers=headers)
    # Fail loudly on an error status instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    links = soup.find_all('a', class_='mainphoto')
    # BUG FIX: the original pattern was '' (empty), so re.findall matched the
    # empty string at every position and link[0] was always '' — the function
    # could never return a real URL. Extract the content of src="...".
    pattern = r'(?<=src=")[^"]*(?=")'
    link = re.findall(string=str(links[0]), pattern=pattern)
    return link[0]
#main.py
import requests
import pics
# Browser-like User-Agent so the site serves the normal page instead of
# blocking the script as a bot.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
}
url = 'https://movie.douban.com/photos/photo/2638827823/#title-anchor'

# Resolve the image URL from the photo page, then download and save it.
link = pics.catch_onepicture(url, headers)
print(link)
image_bytes = requests.get(link).content
with open('image1.ico', 'wb') as file:
    file.write(image_bytes)
Fetching multiple pictures:
#pics.py
"""this is for catching pictures"""
import requests
import re
from bs4 import BeautifulSoup
def catch_onepicture(url, headers, i):
    """Fetch the page at *url* and return the image URL of the *i*-th
    ``<img width="100">`` element on it.

    Args:
        url: Page URL to scrape (e.g. the Douban Top 250 list page).
        headers: HTTP request headers; should carry a browser User-Agent
            so the site serves the normal page.
        i: Zero-based index of the image element to extract.

    Returns:
        The URL string from the ``src="..."`` attribute of the i-th tag.

    Raises:
        requests.HTTPError: If the page request returns a 4xx/5xx status.
        IndexError: If fewer than ``i + 1`` matching elements are found,
            or the element has no src URL.
    """
    response = requests.get(url=url, headers=headers)
    # Fail loudly on an error status instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    links = soup.find_all('img', width=100)
    # BUG FIX: the original pattern was '' (empty), so re.findall matched the
    # empty string at every position and link[0] was always '' — the function
    # could never return a real URL. Extract the content of src="...".
    pattern = r'(?<=src=")[^"]*(?=")'
    link = re.findall(string=str(links[i]), pattern=pattern)
    return link[0]
#main.py
import pics
import requests
import time
# Browser-like User-Agent so the site serves the normal page instead of
# blocking the script as a bot.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
}
url = 'https://movie.douban.com/top250'

# Download the cover image of each of the 25 movies on the first list page.
for index in range(25):
    link = pics.catch_onepicture(url, headers, i=index)
    print(link)
    file_name = f"D:/imageFromSpiders/image{index+1}.ico"
    with open(file_name, 'wb') as file:
        file.write(requests.get(link).content)
    # Pause between requests to avoid hammering the server.
    time.sleep(1)