from urllib.request import urlopen,Request,urlretrieve
from urllib.error import HTTPError
import re
import os
from bs4 import BeautifulSoup as bf
'''
爬取图片
使用正则表达式匹配标签
'''
def down(imgUrl, path):
    """Download the resource at ``imgUrl`` and write its raw bytes to ``path``.

    Args:
        imgUrl: URL string (or ``urllib.request.Request``) passed straight to
            ``urlopen``.
        path: Destination file path; opened in binary write mode, so an
            existing file is overwritten.

    Raises:
        Whatever ``urlopen`` raises for an unreachable or invalid URL, and
        ``OSError`` if ``path`` cannot be opened or written.
    """
    # Read the whole payload first so a failed download never leaves an
    # empty or truncated file behind; the response is closed on exit.
    with urlopen(imgUrl) as response:
        data = response.read()
    # The context manager guarantees the handle is closed even if write fails.
    with open(path, "wb") as f:
        f.write(data)
def downLoadImg(imgUrl, name, saveDir="D:/img/"):
    """Download ``imgUrl`` and save it as ``<saveDir>/<name>.jpg``.

    Args:
        imgUrl: URL of the image, passed to ``urlretrieve``.
        name: File name without extension; ``.jpg`` is always appended.
        saveDir: Directory to save into. It is created (including parents)
            when missing. Defaults to ``"D:/img/"``.

    Raises:
        Whatever ``urlretrieve`` raises for an unreachable or invalid URL,
        and ``OSError`` if the directory or file cannot be created.
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(saveDir, exist_ok=True)
    # join() adds a separator only when saveDir lacks a trailing one, so the
    # default still resolves to "D:/img/<name>.jpg".
    target = os.path.join(saveDir, '{}.jpg'.format(name))
    urlretrieve(imgUrl, target)
def saveImg(url):
print(url)
header = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
} # 头部信息
try:
request = Request(url, headers=header)
myURL = urlopen(request)
print("该网页是否存在:", myURL.getcode()) # 200
if myURL.getcode() == 200:
html = myURL.read().decode('gbk')
# print(html)
reg = '