电脑壁纸?手机壁纸?还要去各个壁纸网站上搜索吗?现在不需要了!只需要选择想要的壁纸类型,然后静静等待一会儿,大量壁纸就会保存到你的电脑上,一个爬虫就能解决你找壁纸的烦恼。
该爬虫比较简单,很容易上手:通过接口的方式获取图片链接地址,其中用到了正则表达式。不会正则的小伙伴可以去学习一下,因为它是爬虫领域很重要的工具,在数据清洗中占据着重要位置。好了,不多说,直接展示代码:
import requests,re,os
class Downloadpucture(object):
    """Interactive bulk wallpaper downloader for www.netbian.com.

    Typical use is ``Downloadpucture().choosevarise()``: the user picks a
    save folder and a wallpaper category, then every list page of that
    category is walked and the 1920x1080 image of each wallpaper is saved.

    State that used to travel through module globals now lives on the
    instance:

    * ``path``       -- save folder chosen in :meth:`choosepath`
    * ``number``     -- category index chosen in :meth:`choosenum`
    * ``page_url``   -- category list page currently being processed
    * ``detail_url`` -- wallpaper detail page currently being processed
    """

    BASE_URL = "http://www.netbian.com"
    # Seconds before an HTTP request is abandoned, so a stalled connection
    # cannot hang the whole run.
    REQUEST_TIMEOUT = 10
    # Categories at or above this index are served under "/s/<slug>/".
    SEARCH_CATEGORY_START = 22
    # The image URL is the second ".jpg" ``src`` on a detail page (same
    # choice as the original code). NOTE(review): presumably the first one
    # is not the wallpaper itself -- confirm against the live page.
    IMAGE_LINK_INDEX = 1

    # URL slug of each category; positions match CATEGORY_LABELS.
    CATEGORY_SLUGS = (
        "rili", "dongman", "fengjing", "meinv", "youxi", "yingshi",
        "dongtai", "weimei", "sheji", "keai", "qiche", "huahui", "dongwu",
        "jieri", "renwu", "meishi", "shuiguo", "jianzhu", "tiyu", "junshi",
        "feizhuliu", "qita", "wangzherongyao", "huyan", "lol",
    )
    # Menu text shown to the user for each category.
    CATEGORY_LABELS = (
        "0.日历", "1.动漫", "2.风景", "3.美女", "4.游戏", "5.影视", "6.动态",
        "7.唯美", "8.设计", "9.可爱", "10.汽车", "11.花卉", "12.动物",
        "13.节日", "14.人物", "15.美食", "16.水果", "17.建筑", "18.体育",
        "19.军事", "20.非主流", "21.其他", "22.王者荣耀", "23.护眼", "24.LOL",
    )

    def __init__(self):
        """Set up the request headers and the empty per-run state."""
        # Header names must not contain spaces: the original
        # "User - Agent"-style names were not the real headers, so the
        # intended values were never sent.
        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;"
                      "q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "max-age=0",
            "Host": "www.netbian.com",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/70.0.3538.25 Safari/537.36 "
                          "Core/1.70.3870.400 QQBrowser/10.8.4405.400",
        }
        self.path = None
        self.number = None
        self.page_url = None
        self.detail_url = None

    # ------------------------------------------------------------------
    # Pure helpers (no I/O)
    # ------------------------------------------------------------------
    @staticmethod
    def _normalize_path(raw):
        """Return *raw* ending in exactly the "/" the save code relies on."""
        return raw if raw.endswith("/") else raw + "/"

    @classmethod
    def _category_path(cls, kd, number):
        """Return the site-relative folder of category *kd* (index *number*)."""
        if 0 <= number < cls.SEARCH_CATEGORY_START:
            return "/%s/" % kd
        return "/s/%s/" % kd

    @classmethod
    def _page_url(cls, kd, number, page):
        """Return the URL of list page *page* (1-based) of a category."""
        base = cls.BASE_URL + cls._category_path(kd, number)
        # Page 1 is the bare folder URL; later pages are "index_<n>.htm".
        if page == 1:
            return base
        return "%sindex_%d.htm" % (base, page)

    @classmethod
    def _parse_page_count(cls, html, kd, number):
        """Return the number of list pages advertised by a category page.

        The count is the largest ``index_<n>.htm`` link that points into
        this category's own folder; a page without such links counts as a
        single page.

        NOTE(review): the original parsing statement was corrupted in the
        source, so this is a reconstruction based on the ``index_<n>.htm``
        URL scheme used when building page URLs -- verify against the site.
        """
        pattern = r"%sindex_(\d+)\.htm" % re.escape(
            cls._category_path(kd, number)
        )
        pages = [int(found) for found in re.findall(pattern, html)]
        return max(pages) if pages else 1

    @staticmethod
    def _extract_wallpaper_ids(html):
        """Return the distinct wallpaper ids linked from a list page, in order.

        Ids are captured as whole digit runs, so they may have any length
        (the original sliced a fixed five characters).
        """
        ids = []
        seen = set()
        for wallpaper_id in re.findall(r'href="/desk/(\d+)[^"]*?\.htm"', html):
            if wallpaper_id not in seen:
                seen.add(wallpaper_id)
                ids.append(wallpaper_id)
        return ids

    @classmethod
    def _extract_image_url(cls, html):
        """Return the wallpaper image URL found in a detail page.

        Whitespace-separated tokens of the form ``src="....jpg"`` are
        collected and the one at ``IMAGE_LINK_INDEX`` is returned.

        Raises:
            IndexError: the page has too few ``.jpg`` sources.
        """
        links = [
            token[5:-1]  # strip the leading 'src="' and the closing quote
            for token in html.split()
            if token.startswith("src=") and token[-4:-1] == "jpg"
        ]
        if len(links) <= cls.IMAGE_LINK_INDEX:
            raise IndexError(
                "expected at least %d jpg sources, found %d"
                % (cls.IMAGE_LINK_INDEX + 1, len(links))
            )
        return links[cls.IMAGE_LINK_INDEX]

    @staticmethod
    def _filename_for(link):
        """Return the local file name used for image URL *link*.

        The name is the last six characters of the URL's base name (without
        extension) plus ".jpg". Working on the base name keeps "/" out of
        the result even when the remote name is shorter than six characters.
        """
        basename = link.rsplit("/", 1)[-1]
        stem = os.path.splitext(basename)[0]
        return stem[-6:] + ".jpg"

    def _get(self, url, headers=None):
        """GET *url* with a timeout and raise on HTTP error statuses."""
        response = requests.get(
            url, headers=headers, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response

    # ------------------------------------------------------------------
    # Interactive flow
    # ------------------------------------------------------------------
    def choosevarise(self):
        """Show the category menu, collect the user's choices and download."""
        labels = self.CATEGORY_LABELS
        # Two rows that together cover every entry (the original slices
        # skipped index 12).
        print(list(labels[:13]))
        print(list(labels[13:]))
        self.choosepath()
        number = self.choosenum()
        self.judge(number, labels, self.CATEGORY_SLUGS)

    def choosepath(self):
        """Prompt until a usable save folder is given and return it.

        The folder (including missing parents) is created when it does not
        exist. The result always ends with "/" and is stored in
        ``self.path``.
        """
        while True:
            raw = input("请输入保存路径,具体到某个文件夹:").strip()
            if not raw:
                print("路径错误,请仔细检查路径后重试!!")
                continue
            path = self._normalize_path(raw)
            try:
                os.makedirs(path, exist_ok=True)
            except OSError:
                # Only filesystem errors trigger a retry; Ctrl+C and EOF
                # still end the program instead of looping forever.
                print("路径错误,请仔细检查路径后重试!!")
                continue
            break
        self.path = path
        print("图片保存路径:%s" % path)
        return path

    def choosenum(self):
        """Prompt until a valid category index is entered and return it.

        The result is also stored in ``self.number``.
        """
        last = len(self.CATEGORY_SLUGS) - 1
        while True:
            try:
                number = int(input("请输入要下载的类型图序号:"))
            except ValueError:
                print("请输入正确序号!!!")
                continue
            if 0 <= number <= last:
                break
            print("请输入正确序号!!!")
        self.number = number
        return number

    def judge(self, number, LIST, list):  # noqa: A002 - name kept for callers
        """Walk every list page of the chosen category and download it.

        Args:
            number: index of the chosen category.
            LIST: menu labels, indexed by *number*.
            list: URL slugs, indexed by *number*.
        """
        kd = list[number]
        print("你已选择:%s" % LIST[number])
        total_pages = self.picturepages(kd, number)
        for page in range(1, total_pages + 1):
            self.page_url = self._page_url(kd, number, page)
            self.picturenum()

    # ------------------------------------------------------------------
    # Scraping
    # ------------------------------------------------------------------
    def indexdata(self):
        """Return the HTML of the list page at ``self.page_url``."""
        return self._get(self.page_url, self.headers).text

    def picturenum(self):
        """Download every wallpaper linked from the current list page.

        A failure on the list page or on a single wallpaper is reported and
        skipped so that the rest of the run continues.

        Raises:
            RuntimeError: no save folder has been chosen yet.
        """
        if self.path is None:
            raise RuntimeError("choosepath() must be called before downloading")
        try:
            html = self.indexdata()
        except requests.RequestException as exc:
            print("页面获取失败:%s (%s)" % (self.page_url, exc))
            return
        for wallpaper_id in self._extract_wallpaper_ids(html):
            self.detail_url = "%s/desk/%s-1920x1080.htm" % (
                self.BASE_URL, wallpaper_id,
            )
            try:
                self.download(self.path)
            except (requests.RequestException, IndexError, OSError) as exc:
                print("下载失败:%s (%s)" % (self.detail_url, exc))

    def picturepages(self, kd, number):
        """Return how many list pages category *kd* (index *number*) has."""
        first_page = self.BASE_URL + self._category_path(kd, number)
        html = self._get(first_page, self.headers).text
        return self._parse_page_count(html, kd, number)

    def htmldata(self, URl):
        """Return the HTML of the wallpaper detail page at *URl*."""
        return self._get(URl, self.headers).text

    def picturelink(self):
        """Return the image URL found on the page at ``self.detail_url``.

        Raises:
            IndexError: the page does not contain the expected image link.
        """
        return self._extract_image_url(self.htmldata(self.detail_url))

    def download(self, PATH):
        """Save the image of the current detail page into folder *PATH*."""
        # Resolve the link once; every picturelink() call re-fetches the
        # detail page.
        link = self.picturelink()
        filename = self._filename_for(link)
        # As in the original, the site headers are not sent here: they pin
        # "Host" to www.netbian.com, which would be wrong if the image is
        # served from another host.
        image = self._get(link)
        with open(os.path.join(PATH, filename), "wb") as f:
            f.write(image.content)
        print(filename + "下载完成!")
if __name__ == "__main__":
    # Start the interactive downloader only when the file is run as a script.
    downloader = Downloadpucture()
    downloader.choosevarise()
成果真的很nice,再也不用为没有好看的壁纸而烦恼啦!