python爬虫项目-一见倾心壁纸

方法1

 1 import re
 2 import urllib
 3 import urllib.request
 4 
 5 def getHtml(url):
 6     page = urllib.request.urlopen(url)
 7     html = page.read()
 8     return html
 9 
def getImage(html, x):
    """Download every image referenced by a data-src attribute in *html*.

    html: raw page bytes (decoded here as UTF-8 HTML).
    x: starting index; each image is saved as "<x>.jpg" in the
       current directory and x is incremented per download.
    Returns: the next unused index, so successive calls can be chained.
    """
    # Example matches:
    # https://mmbiz.qpic.cn/mmbiz_jpg/.../640?wx_fmt=jpeg
    # WeChat articles lazy-load images, so the real URL lives in the
    # data-src attribute rather than src — this regex is the key step.
    pattern = re.compile(r'data-src="(.*?)"')
    # Bug fix: the original compiled the pattern but then called
    # re.findall with the raw string; use the compiled pattern.
    imlist = pattern.findall(html.decode('utf-8'))

    print(imlist)
    for i in imlist:
        print(i)
        print(x)
        urllib.request.urlretrieve(i, '%s.jpg' % x)
        x += 1
    return x
# Crawl one WeChat article and save every embedded image.
# Guarded so importing this module no longer triggers network I/O;
# running it as a script behaves exactly as before.
if __name__ == '__main__':
    x = 1
    url = 'https://mp.weixin.qq.com/s/MVDcn0O3093OlIhMYkqBIA'
    html = getHtml(url)
    x = getImage(html, x)
    print('下载完成')
    # Downloads are saved in the same directory as this .py file.

方法2:使用 BeautifulSoup 解析页面,避免手写正则表达式(作者对正则不熟)

  

import requests
import urllib.request
from bs4 import BeautifulSoup

# Fetch the WeChat article and parse it with BeautifulSoup so no
# hand-written regex is needed.
url = "https://mp.weixin.qq.com/s/cm3Bua0UM1jbZnr2de7TWg"
r = requests.get(url)
demo = r.text
soup = BeautifulSoup(demo, "html.parser")

# WeChat lazy-loads images, so the real URL is in data-src, not src.
# (`is not None` is the idiomatic None check; the original used `!= None`.)
piclist = [link.get('data-src')
           for link in soup.find_all('img')
           if link.get('data-src') is not None]

# F:\桌面\pa is the save directory and must be created beforehand.
for x, http in enumerate(piclist):
    print(http)
    filesavepath = r'F:\桌面\pa\%s.jpg' % x
    urllib.request.urlretrieve(http, filesavepath)
    # x + 1 so the message counts from 1 while files are named from 0,
    # matching the original script's output exactly.
    print('正在保存第{:.0f}张图片'.format(x + 1))

print('下载完成')
    

 

 

 

 

你可能感兴趣的:(python爬虫项目-一见倾心壁纸)