Method 1: regular expression
import re
import urllib.request

def getHtml(url):
    page = urllib.request.urlopen(url)
    html = page.read()
    return html

def getImage(html, x):
    # Example image URLs embedded in the article page:
    # https://mmbiz.qpic.cn/mmbiz_jpg/ib55rg6wzUc3B16KIY3uU53nkcTTDic8uEA4WWBPaHJ8LpibvAnkpS2FZtyjrv7w7dbEeNrhfvPuuyReNAxsLdgJA/640?wx_fmt=jpeg
    # https://mmbiz.qpic.cn/mmbiz_jpg/ib55rg6wzUc3B16KIY3uU53nkcTTDic8uEHqocI7r86nehl2NeForAqvcTiaEAIuWjTWPKNXnnXIPuUuqnuJeFKYw/640?wx_fmt=jpeg
    # The regular expression below is the key step: it captures the data-src attribute of each <img> tag.
    reg = 'data-src="(.*?)"'
    pattern = re.compile(reg)
    imlist = pattern.findall(html.decode('utf-8'))
    print(imlist)
    for i in imlist:
        print(i)
        print(x)
        urllib.request.urlretrieve(i, '%s.jpg' % x)
        x += 1
    return x

x = 1
url = 'https://mp.weixin.qq.com/s/MVDcn0O3093OlIhMYkqBIA'
html = getHtml(url)
x = getImage(html, x)
print('Download complete')
# The downloaded images end up in the same directory as this .py file.
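To see what the data-src regex is doing, here is a minimal sketch run against a hypothetical HTML fragment shaped like the image URLs quoted in the comments above; the fragment and its shortened URLs are made up for illustration and are not taken from the real page.

import re

# Hypothetical fragment of a WeChat article page (the real page is much larger).
html = '''
<img class="rich_pages" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/AAA/640?wx_fmt=jpeg" />
<img class="rich_pages" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/BBB/640?wx_fmt=jpeg" />
'''

# The non-greedy group (.*?) captures only the URL between the quotes of data-src="...".
reg = 'data-src="(.*?)"'
print(re.findall(reg, html))
# ['https://mmbiz.qpic.cn/mmbiz_jpg/AAA/640?wx_fmt=jpeg',
#  'https://mmbiz.qpic.cn/mmbiz_jpg/BBB/640?wx_fmt=jpeg']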
Method 2: BeautifulSoup, to avoid writing a regular expression (since I am not good at them)
import requests
import urllib.request
from bs4 import BeautifulSoup

url = "https://mp.weixin.qq.com/s/cm3Bua0UM1jbZnr2de7TWg"
r = requests.get(url)
demo = r.text
soup = BeautifulSoup(demo, "html.parser")

# Collect the data-src attribute of every <img> tag; tags without one are skipped.
piclist = []
for link in soup.find_all('img'):
    link_list = link.get('data-src')
    if link_list is not None:
        piclist.append(link_list)
# print(piclist)
# print(type(link_list))

x = 0
for http in piclist:
    print(http)
    # F:\桌面\pa is the save path; the folder has to be created before running the script.
    filesavepath = r'F:\桌面\pa\%s.jpg' % x
    urllib.request.urlretrieve(http, filesavepath)
    x += 1
    print('Saving image {:.0f}'.format(x))
print('Download complete')
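If you would rather not create the target folder by hand, a small variation can prepare it automatically with os.makedirs. This is only a sketch layered on top of Method 2; the downloads folder name and the save_images helper are my own assumptions, not part of the original script.

import os
import urllib.request

def save_images(piclist, savedir='downloads'):
    # Hypothetical helper: create the target folder if it is missing,
    # then download every URL collected by the BeautifulSoup loop above.
    os.makedirs(savedir, exist_ok=True)
    for x, http in enumerate(piclist):
        filesavepath = os.path.join(savedir, '%s.jpg' % x)
        urllib.request.urlretrieve(http, filesavepath)
        print('Saving image {}'.format(x + 1))
    print('Download complete')

Called as save_images(piclist), it replaces the second for loop in Method 2 and saves the images under a relative downloads folder instead of a hard-coded absolute path.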