# -*- coding: utf-8 -*-
"""
Created on Sat May 4 20:24:04 2019
@author: navy
"""
import os
import re
import urllib
import urllib.request
def getHtml(url):
    """Fetch the raw contents of a page.

    Args:
        url: Address to open; anything ``urllib.request.urlopen`` accepts.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
    """
    # The context manager closes the response even if read() raises.
    with urllib.request.urlopen(url) as page:
        return page.read()
def getImage(html, save_dir=r"E:\python_workspaces\images", encoding="gbk"):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute.

    The images are saved as ``1.jpg``, ``2.jpg``, ... inside ``save_dir`` in
    the order they appear in the page, and a summary line is printed at the
    end.

    Args:
        html: Page content, either raw ``bytes`` or already-decoded ``str``.
        save_dir: Directory the images are written to; created if missing.
        encoding: Codec used to decode ``html`` when it is ``bytes``.

    Raises:
        urllib.error.URLError: If an image cannot be downloaded.
        ValueError: If a matched ``src`` value is not a URL that
            ``urlretrieve`` can open (for example a relative path).
    """
    # Raw string so the backslashes reach the regex engine untouched.
    # `.` matches any character; `.*?` is non-greedy, so each match stops at
    # the first `.jpg"` instead of swallowing several attributes at once.
    reg = r'src="(.*?\.jpg)"'
    pattern = re.compile(reg, re.I)

    if isinstance(html, bytes):
        # Undecodable bytes are replaced rather than aborting the whole
        # page.
        html = html.decode(encoding, errors="replace")

    result = pattern.findall(html)
    if result:
        # urlretrieve does not create missing directories itself.
        os.makedirs(save_dir, exist_ok=True)

    for count, imgUrl in enumerate(result, start=1):
        print(imgUrl)
        target = os.path.join(save_dir, "%s.jpg" % count)
        urllib.request.urlretrieve(imgUrl, target, callbackfunc)  # download the file

    print("下载完成,总共有:", len(result), "张图片")
def callbackfunc(blocknum, blocksize, totalsize):
    """Report download progress; used as the ``urlretrieve`` report hook.

    Prints the completed percentage, capped at 100. When the total size is
    not known (zero or negative), a percentage cannot be computed, so the
    number of bytes transferred so far is printed instead.

    Args:
        blocknum: Number of blocks transferred so far.
        blocksize: Size of one block in bytes.
        totalsize: Size of the remote file in bytes, or a non-positive
            value when the server did not report one.
    """
    received = blocknum * blocksize
    if totalsize <= 0:
        # Avoids ZeroDivisionError / negative percentages for unknown sizes.
        print("%d bytes" % received)
        return
    # The last block is usually partial, so the raw ratio can exceed 100.
    percent = min(100.0, 100.0 * received / totalsize)
    print("%.f%%" % percent)
if __name__ == '__main__':
    # Script entry point: fetch the home page, then download the .jpg images
    # it references.
    getImage(getHtml("http://www.netbian.com/"))