我一直在尝试提高下载数据的速度,于是用上了 Python 的多线程技术(当然只是简单的 map 方法),没想到速度快了一倍!下面先给出单线程版本作为对比:
# -*- coding: utf-8 -*-
import requests
import re
import time
import sys
import os  # local to this script section: used for portable output paths

# Python 2 only: reload(sys) re-exposes sys.setdefaultencoding(), which is then
# used to switch the implicit str/unicode codec to UTF-8.  Python 3 has neither
# name, so the hack is skipped there instead of crashing with NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

# Browser-like User-Agent sent with every listing-page request.
hea = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0'}

# NOTE(review): the image regex in the published source was lost (the literal
# was reduced to a bare line break, presumably an HTML tag stripped during
# extraction).  The pattern below is a reconstruction that captures the ``src``
# of every <img> tag -- confirm it against the live page markup.
IMG_PATTERN = r'<img src="(.*?)"'

# Images are written to this directory; create it up front so open() below
# does not fail when it is missing.
OUT_DIR = 'test'
if not os.path.isdir(OUT_DIR):
    os.makedirs(OUT_DIR)

# Seconds to wait on a stalled connection before giving up.
REQUEST_TIMEOUT = 30

si = 0  # running image index across all pages; used as the file name
time1 = time.time()
for i in range(2190, 2206):
    url = 'http://jandan.net/ooxx/page-%d#comments' % i
    html = requests.get(url, headers=hea, timeout=REQUEST_TIMEOUT)
    html.encoding = 'utf-8'
    target = re.findall(IMG_PATTERN, html.text)
    for each in target:
        pic = requests.get(each, timeout=REQUEST_TIMEOUT)
        # 'wb': the image payload is raw bytes, so write in binary mode.
        with open(os.path.join(OUT_DIR, str(si) + '.jpg'), 'wb') as fp:
            fp.write(pic.content)
        si += 1
time2 = time.time()
# Parenthesised form prints the same text on Python 2 and Python 3.
print(u'单线程耗时:' + str(time2 - time1))
多线程版本如下:下载同样的图片,多线程比单线程快了一倍!
# -*- coding: utf-8 -*-
from multiprocessing.dummy import Pool as ThreadPool
import requests
import re
import time
import sys
# Python 2 only: reload(sys) re-exposes sys.setdefaultencoding(), which is then
# used to switch the implicit str/unicode codec to UTF-8.  Python 3 has neither
# name, so the hack is skipped there instead of crashing with NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

# Browser-like User-Agent header used for the listing-page requests.
hea = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0'}
def download(url, img_pattern=r'<img src="(.*?)"', out_dir='test'):  # download worker
    """Fetch one listing page and save every image it references.

    The page at ``url`` is requested with the module-level ``hea`` headers,
    every match of ``img_pattern`` in its text is downloaded, and each image
    is written to ``out_dir``.

    Files are named ``<page>_<index>.jpg`` where ``<page>`` is the page number
    taken from ``url``.  The per-call index alone is not unique: every call
    starts counting at 0, so without the page prefix calls for different
    pages (run concurrently by the thread pool) would overwrite each other's
    ``0.jpg``, ``1.jpg``, ...

    Args:
        url: Address of the listing page to scrape.
        img_pattern: Regex with one capture group yielding an image URL.
            NOTE(review): the pattern in the published source was lost (the
            literal was reduced to a bare line break); this default is a
            reconstruction -- confirm it against the live page markup.
        out_dir: Directory the images are written to; created if missing.

    Raises:
        requests.RequestException: if a page or image request fails or
            times out.
    """
    import os  # function-scope import keeps this block self-contained

    timeout = 30  # seconds; a stalled connection would otherwise block pool.join()

    html = requests.get(url, headers=hea, timeout=timeout)
    html.encoding = 'utf-8'
    target = re.findall(img_pattern, html.text)

    # Unique per-page prefix; fall back to a sanitised URL if the address
    # does not carry a "page-<n>" component.
    page = re.search(r'page-(\d+)', url)
    prefix = page.group(1) if page else re.sub(r'\W+', '_', url)

    if not os.path.isdir(out_dir):
        try:
            os.makedirs(out_dir)
        except OSError:
            # Another worker may have created it between the check and the call.
            if not os.path.isdir(out_dir):
                raise

    for si, each in enumerate(target):
        pic = requests.get(each, timeout=timeout)
        path = os.path.join(out_dir, '%s_%d.jpg' % (prefix, si))
        # 'wb': the image payload is raw bytes, so write in binary mode.
        with open(path, 'wb') as fp:
            fp.write(pic.content)
def addr_list(start=2190, stop=2206):  # build the list of page addresses
    """Return the comment-page URLs for pages ``start`` up to ``stop - 1``.

    Args:
        start: First page number (inclusive).
        stop: Page number to stop before (exclusive), as with ``range``.

    Returns:
        A list of ``http://jandan.net/ooxx/page-<n>#comments`` strings in
        ascending page order; empty when ``start >= stop``.
    """
    return ['http://jandan.net/ooxx/page-%d#comments' % page
            for page in range(start, stop)]
def multiDownload():
    """Download every page from ``addr_list()`` on a 4-thread pool and print the elapsed time.

    ``download`` is mapped over the page URLs; ``pool.map`` blocks until all
    pages are processed and re-raises the first exception a worker raised.
    """
    time1 = time.time()
    urls = addr_list()
    pool = ThreadPool(4)
    try:
        pool.map(download, urls)
    finally:
        # Always shut the pool down, even if a worker raised, so no threads
        # are left behind.
        pool.close()
        pool.join()
    time2 = time.time()
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(u'多线程耗时:' + str(time2 - time1))
# Run the benchmark only when executed as a script, so importing this module
# does not start downloading.
if __name__ == '__main__':
    multiDownload()