import os
import time
import urllib.request

import requests
from lxml import etree
# Pool from multiprocessing.dummy is a thread pool (same API as the process-based Pool)
from multiprocessing.dummy import Pool as ThreadPool
count = 0                    # running count of downloaded images
root = "G://doutuwang//"     # root folder where the images are saved
def get_url(page_url):
    # Fetch one list page and pull the lazy-loaded image URLs out of it
    html = requests.get(page_url)
    selector = etree.HTML(html.text)
    # Each emoticon sits in a container div; the real image URL is stored in the
    # img tag's data-original attribute (the src is only a lazy-load placeholder)
    img_div = selector.xpath("//*[@id='pic-detail']/div/div[2]/div[2]/ul/li/div/div")
    print(len(img_div))
    for div in img_div:
        for img_url in div.xpath("./a/img/@data-original"):
            download_img(str(img_url))
# Save a single image to the local folder
def download_img(url):
    global count
    path = root + url.split("/")[-1]   # file name taken from the last URL segment
    urllib.request.urlretrieve(url, filename=path)
    count += 1   # shared counter updated from several threads, so the value is approximate
    print("Image", count, "saved.")
if __name__ == '__main__':
    # Build the list of gallery pages to crawl (pages 1-19)
    page_url_list = []
    for i in range(1, 20):
        page_url = 'http://www.doutula.com/photo/list/?page=' + str(i)
        page_url_list.append(page_url)
    # Create a pool of 8 worker threads
    pool = ThreadPool(8)
    # Start timing (time.clock() was removed in Python 3.8)
    start_time = time.perf_counter()
    # Hand each page URL to a worker thread
    pool.map(get_url, page_url_list)
    # No more work will be submitted to the pool
    pool.close()
    # Main thread waits for all workers to finish
    pool.join()
    # Stop timing
    end_time = time.perf_counter()
    # Total time spent crawling
    time_total = end_time - start_time
    print("Total time:", time_total, 's')