# Thread pool version
import threading
import time
from multiprocessing.dummy import Pool

import requests
from lxml import etree
class DoubanDumpPool(object):
    """Fetch the Douban Top 250 listing pages with a thread pool.

    ``start_work`` starts one pool worker per listing page. Each worker
    downloads its page, prints the names extracted from it and adds the
    number of names found to ``count``.
    """

    # Paging constants: they produce the ``start`` offsets 0, 25, ..., 225.
    TOTAL_MOVIES = 250
    PAGE_SIZE = 25

    # XPath for the text of the first <span> inside each list entry's link;
    # the results are treated as movie names.
    NAME_XPATH = '//*[@id="content"]/div/div[1]/ol/li/div/div[2]/div[1]/a/span[1]/text()'

    def __init__(self, delay=2, timeout=10):
        """Set up the crawler.

        :param delay: seconds each worker sleeps before sending its request.
        :param timeout: seconds ``requests`` may wait on the server before
            giving up, so a stalled connection cannot block the pool forever.
        """
        self.base_url = "https://movie.douban.com/top250"
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"}
        self.count = 0
        self.delay = delay
        self.timeout = timeout
        # ``count`` is updated from several pool threads at once.
        self._count_lock = threading.Lock()

    def _page_urls(self):
        """Return the URLs of all listing pages, in page order."""
        return [
            self.base_url + "?" + "start=" + str(offset)
            for offset in range(0, self.TOTAL_MOVIES, self.PAGE_SIZE)
        ]

    def send_request(self, url):
        """Download *url* and pass the response body to ``analysis_data``.

        Errors raised while downloading or parsing are printed instead of
        propagated, so they do not escape from the pool's ``map`` call.
        """
        time.sleep(self.delay)
        try:
            # The headers built in __init__ must actually be sent; the
            # original call omitted them.
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            # Report HTTP error statuses instead of parsing the error page
            # as if it were a listing with zero names.
            response.raise_for_status()
            self.analysis_data(response.content)
        except Exception as e:
            print(e)

    def analysis_data(self, data):
        """Print every name found in the HTML *data* and update ``count``."""
        html_data = etree.HTML(data)
        name_list = html_data.xpath(self.NAME_XPATH)
        for name in name_list:
            print(name)
        # A single locked update keeps concurrent workers from losing counts.
        with self._count_lock:
            self.count += len(name_list)

    def start_work(self):
        """Fetch all listing pages concurrently and print the totals."""
        start_time = time.time()
        url_list = self._page_urls()
        for url in url_list:
            print(url)
        # One worker thread per page.
        pools = Pool(len(url_list))
        try:
            pools.map(self.send_request, url_list)
        finally:
            # Always release the worker threads, even if map() raised.
            pools.close()
            pools.join()
        all_time = time.time() - start_time
        print("总共电影%d" % self.count)
        print("总共的时间是%s" % all_time)
if __name__ == "__main__":
    # Script entry point: build the thread-pool crawler and run it once.
    DoubanDumpPool().start_work()
# gevent version
# Patch the standard library before anything else is imported: gevent
# recommends patching as early as possible, so that modules imported later
# (such as requests) pick up the patched versions.
from gevent import monkey
monkey.patch_all()

import time

import gevent
import requests
from lxml import etree
class DoubanGeventTest1(object):
    """Fetch the Douban Top 250 listing pages with gevent greenlets.

    ``start_work`` spawns one greenlet per listing page. Each greenlet
    downloads its page, prints the names extracted from it and adds the
    number of names found to ``count``.
    """

    # Paging constants: they produce the ``start`` offsets 0, 25, ..., 225.
    TOTAL_MOVIES = 250
    PAGE_SIZE = 25

    # XPath for the text of the first <span> inside each list entry's link;
    # the results are treated as movie names.
    NAME_XPATH = '//*[@id="content"]/div/div[1]/ol/li/div/div[2]/div[1]/a/span[1]/text()'

    def __init__(self, delay=2, timeout=10):
        """Set up the crawler.

        :param delay: seconds each greenlet sleeps before sending its request.
        :param timeout: seconds ``requests`` may wait on the server before
            giving up, so a stalled connection cannot block ``joinall``
            forever.
        """
        self.base_url = 'https://movie.douban.com/top250'
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"}
        self.count = 0
        self.delay = delay
        self.timeout = timeout

    def _page_urls(self):
        """Return the URLs of all listing pages, in page order."""
        return [
            self.base_url + "?" + "start=" + str(offset)
            for offset in range(0, self.TOTAL_MOVIES, self.PAGE_SIZE)
        ]

    def send_request(self, url):
        """Download *url* and pass the response body to ``analysis_data``.

        Errors raised while downloading or parsing are printed instead of
        propagated.
        """
        time.sleep(self.delay)
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            # Report HTTP error statuses instead of parsing the error page
            # as if it were a listing with zero names.
            response.raise_for_status()
            self.analysis_data(response.content)
        except Exception as e:
            print(e)

    def analysis_data(self, data):
        """Print every name found in the HTML *data* and update ``count``."""
        html_data = etree.HTML(data)
        name_list = html_data.xpath(self.NAME_XPATH)
        for name in name_list:
            print(name)
        self.count += len(name_list)

    def start_work(self):
        """Fetch all listing pages concurrently and print the totals."""
        start_time = time.time()
        # One greenlet per page; the unused URL list kept by the original
        # has been dropped.
        gevent_list = [
            gevent.spawn(self.send_request, url) for url in self._page_urls()
        ]
        gevent.joinall(gevent_list)
        all_time = time.time() - start_time
        print("总共电影%d" % self.count)
        print("总共的时间是%s" % all_time)
if __name__ == "__main__":
    # Script entry point: build the gevent crawler and run it once.
    DoubanGeventTest1().start_work()