python多进程(multiprocessing)(map)

map的基本使用:
map函数一手包办了序列操作,参数传递和结果保存等一系列的操作。
from multiprocessing.dummy import Pool
pool = Pool(4)       # 4是线程池中的工作线程数,通常可按电脑的CPU核数来设置
results = pool.map(爬取函数,网址列表)
from multiprocessing.dummy import Pool as ThreadPool
import requests
import time

# Request headers passed to requests.get() by getsource(); sets the User-Agent string.
kv = {'user-agent':'Mozilla/5.0'}

def getsource(url):
    """Fetch a page and return its body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body decoded to ``str`` by requests.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            the timeout expires.
    """
    # A timeout keeps one stalled connection from blocking a worker forever.
    html = requests.get(url, headers=kv, timeout=10)
    # Return the page so callers (e.g. pool.map) collect real results
    # instead of a list of None.
    return html.text

# Build the 41 listing-page URLs; the pn query value advances in steps of 50
# (0, 50, ..., 2000).
urls = [
    'https://tieba.baidu.com/f?kw=读书&ie=utf-8&pn=' + str(offset)
    for offset in range(0, 41 * 50, 50)
]

# Single-threaded crawl: fetch every URL one after another and time the
# whole pass.
time1 = time.time()
for each in urls:
    print(each)  # show which page is being fetched
    getsource(each)
time2 = time.time()
print(f'单线程耗时: {time2 - time1}')

# Multi-threaded crawl: hand the same URLs to a pool of 8 worker threads
# and time the whole pass.
pool = ThreadPool(8)
time3 = time.time()
results = pool.map(getsource, urls)
pool.close()  # no further tasks will be submitted
pool.join()   # wait for the workers to finish before stopping the clock
time4 = time.time()
print(f'多线程所消耗时间:{time4 - time3}')

python多进程(multiprocessing)(map)_第1张图片

你可能感兴趣的:(爬虫入门)