Python 多进程 协程爬虫例子(multiprocessing gevent)

import gevent
from gevent import monkey
monkey.patch_all()
import requests
from multiprocessing import Process,Queue
from gevent.pool import Pool
import datetime



def produce_url(q, batches=20, batch_size=100, url="http://www.baidu.com"):
    """Fill ``q`` with batches of URLs for the consumers to crawl.

    Each queue item is one list holding ``batch_size`` copies of ``url``;
    ``batches`` such lists are enqueued. With the defaults this is 20 lists
    of 100 URLs each.

    Args:
        q: Any object exposing ``put(item)``, e.g. a ``multiprocessing.Queue``.
        batches: Number of lists to enqueue.
        batch_size: Number of URLs in each list.
        url: The URL repeated in every list.
    """
    for _ in range(batches):
        # Build a fresh list per batch so queue items never share one
        # mutable object (and avoid shadowing the builtin ``list``).
        q.put([url] * batch_size)


def spider(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and print the response status code.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled server would keep this call (and whoever is
            waiting on it) blocked indefinitely.

    Returns:
        None. The status code, or a short failure message, is printed.
    """
    try:
        res = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report the failure for this URL and move on instead of letting the
        # exception escape with a full traceback.
        print("request failed:", url, exc)
        return
    print(res.status_code)

def consume_url(q, idle_timeout=1):
    """Take URL batches from ``q`` and crawl each one through a gevent pool.

    Every queue item is expected to be an iterable of URLs. For each batch a
    ``spider`` greenlet is spawned per URL in a ``Pool(16)``, and the batch
    is joined before the next one is fetched. The function returns once no
    item has arrived within ``idle_timeout`` seconds.

    Args:
        q: Queue of URL batches, shared with the other consumer processes.
        idle_timeout: Seconds to wait for a new batch before treating the
            queue as drained and returning.
    """
    # Local import: ``queue.Empty`` is the exception raised by a timed-out
    # ``get`` and the module is not imported at file level.
    from queue import Empty

    pool = Pool(16)
    while True:
        try:
            # A single timed get replaces "check empty(), then get()": with
            # several consumers, another process can take the last item
            # between those two calls and leave this one blocked forever.
            # ``multiprocessing.Queue.empty()`` is also documented as
            # unreliable.
            url_list = q.get(timeout=idle_timeout)
        except Empty:
            break
        tasks = [pool.spawn(spider, url) for url in url_list]
        gevent.joinall(tasks)

def main():
    """Enqueue every URL batch, then run four consumer processes to completion.

    The queue is filled by ``produce_url`` before any consumer starts. Each
    consumer runs ``consume_url`` on the shared queue, and this function
    returns only after all four processes have been joined.
    """
    url_queue = Queue()
    produce_url(url_queue)

    workers = []
    for _ in range(4):
        worker = Process(target=consume_url, args=(url_queue,))
        worker.start()
        workers.append(worker)

    # Block until every consumer process has exited.
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    # Record the wall-clock time on either side of the crawl and print both
    # once it has finished. "%c" formats with the locale's date/time
    # representation.
    clock = datetime.datetime
    start_time = clock.now().strftime("%c")
    main()
    end_time = clock.now().strftime("%c")
    for label, stamp in (("开始时间:", start_time), ("结束时间:", end_time)):
        print(label, stamp)

你可能感兴趣的:(Python 多进程 协程爬虫例子(multiprocessing gevent))