io密集型,多进程,多线程的区别

为什么80%的码农都做不了架构师?>>>   hot3.png

"""
io密集,多线程请求网页,或者读写文件,计算时间,计算密集
join所完成的工作就是线程同步,即主线程任务结束之后,进入阻塞状态,一直等待其他的子线程执行结束之后,主线程再终止,例子见下面三
join有一个timeout参数:
当设置守护线程时,含义是主线程对每个子线程最多等待timeout的时间,超时后不再等待;join本身不会杀死子线程,而是主线程结束、程序退出时,尚未完成的守护子线程随之被终止。所以说,
如果有10个子线程,全部的等待时间最多就是每个timeout的累加和。简单的来说,就是给每个子线程一个timeout的时间,让他去执行,时间一到,不管任务有没有完成,主线程都不再等它,程序退出时它会被直接终止。
没有设置守护线程时,主线程将会等待timeout的累加和这样的一段时间,时间一到,主线程结束,但是并没有杀死子线程,子线程依然可以继续执行,直到子线程全部结束,程序退出
"""
import logging
import multiprocessing
import queue
import threading
import time

import requests

from simple_spider_rule.settings import tasks,headers

# Configure the root logger at INFO level, so logging.debug() calls in this file
# are filtered out. filename="" is an empty (falsy) value — presumably this makes
# basicConfig fall back to its default stream handler instead of a log file;
# confirm against the logging documentation.
logging.basicConfig(filename="",level=logging.INFO)
# Module-level queue shared by every requester in this file: get_some() fills it
# and each fetcher() drains it.
qa = multiprocessing.Queue()

def get_some(clist):
    """Enqueue every item of *clist* on the module-level queue ``qa``.

    Items are added in iteration order; whatever is already on the queue
    stays there.

    :param clist: iterable of work items.
    :return: the shared ``qa`` queue object itself (not a copy).
    """
    for entry in clist:
        qa.put(entry)
    return qa

def defined_requetsts(url,headers):
    """Issue an HTTP GET for *url* and return the response body as text.

    :param url: address to fetch.
    :param headers: mapping of request headers passed through to ``requests.get``.
    :return: ``text`` attribute of the response.

    The request uses a fixed 10-second timeout; any exception raised by
    ``requests.get`` propagates to the caller.
    """
    return requests.get(url=url, headers=headers, timeout=10).text

class MyRequests(object):
    """Base requester: loads the task queue and defines the fetch/run template."""

    def __init__(self):
        # Lengths of the response bodies collected by fetcher().
        self.len_data = []
        # get_some() enqueues all tasks and hands back the shared module-level queue.
        self.q = get_some(tasks)

    def fetcher(self):
        """Hook for subclasses; the base version does no work and returns None."""
        return None

    def run(self):
        """Call fetcher() and log its return value as the elapsed time."""
        return_data = self.fetcher()
        logging.info(f"一个线程七条数据所用时间为{return_data}")

class OrdinaryRequests(MyRequests):
    """Sequential baseline: fetch every queued URL one after another in the caller's thread."""

    def fetcher(self):
        """Drain the task queue sequentially and time the whole pass.

        A request that raises is logged and skipped so one bad URL does not
        abort the run. The length of every successful response body is
        appended to ``self.len_data``.

        :return: elapsed time in seconds (float).
        """
        # perf_counter() is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() differences can.
        start_time = time.perf_counter()
        while self.q.qsize():
            one_task = self.q.get()
            try:
                data = defined_requetsts(url=one_task, headers=headers)
            except Exception as exc:
                # Logged at WARNING: the root logger is configured at INFO, so
                # a DEBUG record would be dropped and the failure would go unseen.
                logging.warning("请求出现异常: %s (%r)", one_task, exc)
                continue
            self.len_data.append(len(data))
        return time.perf_counter() - start_time


class ThreadRequests(MyRequests):
    """Fetch the queued URLs concurrently with a small pool of worker threads."""

    # Upper bound (seconds) a worker waits for an item before re-checking the queue.
    _GET_TIMEOUT = 0.1

    def fetcher(self):
        """Worker loop: take URLs off the shared queue until it is drained.

        A request that raises is logged and skipped; the worker keeps going so
        the remaining tasks are still processed. The length of every successful
        response body is appended to ``self.len_data``.
        """
        while self.q.qsize():
            try:
                # qsize() followed by get() is not atomic: another worker may
                # take the last item in between. A timed get lets this worker
                # notice that and leave, where a plain get() would block
                # forever and make join() hang.
                one_task = self.q.get(timeout=self._GET_TIMEOUT)
            except queue.Empty:
                continue
            try:
                data = defined_requetsts(url=one_task, headers=headers)
            except Exception as exc:
                # Logged at WARNING: the root logger is configured at INFO, so
                # a DEBUG record would be dropped and the failure would go unseen.
                logging.warning("请求出现异常: %s (%r)", one_task, exc)
                continue
            self.len_data.append(len(data))

    def for_range(self):
        """Run fetcher() in four threads and time them.

        :return: elapsed time in seconds (float) from just before the threads
            are created until all of them have finished.
        """
        thread_num = 4
        start_time = time.perf_counter()
        # To pass arguments to the target, use
        # threading.Thread(target=func, args=(arg,)) with a tuple of arguments.
        threads = [threading.Thread(target=self.fetcher) for _ in range(thread_num)]
        for worker in threads:
            # Threads are non-daemon by default; setting worker.daemon = True
            # before start() would let them be abandoned when the main thread exits.
            worker.start()
        for worker in threads:
            # join() blocks until the worker has finished; it does not make
            # the thread a daemon.
            worker.join()
        return time.perf_counter() - start_time

    def run(self):
        """Run the threaded fetch and log the elapsed time."""
        cdata = self.for_range()
        logging.info(f"四个线程七条数据,所用时间为{cdata}")


class ProcessRequest(MyRequests):
    """Fetch the queued URLs with a pool of worker processes (one per CPU by default)."""

    # Upper bound (seconds) a worker waits for an item before re-checking the queue.
    _GET_TIMEOUT = 0.1

    def fetcher(self):
        """Worker loop: take URLs off the shared queue until it is drained.

        A request that raises is logged and skipped. When this runs inside a
        child process, ``self.len_data`` is that process's own copy, so the
        lengths appended here are NOT visible in the parent afterwards.
        """
        while self.q.qsize():
            try:
                # qsize() followed by get() is not atomic: another process may
                # take the last item in between. A timed get lets this worker
                # notice that and leave, where a plain get() would block
                # forever and make join() hang.
                one_task = self.q.get(timeout=self._GET_TIMEOUT)
            except queue.Empty:
                continue
            try:
                data = defined_requetsts(url=one_task, headers=headers)
            except Exception as exc:
                # Logged at WARNING: the root logger is configured at INFO, so
                # a DEBUG record would be dropped and the failure would go unseen.
                logging.warning("请求出现异常: %s (%r)", one_task, exc)
                continue
            self.len_data.append(len(data))

    def process_get(self, process_num=None):
        """Run fetcher() in several processes and time them.

        :param process_num: number of worker processes; defaults to
            ``multiprocessing.cpu_count()``.
        :return: elapsed time in seconds (float) from just before the
            processes are created until all of them have exited.
        """
        if process_num is None:
            process_num = multiprocessing.cpu_count()
        start_time = time.perf_counter()
        workers = [
            multiprocessing.Process(target=self.fetcher)
            for _ in range(process_num)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            # join() returns immediately for a process that already exited,
            # so no is_alive() check is needed first.
            worker.join()
        return time.perf_counter() - start_time

    def run(self):
        """Run the multi-process fetch and log the worker count and elapsed time."""
        process_num = multiprocessing.cpu_count()
        atime = self.process_get(process_num)
        # Report the real worker count; it equals the CPU count, not a fixed four.
        logging.info(f"{process_num}个进程七条数据,所用时间为{atime}")

if __name__ == '__main__':
    # Run the threaded, sequential and multi-process variants in that order.
    # Each constructor refills the shared queue before its run() drains it.
    for requester_cls in (ThreadRequests, OrdinaryRequests, ProcessRequest):
        requester_cls().run()


执行结果


# The seven pages fetched by each requester variant.
tasks = [
    "http://www.gdep.gov.cn/wgk/jc/201901/t20190104_247520.html",
    "http://www.gdep.gov.cn/wgk/jc/201808/t20180830_242337.html",
    "http://www.gdep.gov.cn/wgk/jc/201808/t20180830_242343.html",
    "http://www.gdep.gov.cn/wgk/jc/201808/t20180809_241534.html",
    "http://www.gdep.gov.cn/wgk/jc/201807/t20180724_240933.html",
    "http://www.gdep.gov.cn/wgk/jc/201807/t20180730_241073.html",
    "http://www.gdep.gov.cn/wgk/jc/201811/t20181113_245623.html",
]

# Request headers sent with every GET; the User-Agent mimics a desktop Chrome browser.
headers = {}
headers["User-Agent"] = (
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/"
    "537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
)

转载于:https://my.oschina.net/mypeng/blog/3007791

你可能感兴趣的:(io密集型,多进程,多线程的区别)