Python自定义的多线程多进程类

最近经常使用到对大量文件进行操作的程序以前每次写的时候都要在函数中再写一个多线程多进程的函数,做了些重复的工作遇到新的任务时还要重写,因此将多线程与多进程的一些简单功能写成一个类,方便使用。功能简单只为以后方便使用。
使用中发现bug会再进行更新

class MyTMultithread(threading.Thread):
    '''
    自定义的线程函数,
    功能:使用多线程运行函数,函数的参数只有一个file,并且未实现结果值的返回
    args:
        filelist   函数的参数为列表格式,
        funname    函数的名字为字符串,函数仅有一个参数为file
        delay      每个线程之间的延迟,
        max_threads 线程的最大值
    '''

    def __init__(self, filelist, delay, funname, max_threads=50):
        threading.Thread.__init__(self)
        self.funname = funname
        self.filelist = filelist[:]
        self.delay = delay
        self.max_threads = max_threads

    def startrun(self):
        def runs():

            time.sleep(self.delay)

            while True:
                try:
                    file = self.filelist.pop()

                except IndexError as e:
                    break
                else:
                    self.funname(file)

        threads = []
        while threads or self.filelist:
            for thread in threads:
                if not thread.is_alive():
                    threads.remove(thread)
            while len(threads) < self.max_threads and self.filelist:
                thread = threading.Thread(target=runs)
                thread.setDaemon(True)
                thread.start()
                threads.append(thread)


class Mymultiprocessing(MyTMultithread):
    '''
    多进程运行函数,多进程多线程运行函数
    
    
    args:
        filelist   函数的参数为列表格式,
        funname    函数的名字为字符串,函数仅有一个参数为file
        delay      每个线程\进程之间的延迟,
        max_threads 最大的线程数
        max_multiprocess 最大的进程数
    
    '''

    def __init__(self, filelist, delay, funname, max_multiprocess=1, max_threads=1):

        self.funname = funname
        self.filelist = filelist[:]
        self.delay = delay
        self.max_threads = max_threads
        self.max_multiprocess = max_multiprocess
        self.num_cpus = multiprocessing.cpu_count()

    def multiprocessingOnly(self):
        '''
    只使用多进程
        '''
        num_process = min(self.num_cpus, self.max_multiprocess)
        processes = []
        while processes or self.filelist:
            for p in processes:
                if not p.is_alive():
                    # print(p.pid,p.name,len(self.filelist))
                    processes.remove(p)
            while len(processes) < num_process and self.filelist:
                try:
                    file = self.filelist.pop()
                    time.sleep(self.delay)

                except IndexError as e:
                    break
                else:
                    print(file)
                    p = multiprocessing.Process(target=self.funname, args=(file,))
                    p.start()
                    processes.append(p)

    def multiprocessingWithReturn(self):
        '''
        只使用 多进程 并且 获取返回结果
        :return:
        '''
        results = [0] * len(self.filelist)
        index_all = len(results)

        p = Pool(min(self.max_multiprocess, self.num_cpus))
        num_process = min(self.num_cpus, self.max_multiprocess)
        processes = []
        i = 0
        while processes or self.filelist:
            for p in processes:
                if not p.is_alive():
                    # print(p.pid,p.name,len(self.filelist))
                    processes.remove(p)
            while len(processes) < num_process and self.filelist:
                try:
                    file = self.filelist.pop()
                    i += 1
                    index = index_all - i
                    time.sleep(self.delay)

                except IndexError as e:
                    break
                else:
                    print(file)
                    results[index] = p.apply_async(self.funname, (file,))
                    # results.append(result)
                    # p.start()
                    # processes.append(p)
        return results
    def multiprocessingWithReturn_(self):
        '''
        只使用 多进程 并且 获取返回结果
        :return:
        '''
        results = [0] * len(self.filelist)
        index_all = len(results)

        p = Pool(min(self.max_multiprocess, self.num_cpus))
        num_process = min(self.num_cpus, self.max_multiprocess)
        processes = []
        i = 0
        while processes or self.filelist:
            for p in processes:
                if not p.is_alive():
                    # print(p.pid,p.name,len(self.filelist))
                    processes.remove(p)
            while len(processes) < num_process and self.filelist:
                try:
                    file = self.filelist.pop()
                    i += 1
                    index = index_all - i
                    time.sleep(self.delay)

                except IndexError as e:
                    break
                else:
                    print(file)
                    results[index] = p.map(self.funname, (file,))
                    # results.append(result)
                    # p.start()
                    # processes.append(p)
        return results

    def multiprocessingThreads(self):

        num_process = min(self.num_cpus, self.max_multiprocess)

        p = Pool(num_process)
        DATALISTS = []
        tempmod = len(self.filelist) % (num_process)
        CD = int((len(self.filelist) + 1 + tempmod) / (num_process))
        for i in range(num_process):
            if i == num_process:
                DATALISTS.append(self.filelist[i * CD:-1])
            DATALISTS.append(self.filelist[(i * CD):((i + 1) * CD)])

        try:
            processes = []
            for i in range(num_process):
                # print('wait add process:',i+1,time.clock())
                # print(eval(self.funname),DATALISTS[i])
                MultThread = MyTMultithread(DATALISTS[i], self.delay, self.funname, self.max_threads)

                p = multiprocessing.Process(target=MultThread.startrun())

                # print('pid & name:',p.pid,p.name)
                processes.append(p)

            for p in processes:
                print('wait join ')
                p.start()

            print('waite over')
        except Exception as e:
            print('error :', e)
        print('end process')


def func1(file):
    print(file)


if __name__ == '__main__':
    a = list(range(0, 97))
    '''
    测试使用5线程
    '''
    st = time.clock()
    asc = MyTMultithread(a, 0, func1, 5)
    asc.startrun()
    end = time.clock()
    print('*' * 50)
    print('多线程使用时间:', end - st)
    # 测试使用5个进程
    st = time.clock()
    asd = Mymultiprocessing(a, 0, func1, 5)
    asd.multiprocessingOnly()
    end = time.clock()
    print('*' * 50)
    print('多进程使用时间:', end - st)

    # 测试使用5进程10线程
    st = time.clock()
    multiPT = Mymultiprocessing(a, 0, func1, 5, 10)
    multiPT.multiprocessingThreads()
    end = time.clock()
    print('*' * 50)
    print('多进程多线程使用时间:', end - st)

运行结果如下:
下面分别是使用5个线程,5个进程,5个进程10个线程的使用时间情况:

Python自定义的多线程多进程类_第1张图片
多线程多进程.png

你可能感兴趣的:(Python自定义的多线程多进程类)