A Dummy Task Executor

Dummy Task Executor for parallel execution of tasks with complex dependency relationships

(This is only a demo; there is much to be improved.)

File List: parallel_exec.py input output

#!/usr/bin/env python

'''
a demo illustrating parallel execution of tasks
with complex dependency relationships
'''

import re
import sys, os
import time
from multiprocessing import Process, Lock, Value, Queue

def con_dic(inputfile):
    '''
    Build the task dependency dictionary from inputfile.

    Each non-blank line must have one of the forms
        [task]
        [task] <= [dep1] [dep2] ...
    Whitespace after '<=' is optional.  Blank lines are skipped.
    A task that appears on several lines accumulates the
    dependencies listed on all of them.

    inputfile: any iterable of text lines (e.g. an open file).

    Returns a dictionary of the form {taskname: [dep-task list], ...},
    or None (after printing a diagnostic) if a line cannot be parsed.
    '''
    dic = {}
    line_pattern = re.compile(r'\[(.+?)\](.*)')
    for line in inputfile:
        line = line.strip()     # strip leading and trailing blanks
        if not line:
            continue            # a blank line is not a syntax error
        m = line_pattern.match(line)
        if not m:
            print('Not Match: %s' % line)
            return None
        taskname = m.group(1)
        remain = m.group(2).strip()
        deps = dic.setdefault(taskname, [])  # empty dep list on first sight
        if not remain:
            continue
        if not remain.startswith('<='):
            print('[Syntax Error]: %s' % line)
            return None
        # Slice off the leading '<=' instead of removing a '<=' token:
        # the token only exists when '<=' is followed by whitespace, so
        # '[a] <=[b]' used to raise ValueError.
        for dep_task in remain[2:].split():
            deps.append(dep_task.strip('[]'))
    return dic


def exec_task(taskqueue, lock, exec_flag):
    '''
    Worker loop: repeatedly take a task name from taskqueue and "run" it.

    Running a task is simulated by sleeping for one second and then
    creating an empty '<task>.done' file in the current directory to
    signal that the task has finished.

    taskqueue: queue of task names (needs qsize() and get()).
    lock:      lock guarding access to taskqueue; used as a context
               manager.
    exec_flag: object with a boolean-like 'value' attribute; the loop
               returns once it is false.  It is checked at the end of
               each iteration, so a worker always completes at least
               one pass (one task or one idle sleep) before exiting.
    '''
    while True:
        taskname = None
        # 'with' releases the lock even if a queue call raises; a lock
        # that is never released would block every other user forever.
        with lock:
            if taskqueue.qsize() > 0:
                taskname = taskqueue.get()
        if taskname:
            print('%s start' % taskname)
            time.sleep(1)       # simulate executing time
            # create a <task>.done file to indicate the task has finished
            with open(taskname + '.done', 'w'):
                pass
            print('%s end' % taskname)
        else:
            time.sleep(1)       # currently empty, poll again later
        if not exec_flag.value:
            return

def gen_taskqueue_from_dict(dict_task_deps, taskqueue, added_tasks):
    '''
    Queue every task whose dependency list is empty and which has not
    been queued before.

    dict_task_deps: {taskname: [dep-task list], ...}; not modified.
    taskqueue:      queue receiving the runnable task names (needs
                    full() and put()).
    added_tasks:    list of task names already queued; each newly
                    queued name is appended to it.

    If the queue reports that it is full, the remaining runnable tasks
    are left for a later call.  A blocking put() here would never
    return when the caller holds the lock that consumers need in order
    to dequeue, which is how test() calls this function.
    '''
    for key in dict_task_deps:
        if dict_task_deps[key] or key in added_tasks:
            continue            # still has deps, or already queued
        if taskqueue.full():
            break               # retry on the next call
        taskqueue.put(key)      # no dep tasks for key, add it to taskqueue
        added_tasks.append(key)
        print('add task to taskqueue: %s' % key)

def remove_task_from_dict(dict_task_deps):
    '''
    Retire every finished task found in the current directory.

    A task counts as finished when a '<task>.done' file exists.  For
    each such file: the file is deleted, the task's own entry is
    removed from dict_task_deps, and the task is removed from the
    dependency list of every remaining task.  The dictionary is
    printed after each retired task.

    dict_task_deps: {taskname: [dep-task list], ...}; modified in place.
    '''
    suffix = ".done"
    done_files = [f for f in os.listdir(".") if f.endswith(suffix)]
    # Handle all markers in one call; returning after the first one
    # would retire at most one finished task per call.
    for f in done_files:
        taskname = f[:-len(suffix)]
        os.remove(f)
        # remove taskname as an independent task from dict
        dict_task_deps.pop(taskname, None)
        # remove taskname from every dependency list; filter in place so
        # a dependency that was listed twice is cleared completely
        for dep_list in dict_task_deps.values():
            if taskname in dep_list:
                dep_list[:] = [d for d in dep_list if d != taskname]
        print('remove task: %s' % taskname)
        print(dict_task_deps)

def test(filename):
    '''
    Run all tasks described in filename with four worker processes.

    The file is parsed by con_dic().  The main process then polls once
    per second: it queues tasks whose dependencies are all satisfied
    and retires tasks whose '.done' marker has appeared, until the
    dependency dictionary and the queue are both empty.

    If the file cannot be parsed, the function returns without
    starting any worker (con_dic has already printed the reason).

    NOTE: nothing here detects tasks that can never become runnable
    (a dependency cycle, or a dependency with no line of its own);
    with such input the polling loop does not terminate.
    '''
    with open(filename, 'r') as f:
        dict_task_deps = con_dic(f)
    if dict_task_deps is None:
        # Bail out before any worker exists; continuing would raise on
        # len(None) with the lock held and the workers already running.
        return
    print(dict_task_deps)

    exec_flag = Value('b', True)
    taskqueue = Queue(1024)
    added_tasks = []
    lock = Lock()
    processes = [
        Process(target=exec_task, args=(taskqueue, lock, exec_flag))
        for _ in range(4)
    ]
    for proc in processes:
        proc.start()
    try:
        while True:
            # generate taskqueue continuously from dict_task_deps
            # if dict_task_deps is empty, stop execution
            with lock:
                if not dict_task_deps and taskqueue.qsize() == 0:
                    exec_flag.value = False
                    break
                gen_taskqueue_from_dict(dict_task_deps, taskqueue, added_tasks)
                remove_task_from_dict(dict_task_deps)
            time.sleep(1)
        while not taskqueue.empty():
            print('waiting for remaining tasks to be finished...')
            time.sleep(1)
    finally:
        # Always tell the workers to stop and wait for them, so an
        # error in the loop above does not leave them polling forever.
        exec_flag.value = False
        for proc in processes:
            proc.join()
    print('Done')

if __name__ == '__main__':
    # The first command-line argument is the dependency file; without
    # it, print the usage line instead.
    if len(sys.argv) < 2:
        print('parallel_exec <inputfile>')
    else:
        test(sys.argv[1])


chenqi@chenqi-OptiPlex-760:~/mypro/python/parallel-execution$ ./parallel-exec.py input
{'t8': ['t7'], 't9': ['t8', 't7'], 't6': ['t5'], 't7': [], 't4': ['t5'], 't5': ['t11'], 't2': ['t3'], 't3': ['t11'], 't1': ['t2', 't3', 't4'], 't10': [], 't11': []}
add task to taskqueue: t7
add task to taskqueue: t10
add task to taskqueue: t11
t7 start
t11 start
t10 start
t11 end
remove task: t11
{'t8': ['t7'], 't9': ['t8', 't7'], 't6': ['t5'], 't7': [], 't4': ['t5'], 't5': [], 't2': ['t3'], 't3': [], 't1': ['t2', 't3', 't4'], 't10': []}
t10 end
t7 end
add task to taskqueue: t5
add task to taskqueue: t3
remove task: t10
{'t8': ['t7'], 't9': ['t8', 't7'], 't6': ['t5'], 't7': [], 't4': ['t5'], 't5': [], 't2': ['t3'], 't3': [], 't1': ['t2', 't3', 't4']}
t5 start
t3 start
t5 end
remove task: t7
{'t8': [], 't9': ['t8'], 't6': ['t5'], 't4': ['t5'], 't5': [], 't2': ['t3'], 't3': [], 't1': ['t2', 't3', 't4']}
t3 end
add task to taskqueue: t8
remove task: t5
{'t8': [], 't9': ['t8'], 't6': [], 't4': [], 't2': ['t3'], 't3': [], 't1': ['t2', 't3', 't4']}
t8 start
t8 end
add task to taskqueue: t6
add task to taskqueue: t4
remove task: t8
{'t9': [], 't6': [], 't4': [], 't2': ['t3'], 't3': [], 't1': ['t2', 't3', 't4']}
t6 start
add task to taskqueue: t9
remove task: t3
{'t9': [], 't6': [], 't4': [], 't2': [], 't1': ['t2', 't4']}
t4 start
t9 start
t6 end
t4 end
add task to taskqueue: t2
t9 end
remove task: t6
{'t9': [], 't4': [], 't2': [], 't1': ['t2', 't4']}
t2 start
t2 end
remove task: t2
{'t9': [], 't4': [], 't1': ['t4']}
remove task: t9
{'t4': [], 't1': ['t4']}
remove task: t4
{'t1': []}
add task to taskqueue: t1
t1 start
t1 end
remove task: t1
{}
Done
chenqi@chenqi-OptiPlex-760:~/mypro/python/parallel-execution$ 

 

你可能感兴趣的:(python,dependency,executor,task)