Dummy Task Executor for parallel execution of tasks with complex dependency relationships
(This is only a demonstration; much remains to be improved.)
File List: parallel_exec.py input output
#!/usr/bin/env python
'''
a demo illustrating parallel execution of tasks with complex dependency
relationships

Input file format, one task per line:

    [taskname]                      a task without dependencies
    [taskname] <= [dep1] [dep2]     a task that runs after dep1 and dep2

Worker processes signal completion of a task by creating an empty
"<taskname>.done" file in the current working directory; the scheduler
picks those files up, deletes them and releases the dependent tasks.

NOTE: print is always called with exactly one parenthesised argument so
that it produces the same output as a Python 2 statement and as a
Python 3 function.
'''
import os
import re
import sys
import time
from multiprocessing import Process, Lock, Value, Queue

try:
    from queue import Empty     # Python 3
except ImportError:
    from Queue import Empty     # Python 2

LINE_PATTERN = re.compile(r'\[(.+?)\](.*)')
DONE_SUFFIX = '.done'   # suffix of the marker file written for a finished task
NUM_WORKERS = 4         # number of worker processes started by test()
WORK_SECONDS = 1        # simulated execution time of one task
POLL_SECONDS = 1        # pause between two polls (workers and scheduler)


def con_dic(inputfile):
    '''
    construct a dictionary of the form {taskname: [dep-task list], ...}
    from inputfile and return it

    inputfile is any iterable of text lines (an open file, a list, ...).
    Blank lines are ignored and a dependency listed several times for the
    same task is stored only once.  If a line cannot be parsed, a message
    is printed and None is returned.
    '''
    dic = {}
    for raw_line in inputfile:
        line = raw_line.strip()     # strip leading and following blanks
        if not line:
            continue
        m = LINE_PATTERN.match(line)
        if not m:
            print('Not Match: %s' % line)
            return None
        taskname = m.group(1)
        remain = m.group(2).strip()
        deps = dic.setdefault(taskname, [])     # empty dep list on first sight
        if not remain:
            continue
        if not remain.startswith('<='):
            print('[Syntax Error]: %s' % line)
            return None
        # Cut the leading '<=' off by position rather than looking for a
        # separate '<=' token, so that '[a] <=[b]' parses as well.
        for token in remain[2:].split():
            dep_task = token.strip('[]')
            # A duplicated dependency would never be fully released later.
            if dep_task not in deps:
                deps.append(dep_task)
    return dic


def exec_task(taskqueue, lock, exec_flag):
    '''
    worker loop: take task names from taskqueue and "execute" them

    Executing a task means sleeping for WORK_SECONDS and then creating
    the file "<taskname>.done".  The queue is only accessed while lock is
    held.  The worker returns when it finds no task to run and
    exec_flag.value is false.
    '''
    while True:
        with lock:
            # get_nowait() instead of qsize(): qsize() is not implemented
            # on every platform.
            try:
                taskname = taskqueue.get_nowait()
            except Empty:
                taskname = None
        if taskname:
            print('%s start' % taskname)
            time.sleep(WORK_SECONDS)    # simulate executing time
            # create a <task>.done file to indicate the task has finished
            with open(taskname + DONE_SUFFIX, 'w'):
                pass
            print('%s end' % taskname)
        else:
            time.sleep(POLL_SECONDS)    # currently empty
            if not exec_flag.value:
                return


def gen_taskqueue_from_dict(dict_task_deps, taskqueue, added_tasks):
    '''
    put every task whose dependency list is empty on taskqueue

    added_tasks is the list of task names that were queued before; a task
    found in it is skipped, every newly queued task is appended to it.
    '''
    for task, deps in dict_task_deps.items():
        if not deps and task not in added_tasks:
            taskqueue.put(task)     # no dep tasks left, ready to run
            added_tasks.append(task)
            print('add task to taskqueue: %s' % task)


def _known_names(dict_task_deps):
    '''return the set of names used as a task or as a dependency'''
    names = set(dict_task_deps)
    for deps in dict_task_deps.values():
        names.update(deps)
    return names


def remove_task_from_dict(dict_task_deps):
    '''
    consume the "<taskname>.done" files found in the current directory

    For each marker whose name occurs in dict_task_deps (as a task or as
    a dependency) the file is deleted, the task is dropped from the
    dictionary and removed from every dependency list.  Other *.done
    files are left untouched.
    '''
    markers = [f for f in os.listdir('.') if f.endswith(DONE_SUFFIX)]
    for marker in markers:
        taskname = marker[:-len(DONE_SUFFIX)]
        if taskname not in _known_names(dict_task_deps):
            continue    # not one of our markers, do not delete it
        os.remove(marker)
        # remove taskname as an independent task from dict
        dict_task_deps.pop(taskname, None)
        # remove taskname as a dependent task (every occurrence, in place)
        for deps in dict_task_deps.values():
            if taskname in deps:
                deps[:] = [dep for dep in deps if dep != taskname]
        print('remove task: %s' % taskname)
        print(dict_task_deps)


def _purge_stale_markers(dict_task_deps):
    '''delete marker files of declared tasks left over from an earlier run'''
    for task in dict_task_deps:
        marker = task + DONE_SUFFIX
        if os.path.exists(marker):
            os.remove(marker)


def _is_stalled(dict_task_deps, added_tasks):
    '''
    return True if tasks remain but none of them can ever finish, i.e.
    no remaining task is queued/running and none is ready to be queued
    '''
    if not dict_task_deps:
        return False
    for task, deps in dict_task_deps.items():
        if task in added_tasks or not deps:
            return False
    return True


def test(filename):
    '''
    read the task description in filename and run all tasks with
    NUM_WORKERS worker processes, respecting the dependencies

    Nothing is executed if the file cannot be parsed.  Execution stops
    with a '[Dependency Error]' message when the remaining tasks wait for
    dependencies that can never complete (undeclared or cyclic ones).
    '''
    with open(filename, 'r') as f:
        dict_task_deps = con_dic(f)
    print(dict_task_deps)
    if dict_task_deps is None:
        return      # parse error already reported by con_dic
    # A marker left behind by an interrupted run would make its task look
    # finished before it has been executed.
    _purge_stale_markers(dict_task_deps)

    exec_flag = Value('b', True)
    # Unbounded queue: put() must never block while the lock is held,
    # because the workers need the same lock to drain the queue.
    taskqueue = Queue()
    added_tasks = []
    lock = Lock()
    processes = [Process(target=exec_task, args=(taskqueue, lock, exec_flag))
                 for _ in range(NUM_WORKERS)]
    for proc in processes:
        proc.start()

    stalled = False
    try:
        while True:
            # generate taskqueue continuously from dict_task_deps
            # if dict_task_deps is empty, stop execution
            with lock:
                if not dict_task_deps and taskqueue.empty():
                    break
                gen_taskqueue_from_dict(dict_task_deps, taskqueue,
                                        added_tasks)
                remove_task_from_dict(dict_task_deps)
                stalled = _is_stalled(dict_task_deps, added_tasks)
            if stalled:
                print('[Dependency Error]: unsatisfiable dependencies: %s'
                      % dict_task_deps)
                break
            time.sleep(POLL_SECONDS)
        while not taskqueue.empty():
            print('waiting for remaining tasks to be finished...')
            time.sleep(POLL_SECONDS)
    finally:
        # Always release the workers, even if scheduling failed.
        exec_flag.value = False
        for proc in processes:
            proc.join()
    if not stalled:
        print('Done')


# The name starts with "test": tell pytest-style collectors that this is
# the program entry point, not a test case.
test.__test__ = False


if __name__ == '__main__':
    if len(sys.argv) >= 2:
        test(sys.argv[1])
    else:
        print('parallel_exec <inputfile>')
chenqi@chenqi-OptiPlex-760:~/mypro/python/parallel-execution$ ./parallel-exec.py input {'t8': ['t7'], 't9': ['t8', 't7'], 't6': ['t5'], 't7': [], 't4': ['t5'], 't5': ['t11'], 't2': ['t3'], 't3': ['t11'], 't1': ['t2', 't3', 't4'], 't10': [], 't11': []} add task to taskqueue: t7 add task to taskqueue: t10 add task to taskqueue: t11 t7 start t11 start t10 start t11 end remove task: t11 {'t8': ['t7'], 't9': ['t8', 't7'], 't6': ['t5'], 't7': [], 't4': ['t5'], 't5': [], 't2': ['t3'], 't3': [], 't1': ['t2', 't3', 't4'], 't10': []} t10 end t7 end add task to taskqueue: t5 add task to taskqueue: t3 remove task: t10 {'t8': ['t7'], 't9': ['t8', 't7'], 't6': ['t5'], 't7': [], 't4': ['t5'], 't5': [], 't2': ['t3'], 't3': [], 't1': ['t2', 't3', 't4']} t5 start t3 start t5 end remove task: t7 {'t8': [], 't9': ['t8'], 't6': ['t5'], 't4': ['t5'], 't5': [], 't2': ['t3'], 't3': [], 't1': ['t2', 't3', 't4']} t3 end add task to taskqueue: t8 remove task: t5 {'t8': [], 't9': ['t8'], 't6': [], 't4': [], 't2': ['t3'], 't3': [], 't1': ['t2', 't3', 't4']} t8 start t8 end add task to taskqueue: t6 add task to taskqueue: t4 remove task: t8 {'t9': [], 't6': [], 't4': [], 't2': ['t3'], 't3': [], 't1': ['t2', 't3', 't4']} t6 start add task to taskqueue: t9 remove task: t3 {'t9': [], 't6': [], 't4': [], 't2': [], 't1': ['t2', 't4']} t4 start t9 start t6 end t4 end add task to taskqueue: t2 t9 end remove task: t6 {'t9': [], 't4': [], 't2': [], 't1': ['t2', 't4']} t2 start t2 end remove task: t2 {'t9': [], 't4': [], 't1': ['t4']} remove task: t9 {'t4': [], 't1': ['t4']} remove task: t4 {'t1': []} add task to taskqueue: t1 t1 start t1 end remove task: t1 {} Done chenqi@chenqi-OptiPlex-760:~/mypro/python/parallel-execution$