经常会用到python去调用外部 工具或者命令去干活
有的时候子进程并不按预期退出
比如,子进程由于某种原因挂在那里,
这时候也许,我们有这样一种需求:需要父进程对子进程有监控动作,即,超过一定的时间,就不再等待子进程自己退出,而是去kill子进程,回收资源
以下会列出几种实现方法
1.os.system
http://docs.python.org/library/os.html
Python 2.5.2 (r252:60911, Jan 4 2009, 17:40:26)
[GCC 4.3.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import os
>>> os.system("date")
Wed Jun 10 19:34:23 CST 2009
0
>>>
其实是执行 linux shell 命令
$ date
Wed Jun 10 19:36:02 CST 2009
缺点:
A. os.system() 是新起一个shell去干活的,对系统的开销比较大
B. 获得输出等信息比较麻烦,不能与外部命令或工具交互
C. 无法控制:如果调用的外部命令挂死或者执行时间很长,主进程无法干预,因为调用 os.system(cmd) 的进程会一直阻塞(block),直到 os.system() 自己返回
2.commands
http://docs.python.org/library/commands.html
tommy@lab3:~$ python
Python 2.5.2 (r252:60911, Jan 4 2009, 17:40:26)
[GCC 4.3.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import commands
>>> dir(commands)
['__all__', '__builtins__', '__doc__', '__file__', '__name__', 'getoutput', 'getstatus', 'getstatusoutput', 'mk2arg', 'mkarg']
>>> commands.getoutput("date")
'Wed Jun 10 19:39:57 CST 2009'
>>>
>>> commands.getstatusoutput("date")
(0, 'Wed Jun 10 19:40:41 CST 2009')
优点:
A. 容易获得外部命令的输出,以及退出状态
缺点:
同os.system()中的B,C
3.subprocess
http://docs.python.org/library/subprocess.html
tommy@lab3:~$ python
Python 2.5.2 (r252:60911, Jan 4 2009, 17:40:26)
[GCC 4.3.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import subprocess
>>> dir(subprocess)
['CalledProcessError', 'MAXFD', 'PIPE', 'Popen', 'STDOUT', '__all__', '__builtins__', '__doc__', '__file__', '__name__', '_active', '_cleanup', '_demo_posix', '_demo_windows', 'call', 'check_call', 'errno', 'fcntl', 'gc', 'list2cmdline', 'mswindows', 'os', 'pickle', 'select', 'signal', 'sys', 'traceback', 'types']
>>> Popen = subprocess.Popen(["date"])
Wed Jun 10 19:48:41 CST 2009
>>> Popen.pid
24723
>>>
优点:
看文档吧,可以支持和子进程交互等等
虽然 python2.6中的subprocess模块增加了
kill()
terminate()
来控制子进程退出
但是在实际的使用过程中会发现
如果子进程并不是自己退出,而是调用 kill()/terminate() 给子进程发信号让它退出
通过 top 或者 ps -A 看到,子进程的确是释放资源了,但是却变成了 zombie(僵尸进程)
于是分析 subprocess.py模块
1201
1202     def send_signal(self, sig):
1203         """Send a signal to the process
1204         """
1205         os.kill(self.pid, sig)
1206
1207     def terminate(self):
1208         """Terminate the process with SIGTERM
1209         """
1210         self.send_signal(signal.SIGTERM)
1211
1212     def kill(self):
1213         """Kill the process with SIGKILL
1214         """
1215         self.send_signal(signal.SIGKILL)
程序仅仅是调用 os.kill(self.pid, sig) 向子进程发送了一个信号;发送之后,标准库 subprocess.py 中的父进程并没有显式地去 wait() 子进程,导致了 zombie(僵尸进程) 的生成
所以问题找到,
修改subprocess.py模块,显然不妥,
那就封装一下(在 subprocess.Popen 外面包一层),
我是用这个subprocess去调用mencoder 做批量转码,所以对子进程的超时,要有很好的控制,
具体实现见附件
显式地封装成两个函数
1.
shell_2_tty(_cmd=cmds, _cwd=None, _timeout=10*60)
# _cmd 是要执行的外部命令行;最终传给 Popen 的必须是一个 list(代码里用 split_cmd() 把字符串切成 list)。如果直接把 str 交给 Popen 并设置 shell=True,会启动一个新的shell去干活,这样不利于进程的控制
# _cwd 是执行这个命令行前,先 cd 到这个路径下面;这个对我的应用很重要,如果不需要可以用默认值
# _timeout 这个是主角,设置超时时间(单位:秒),从真正执行命令行开始计时,墙上时间超过 _timeout 后,父进程会kill掉子进程,回收资源,并避免产生 zombie(僵尸进程)
# 并将调用的命令行输出,直接输出到stdout,即是屏幕的终端上,
(如果对输出比较讨厌,可以将 stdout=open("/dev/null", "w"), stderr=open("/dev/null", "w"),等等)
2.
shell_2_tempfile(_cmd=cmds, _cwd=None, _timeout=10)
类同1,主要是增加,对命令行的输出,捕获,并返回给父进程,留作分析
------------------------
#!/usr/local/bin/python
#-*- coding: UTF-8 -*-
# subwork
"""Run an external command with a wall-clock timeout and no zombie left behind.

The parent polls the child; when the timeout expires it signals the child and
then explicitly wait()s for it, so the child is always reaped.

Public helpers:
    shell_2_tty      -- run a command, child output goes to the terminal.
    shell_2_tempfile -- run a command, capture its output via temp files.

The code is written to run unchanged on both Python 2 and Python 3.
"""
__author__ ="tommy ([email protected])"
__date__ ="2009-01-06 16:33"
__copyright__="Copyright 2009 tudou, Inc"
__license__ ="Td, Inc"
__version__ ="0.1"

import os
import signal
import subprocess
import sys
import tempfile
import time
import traceback

# The stdlib module names are kept for backward compatibility with existing
# ``from subwork import *`` users; the helper functions were missing before.
__all__ = ["subwork", "trace_back", "os", "time", "traceback", "subprocess",
           "signal", "split_cmd", "check_zero", "shell_2_tty",
           "shell_2_tempfile"]


def trace_back():
    """Return the formatted traceback of the exception being handled.

    Returns '' when no exception is active or when formatting itself fails.
    """
    try:
        exc_type, exc_value, exc_tb = sys.exc_info()
        if exc_type is None:
            return ''
        return str(''.join(traceback.format_exception(exc_type, exc_value, exc_tb)))
    except Exception:
        return ''


def getCurpath():
    """Return the normalized directory of this file, or None on failure."""
    try:
        return os.path.normpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
    except Exception:
        return None


def _to_text(data):
    """Return *data* as ``str``; byte strings read on Python 3 are decoded."""
    if isinstance(data, str):
        return data
    return data.decode("utf-8", "replace")


class subwork(object):
    """Run one command in a child process with timeout support.

    If the child is still running when the timeout expires it is signalled
    (SIGTERM, then SIGKILL) and then waited for, so no zombie is left.
    """

    def __init__(self, stdin=None, stdout=None, stderr=None, cmd=None, cwd=None,
                 timeout=5*60*60, poll_interval=1):
        """Store the launch parameters; nothing is started until run().

        stdin, stdout, stderr -- passed straight to subprocess.Popen.
        cmd           -- command line (str, split by split_cmd) or argv list.
        cwd           -- working directory for the child, None keeps current.
        timeout       -- wall-clock limit in seconds (converted with int()).
        poll_interval -- seconds to sleep between two polls of the child.
        """
        self.cmd = cmd
        self.Popen = None
        self.pid = None
        self.returncode = None
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        self.cwd = cwd
        self.timeout = int(timeout)
        self.poll_interval = poll_interval
        self.start_time = None
        self.msg = ''

    def send_signal(self, sig):
        """Send signal *sig* to the child process."""
        os.kill(self.pid, sig)

    def terminate(self):
        """Terminate the child with SIGTERM."""
        self.send_signal(signal.SIGTERM)

    def kill(self):
        """Kill the child with SIGKILL."""
        self.send_signal(signal.SIGKILL)

    def wait(self):
        """Block until the child exits (this reaps it) and return its code."""
        return self.Popen.wait()

    def free_child(self):
        """Signal the child to stop, then reap it.

        Each step is attempted independently: a failing signal call must not
        prevent the wait(), because skipping the wait() is what leaves a
        zombie behind.
        """
        if self.Popen is None:
            return
        for stop in (self.terminate, self.kill):
            try:
                stop()
            except OSError:
                # The signal could not be delivered; still try to reap below.
                pass
        try:
            self.wait()
        except OSError:
            pass

    def run(self):
        """Start the command and wait for it, at most ``timeout`` seconds.

        Returns a dict ``{"code": bool, "msg": str, "req": {"returncode": int}}``.
        ``code`` is False when an exception occurred while launching or
        polling; ``msg`` then holds the traceback text.  ``returncode`` is the
        child's exit status, -9999 if it had to be killed after the timeout,
        or -9998 if it could not be started at all.
        """
        argv = split_cmd(self.cmd)
        print("[subwork]%s" % argv)
        code = True
        try:
            self.Popen = subprocess.Popen(args=argv,
                                          stdin=self.stdin,
                                          stdout=self.stdout,
                                          stderr=self.stderr,
                                          cwd=self.cwd)
            self.pid = self.Popen.pid
            self.start_time = time.time()
            while self.Popen.poll() is None:
                remaining = self.timeout - (time.time() - self.start_time)
                if remaining <= 0:
                    break
                # Never sleep past the deadline.
                time.sleep(min(self.poll_interval, remaining))
        except Exception:
            self.msg += trace_back()
            self.returncode = -9998
            code = False
            print("[subwork]!!error in Popen")
        except BaseException:
            # KeyboardInterrupt / SystemExit: reap the child, then propagate.
            self.free_child()
            raise

        # check returncode
        if self.Popen is None:
            # The child never started; keep the -9998 set above.
            pass
        elif self.Popen.poll() is None:
            # child is not exit yet!
            self.free_child()
            self.returncode = -9999
        else:
            self.returncode = self.Popen.poll()

        return {"code": code,
                "msg": self.msg,
                "req": {"returncode": self.returncode},
                }


def split_cmd(s):
    """Split a command line into an argv list for subprocess.Popen().

    The string is split on single spaces; empty pieces between arguments are
    dropped.  An argument that starts with a double quote extends to the next
    piece ending with a double quote, and the surrounding quotes are removed.
    An unterminated quote takes the rest of the string.  A list or tuple is
    returned as a new list, unchanged.
    """
    if isinstance(s, (list, tuple)):
        return list(s)

    quote = '"'
    pieces = s.split(' ')
    count = len(pieces)
    argv = []
    i = 0
    while i < count:
        piece = pieces[i]
        if piece == '':
            i += 1
            continue
        if piece[0] != quote:
            argv.append(piece)
            i += 1
            continue

        # Quoted argument: scan forward for the piece carrying the closing quote.
        start = i
        closed = False
        while i < count:
            piece = pieces[i]
            i += 1
            # A lone '"' opening piece is not its own closing quote.
            if piece.endswith(quote) and (i - 1 > start or len(piece) > 1):
                closed = True
                break
        joined = ' '.join(pieces[start:i])
        if closed:
            argv.append(joined[1:-1])
        else:
            argv.append(joined[1:])
    return argv


def check_zero(dic=None):
    """Return ``(returncode == 0, msg)`` for a result dict from subwork.run().

    Raises TypeError when *dic* is not a dict.
    """
    if not isinstance(dic, dict):
        raise TypeError("dic must be a dict instance")
    returncode = int(dic["req"]["returncode"])
    print("returncode : %d" % returncode)
    return returncode == 0, dic["msg"]


def shell_2_tty(_cmd=None, _cwd=None, _timeout=5*60*60):
    """Run *_cmd* with the child's output going to the terminal.

    Returns ``(ok, msg)``: ``ok`` is True only when the exit status is 0;
    ``msg`` holds traceback text when something failed.
    """
    try:
        shell = subwork(cmd=_cmd, stdout=None, stderr=None, cwd=_cwd, timeout=_timeout)
        return check_zero(shell.run())
    except Exception:
        return False, trace_back()


def shell_2_tempfile(_cmd=None, _cwd=None, _timeout=5*60*60):
    """Run *_cmd* and collect its output through temporary files.

    Returns ``(ok, text)``: ``text`` is the child's stdout, or its stderr when
    stdout is empty, or the launch error message when both are empty.
    """
    fout = None
    ferr = None
    try:
        try:
            fout = tempfile.TemporaryFile()
            ferr = tempfile.TemporaryFile()
            shell = subwork(cmd=_cmd, stdout=fout, stderr=ferr, cwd=_cwd, timeout=_timeout)
            req = check_zero(shell.run())
            fout.seek(0)
            out = fout.read()
            if not out:
                ferr.seek(0)
                out = ferr.read()
            if not out:
                # Nothing captured: surface the error message, if any.
                out = req[1]
            return req[0], _to_text(out)
        finally:
            for handle in (fout, ferr):
                if handle is not None:
                    handle.close()
    except Exception:
        return False, trace_back()


#---------------------------------------------
# main-test
#---------------------------------------------
if __name__ == '__main__':
    cmds = "dir"
    # NOTE(review): sample command with hard-coded credentials and an internal
    # address; replace before running anywhere real.
    curlCMDS = ('curl -s -H "Host:i.api.weibo.com" -u "[email protected]:111111" '
                '"http://10.75.14.218/search/statuses/user_timeline.json'
                '?source=2450326868&key=%25&cuid=1801892161&sid=openapi"')
    # Alternative: print(shell_2_tty(_cmd=cmds, _cwd=None, _timeout=10))
    print(shell_2_tempfile(_cmd=curlCMDS, _cwd=None, _timeout=1000))
    print("\nexit!")
    # Keep the parent alive so the process table can be inspected for zombies.
    time.sleep(60)