subprocess source code learning - posix - 1


Let's start from the initialization function of Popen, __init__(). The code looks like this:

    def __init__(self, args, bufsize=0, executable=None,
                 stdin=None, stdout=None, stderr=None,
                 preexec_fn=None, close_fds=False, shell=False,
                 cwd=None, env=None, universal_newlines=False,
                 startupinfo=None, creationflags=0):
        """Create new Popen instance.

        Validates the platform-specific arguments, asks _get_handles()
        for the handles that connect parent and child, launches the child
        through _execute_child(), and finally wraps the parent's ends of
        any pipes in file objects stored on self.stdin, self.stdout and
        self.stderr.

        Raises TypeError if bufsize is not an integer, and ValueError if
        an argument is passed that the current platform does not support.
        """
        _cleanup() # refer to Note 1

        self._child_created = False
        if not isinstance(bufsize, (int, long)):
            raise TypeError("bufsize must be an integer")

        # preexec_fn and close_fds are not supported by Windows
        if mswindows:
            if preexec_fn is not None:
                raise ValueError("preexec_fn is not supported on Windows "
                                 "platforms")
            if close_fds and (stdin is not None or stdout is not None or
                              stderr is not None):
                raise ValueError("close_fds is not supported on Windows "
                                 "platforms if you redirect stdin/stdout/stderr")
        
        # startupinfo and creationflags are not supported on POSIX systems
        else:
            # POSIX
            if startupinfo is not None:
                raise ValueError("startupinfo is only supported on Windows "
                                 "platforms")
            if creationflags != 0:
                raise ValueError("creationflags is only supported on Windows "
                                 "platforms")

        # Public attributes; the stream attributes stay None unless the
        # corresponding pipe is opened at the end of this method.
        self.stdin = None
        self.stdout = None
        self.stderr = None
        self.pid = None
        self.returncode = None
        self.universal_newlines = universal_newlines

        # Input and output objects. The general principle is like
        # this:
        #
        # Parent                   Child
        # ------                   -----
        # p2cwrite   ---stdin--->  p2cread
        # c2pread    <--stdout---  c2pwrite
        # errread    <--stderr---  errwrite
        #
        # On POSIX, the child objects are file descriptors.  On
        # Windows, these are Windows file handles.  The parent objects
        # are file descriptors on both platforms.  The parent objects
        # are None when not using PIPEs. The child objects are None
        # when not redirecting.

        (p2cread, p2cwrite,
         c2pread, c2pwrite,
         errread, errwrite) = self._get_handles(stdin, stdout, stderr) # refer to Note 2

        self._execute_child(args, executable, preexec_fn, close_fds,
                            cwd, env, universal_newlines,
                            startupinfo, creationflags, shell,
                            p2cread, p2cwrite,
                            c2pread, c2pwrite,
                            errread, errwrite) # launch the sub process. Refer to Note 3

        # On Windows the parent's ends are handle objects at this point;
        # detach each one and turn it into a file descriptor so that the
        # os.fdopen() calls below work on both platforms.
        if mswindows:
            if p2cwrite is not None:
                p2cwrite = msvcrt.open_osfhandle(p2cwrite.Detach(), 0)
            if c2pread is not None:
                c2pread = msvcrt.open_osfhandle(c2pread.Detach(), 0)
            if errread is not None:
                errread = msvcrt.open_osfhandle(errread.Detach(), 0)

        # Wrap the parent's ends of the pipes (p2cwrite, c2pread, errread)
        # in file objects; an end that is None leaves its attribute as None.
        if p2cwrite is not None:
            self.stdin = os.fdopen(p2cwrite, 'wb', bufsize)
        if c2pread is not None:
            if universal_newlines:
                # Universal newlines were requested, so open in 'rU' mode.
                # About mode 'U' (from the Python 2 documentation of open()):
                # the file is opened as a text file, and lines may be
                # terminated by the Unix convention '\n', the Macintosh
                # convention '\r', or the Windows convention '\r\n'.  All of
                # these are seen as '\n' by the Python program.  If Python is
                # built without universal newline support, 'U' is the same as
                # normal text mode.  File objects opened this way also have a
                # `newlines` attribute, which is None (no newline seen yet),
                # '\n', '\r', '\r\n', or a tuple of all newline types seen.
                self.stdout = os.fdopen(c2pread, 'rU', bufsize) 
            else:
                self.stdout = os.fdopen(c2pread, 'rb', bufsize)
        if errread is not None:
            if universal_newlines:
                self.stderr = os.fdopen(errread, 'rU', bufsize)
            else:
                self.stderr = os.fdopen(errread, 'rb', bufsize)

Note 1:

Code of _cleanup() is like this:

def _cleanup():
    """Remove finished child processes from the module-level _active list.

    Each instance in _active is polled without blocking.  An instance whose
    poll result is a non-negative integer is removed from the list; an
    instance that is still running (poll result None) or whose result is
    negative is left in place.
    """
    # Iterate over a snapshot, because the list is mutated inside the loop.
    for inst in _active[:]:
        # sys.maxint is used as the return code if polling itself fails,
        # so such an instance also counts as finished below.
        res = inst._internal_poll(_deadstate=sys.maxint) # refer to Note 1-1
        # Check for None explicitly instead of relying on Python 2 ordering
        # None below every integer in a comparison.
        if res is not None and res >= 0:
            try:
                # _active is initialized to [] when the subprocess module
                # is first imported.
                _active.remove(inst)
            except ValueError:
                # This can happen if two threads create a new Popen instance.
                # It's harmless that it was already removed, so ignore.
                pass

Note 1-1:

Code of self._internal_poll() is like this, which is defined within class Popen.

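For a finished sub process, this function returns its return code: a non-negative exit status, or a negative signal number if the child was killed by a signal (see Note 1-1-1). For a sub process that is still running it returns None. If waitpid() itself fails and _deadstate is given, _deadstate is stored and returned as the return code.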

        def _internal_poll(self, _deadstate=None):
            """Check if child process has terminated.  Returns returncode
            attribute.

            The check never blocks.  If the child has not finished yet the
            returncode attribute is left as None.  If os.waitpid() raises
            os.error and _deadstate is not None, _deadstate is stored as
            the return code.
            """
            if self.returncode is None:
                # No return code recorded yet: ask the OS once more.
                try:
                    # With WNOHANG, waitpid() returns immediately: (pid, status)
                    # if the child has finished, (0, 0) if it is still running.
                    pid, sts = os.waitpid(self.pid, os.WNOHANG)
                    if pid == self.pid:
                        # The child finished; translate its wait status into
                        # self.returncode.
                        self._handle_exitstatus(sts) # refer to Note 1-1-1
                    # Otherwise the child is still running and
                    # self.returncode stays None.
                except os.error:
                    if _deadstate is not None:
                        # waitpid() failed; record the caller-supplied
                        # placeholder as the return code.
                        self.returncode = _deadstate

            # Either the previously recorded code, the code just collected,
            # or None if the child is still running.
            return self.returncode

Note 1-1-1:

Code of self._handle_exitstatus() is like the following code clips. And this function is implemented within class Popen.

If the child is terminated by a signal, then the return code is set to the negated signal number (for example, -9 if the child was killed by signal 9);

if the child exits with system call exit(), then return the parameter of exit(). For example, if the exit statement is "exit(3)", then, 3 will be assigned to self.returncode.

        def _handle_exitstatus(self, sts):
            """Translate a wait status into self.returncode.

            A child killed by a signal gets the negated signal number; a
            child that exited normally gets its exit status.  Any other
            kind of status raises RuntimeError.
            """
            if os.WIFSIGNALED(sts):
                # Terminated by a signal: report it as a negative number.
                code = -os.WTERMSIG(sts)
            elif os.WIFEXITED(sts):
                # Normal exit: WEXITSTATUS is the value passed to exit().
                code = os.WEXITSTATUS(sts)
            else:
                # Should never happen
                raise RuntimeError("Unknown child exit status!")
            self.returncode = code

Note 2:

self._get_handles() is preparing the pipes for the sub process. 

According to the official comments:

        # Parent                   Child
        # ------                   -----
        # p2cwrite   ---stdin--->  p2cread
        # c2pread    <--stdout---  c2pwrite
        # errread    <--stderr---  errwrite
we can tell that,

p2cread is used for child to get the input from parent;

p2cwrite is used for parent to send input to child;

c2pread is used by parent to get output from child;

c2pwrite is used by child to send output to parent;

errread is used by parent to get error info from child;

errwrite is used by child to send error info to parent.

Code is like this:

        def _get_handles(self, stdin, stdout, stderr):
            """Work out the six descriptors that connect parent and child.

            Each of stdin, stdout and stderr may be None (no redirection),
            PIPE (a new pipe is created with os.pipe()), an integer file
            descriptor, or an object providing fileno().  stderr may also
            be STDOUT, in which case it reuses whatever was chosen for the
            child's stdout (None if stdout is not redirected).

            Returns the tuple
            (p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite);
            every entry that does not apply is None.
            """
            def _as_fd(stream):
                # An int is taken as a descriptor already; anything else
                # is assumed to be a file-like object.
                if isinstance(stream, int):
                    return stream
                return stream.fileno()

            p2cread = p2cwrite = None
            c2pread = c2pwrite = None
            errread = errwrite = None

            # stdin: the child reads from p2cread, the parent writes to
            # p2cwrite (only set when a pipe is created).
            if stdin is not None:
                if stdin == PIPE:
                    p2cread, p2cwrite = os.pipe()
                else:
                    p2cread = _as_fd(stdin)

            # stdout: the child writes to c2pwrite, the parent reads from
            # c2pread (only set when a pipe is created).
            if stdout is not None:
                if stdout == PIPE:
                    c2pread, c2pwrite = os.pipe()
                else:
                    c2pwrite = _as_fd(stdout)

            # stderr: same scheme, plus the extra STDOUT option.
            if stderr is not None:
                if stderr == PIPE:
                    errread, errwrite = os.pipe()
                elif stderr == STDOUT:
                    errwrite = c2pwrite
                else:
                    errwrite = _as_fd(stderr)

            return (p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite)


Note 3:

The child process is being launched by this, of which the code is:

        def _execute_child(self, args, executable, preexec_fn, close_fds,
                           cwd, env, universal_newlines,
                           startupinfo, creationflags, shell,
                           p2cread, p2cwrite,
                           c2pread, c2pwrite,
                           errread, errwrite):
            """Execute program (POSIX version)

            Forks, wires the given descriptors onto fds 0, 1 and 2 in the
            child, optionally closes other descriptors, changes directory
            and runs preexec_fn, then execs the program.  Sets self.pid
            and self._child_created.

            If anything in the child fails before the exec succeeds, the
            exception is pickled, sent to the parent over a private pipe,
            and re-raised here in the parent after the child is reaped.
            """

            # Normalize args to a list.
            # types.StringTypes covers the Python 2 string types (str and
            # unicode); a single string becomes a one-element list.
            if isinstance(args, types.StringTypes):
                args = [args]
            else:
                # args was passed as a sequence
                args = list(args)

            # With shell=True the command is handed to /bin/sh -c.
            if shell:
                args = ["/bin/sh", "-c"] + args

            # Default the executable to the first argument.
            # Consider this scenario: executable is not given (None), shell
            # is false, and args is the single string 'ls -l'.  Then the
            # whole string 'ls -l' is taken as the program name, and the
            # exec fails because no such program exists.
            if executable is None:
                executable = args[0]

            # For transferring possible exec failure from child to parent.
            # The child pickles the exception object and writes it to this
            # pipe (see the except clause below).  This pipe only reports
            # failures while trying to launch the executable; it is separate
            # from errread/errwrite, which carry the program's own stderr.
            errpipe_read, errpipe_write = os.pipe()
            try:
                try:
                    # Presumably marks the write end close-on-exec, so that a
                    # successful exec closes it and the parent's read below
                    # returns empty data -- helper not shown here, confirm.
                    self._set_cloexec_flag(errpipe_write)

                    # Remember the gc state so that it can be restored.
                    gc_was_enabled = gc.isenabled()
                    # Disable gc to avoid bug where gc -> file_dealloc ->
                    # write to stderr -> hang.  http://bugs.python.org/issue1336
                    # Garbage collection is turned off around the fork and
                    # re-enabled afterwards in the parent.
                    gc.disable()
                    try:
                        self.pid = os.fork() # fork a sub process
                    except:
                        # fork failed: restore gc before propagating.
                        if gc_was_enabled:
                            gc.enable()
                        raise
                    self._child_created = True
                    if self.pid == 0:
                        # Child
                        try:
                            # Close parent's pipe ends
                            if p2cwrite is not None:
                                os.close(p2cwrite)
                            if c2pread is not None:
                                os.close(c2pread)
                            if errread is not None:
                                os.close(errread)
                            os.close(errpipe_read)

                            # Dup fds for child: make the chosen descriptors
                            # the child's stdin (0), stdout (1) and stderr (2).
                            if p2cread is not None:
                                os.dup2(p2cread, 0)
                            if c2pwrite is not None:
                                os.dup2(c2pwrite, 1)
                            if errwrite is not None:
                                os.dup2(errwrite, 2)

                            # Close pipe fds.  Make sure we don't close the same
                            # fd more than once, or standard fds.
                            if p2cread is not None and p2cread not in (0,):
                                os.close(p2cread)
                            if c2pwrite is not None and c2pwrite not in (p2cread, 1):
                                os.close(c2pwrite)
                            if errwrite is not None and errwrite not in (p2cread, c2pwrite, 2):
                                os.close(errwrite)

                            # Close all other fds, if asked for.
                            # errpipe_write is kept open so that a failure can
                            # still be reported to the parent.
                            if close_fds:
                                self._close_fds(but=errpipe_write) # Refer to Note 3-1
                            
                            # change cwd if cwd is given
                            if cwd is not None:
                                os.chdir(cwd)

                            # run preexec_fn if it is given
                            if preexec_fn:
                                preexec_fn()
                                
                            # Launch the executable.
                            # On Unix, the new executable is loaded into the
                            # current process, so it keeps the pid of this
                            # forked child.  A successful exec never returns.
                            if env is None:
                                os.execvp(executable, args)
                            else:
                                os.execvpe(executable, args, env)

                        except:
                            # Anything that failed above (including the exec
                            # itself) ends up here.
                            exc_type, exc_value, tb = sys.exc_info() # get exception info
                            exc_lines = traceback.format_exception(exc_type,
                                                                   exc_value,
                                                                   tb) # format exception info
                            # Attach the formatted traceback text to the
                            # exception so that it travels to the parent
                            # together with the pickled exception object.
                            exc_value.child_traceback = ''.join(exc_lines)
                            # Pickle the exception and write it to the error
                            # pipe.  Note that errpipe_write is used here,
                            # not errwrite.
                            os.write(errpipe_write, pickle.dumps(exc_value))

                        # This exitcode won't be reported to applications, so it
                        # really doesn't matter what we return.
                        # os._exit() ends the process immediately, without the
                        # cleanup that sys.exit() would trigger, so the child
                        # never runs the finally clauses below.
                        os._exit(255)

                    # Parent
                    if gc_was_enabled:
                        gc.enable()
                finally:
                    # Be sure the write end is closed in the parent no matter
                    # what: the parent only reads from this pipe.  The child
                    # never gets here, because it either execs or calls
                    # os._exit() above.
                    os.close(errpipe_write)

                # For each stream where both ends are set (which _get_handles
                # only does when it created a pipe), close the child's end in
                # the parent and keep the parent's end.
                if p2cread is not None and p2cwrite is not None:
                    os.close(p2cread) # keep p2cwrite for sending input to the child
                if c2pwrite is not None and c2pread is not None:
                    os.close(c2pwrite) # keep c2pread for reading the child's output
                if errwrite is not None and errread is not None:
                    os.close(errwrite) # keep errread for reading the child's errors

                # Wait for exec to fail or succeed; possibly raising exception
                # Exception limited to 1M
                data = _eintr_retry_call(os.read, errpipe_read, 1048576) # Refer to Note 3-2
            finally:
                # Be sure the read end is closed no matter what.  Only the
                # parent gets here; the child closed its own copy of
                # errpipe_read right after the fork.
                os.close(errpipe_read)

            # Non-empty data means that something went wrong in the child
            # before the executable was launched.  In that case:
            # wait for the sub process to finish,
            # load the exception object,
            # close the parent's pipe ends,
            # and then raise the child's exception in the parent.
            if data != "":
                _eintr_retry_call(os.waitpid, self.pid, 0)
                child_exception = pickle.loads(data)
                for fd in (p2cwrite, c2pread, errread):
                    if fd is not None:
                        os.close(fd)
                raise child_exception

Note 3-1:

Code of self._close_fds() is like this. And as you see, this function will close all the fds except 0, 1, 2 and the one indicated by "but"

        def _close_fds(self, but):
            """Close the descriptors from 3 up to MAXFD, sparing `but`.

            Descriptors 0, 1 and 2 are never touched.
            """
            # os.closerange() excludes its upper bound, so the two ranges
            # [3, but) and [but + 1, MAXFD) skip exactly `but`.
            for first, stop in ((3, but), (but + 1, MAXFD)):
                os.closerange(first, stop)

Note 3-2:

Code of _eintr_retry_call() is like this. It calls func(*args) and returns the result.

If the call is interrupted (it raises OSError with errno set to EINTR, "interrupted system call"), the call is simply tried again, until it finishes or some other error occurs; any other error is raised to the caller.

def _eintr_retry_call(func, *args):
    """Call func(*args), retrying for as long as it fails with EINTR.

    Returns whatever func returns.  An OSError whose errno is not
    errno.EINTR is re-raised to the caller unchanged.
    """
    while True:
        try:
            return func(*args)
        # "except ... as" is accepted by Python 2.6+ and Python 3, unlike
        # the older "except OSError, e" spelling.
        except OSError as e:
            if e.errno == errno.EINTR:
                # The call was interrupted before it could complete;
                # nothing is wrong with the call itself, so repeat it.
                continue
            raise

The other methods, apart from the initialization function, are very simple:

self.poll()

    def poll(self):
        """Return the result of a non-blocking check on the child.

        Delegates to _internal_poll() with its default arguments.
        """
        status = self._internal_poll() # Refer to Note 1-1
        return status
self.wait()
        def wait(self):
            """Block until the child has terminated and return its
            returncode attribute."""
            # A return code that is already recorded is returned as is.
            if self.returncode is not None:
                return self.returncode
            # Blocking waitpid(); the parent stays here until the child
            # finishes.  Refer to Note 3-2 for the retry helper.
            _pid, status = _eintr_retry_call(os.waitpid, self.pid, 0)
            self._handle_exitstatus(status) # Refer to Note 1-1-1
            return self.returncode
self.send_signal, self.terminate and self.kill():

        def send_signal(self, sig):
            """Deliver signal `sig` to the child process."""
            # os.kill() sends signal sig to the process with the given pid.
            # Constants for the signals available on the host platform are
            # defined in the signal module.  Availability: Unix.
            target_pid = self.pid
            os.kill(target_pid, sig)
                                   
        def terminate(self):
            """Ask the process to stop by sending it SIGTERM."""
            sigterm = signal.SIGTERM
            self.send_signal(sigterm)

        def kill(self):
            """Stop the process by sending it SIGKILL."""
            sigkill = signal.SIGKILL
            self.send_signal(sigkill)



To Be Continued.

你可能感兴趣的:(subprocess source code learning - posix - 1)