Linux内核进程创建do_fork()解析

本文分析内核创建进程时所进行的工作,阅读的内核代码为Linux kernel 2.6。

  进程创建的大部分工作由do_fork这个函数完成,函数原型如下:

long do_fork(unsigned long clone_flags,
          unsigned long stack_start,
          struct pt_regs *regs,
          unsigned long stack_size,
          int __user *parent_tidptr,
          int __user *child_tidptr)
View Code

1、首先进行一些参数及权限的检查。

    if (clone_flags & CLONE_NEWUSER) {
        if (clone_flags & CLONE_THREAD)
            return -EINVAL;
        /* hopefully this check will go away when userns support is
         * complete
         */
        if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
                !capable(CAP_SETGID))
            return -EPERM;
    }
View Code

2、进行标志的检查。这里主要是检查调用者是否使用了已废弃的CLONE_STOPPED标志:如果使用了,就打印一条警告信息(受printk_ratelimit()限速,且最多打印100次),并不会因此返回错误。

if (unlikely(clone_flags & CLONE_STOPPED)) {
        static int __read_mostly count = 100;

        if (count > 0 && printk_ratelimit()) {
            char comm[TASK_COMM_LEN];

            count--;
            printk(KERN_INFO "fork(): process `%s' used deprecated "
                    "clone flags 0x%lx\n",
                get_task_comm(comm, current),
                clone_flags & CLONE_STOPPED);
        }
    }
View Code

3、用户态检查,下面这段代码比较关键。主要是先做好复制前的准备工作(若调用来自用户态,则通过tracehook_prepare_clone()准备跟踪相关的信息;由kernel_thread发起的调用则跳过这一步),然后复制当前进程。

/*   * When called from kernel_thread, don't do user tracing stuff.   */

 if (likely(user_mode(regs)))   trace = tracehook_prepare_clone(clone_flags);

 p = copy_process(clone_flags, stack_start, regs, stack_size,     child_tidptr, NULL, trace);

首先是用户空间压栈操作,保存寄存器。其中regs是这么一个参数。

/*
 * Saved register state; do_fork() receives a pointer to one of these as
 * its regs argument.
 *
 * NOTE(review): this particular layout looks like the 64-bit SuperH (SH-5)
 * variant of pt_regs; every architecture defines its own -- confirm which
 * tree this excerpt was taken from.
 */
struct pt_regs {
 unsigned long long pc;        /* program counter */
 unsigned long long sr;        /* presumably the status register -- confirm */
 long long syscall_nr;         /* system call number */
 unsigned long long regs[63];  /* presumably the general-purpose registers -- confirm */
 unsigned long long tregs[8];  /* presumably the branch-target registers -- confirm */
 unsigned long long pad[2];    /* padding */
};

从结构体中的成员可以看到,包含

pc:程序计数器

sr:状态寄存器(status register)

syscall_nr:系统调用号

总之,这里是包含进程在退出cpu时所需的最小信息。

 p = copy_process(clone_flags, stack_start, regs, stack_size,     child_tidptr, NULL, trace);

这里即复制一个进程。返回值为task_struct的结构体,该结构描述了一个进程的基本状态。这里并不进行详细的介绍。

4、对创建的进程进行错误检查(IS_ERR)。出错的可能性不大,可以暂时先不管,把握住我们的主线;下面是创建成功时执行的代码。

if (!IS_ERR(p)) {   struct completion vfork;

  trace_sched_process_fork(current, p);

  nr = task_pid_vnr(p);

  if (clone_flags & CLONE_PARENT_SETTID)    put_user(nr, parent_tidptr);

  if (clone_flags & CLONE_VFORK) {    p->vfork_done = &vfork;    init_completion(&vfork);  

 }

5、然后接着下面两个函数。

  audit_finish_fork(p);//主要是检查完成的进程的状态。
  tracehook_report_clone(regs, clone_flags, nr, p);//主要是阻塞刚刚创建的子进程,因为现在还是在父进程进程中,子进程并未开始执行,暂时挂起子进程。下面为函数解释。

/**
 * tracehook_report_clone - in parent, new child is about to start running
 * @regs:  parent's user register state
 * @clone_flags: flags from parent's system call
 * @pid:  new child's PID in the parent's namespace
 * @child:  new child task
 *
 * Called after a child is set up, but before it has been started running.
 * This is not a good place to block, because the child has not started
 * yet.  Suspend the child here if desired, and then block in
 * tracehook_report_clone_complete().  This must prevent the child from
 * self-reaping if tracehook_report_clone_complete() uses the @child
 * pointer; otherwise it might have died and been released by the time
 * tracehook_report_clone_complete() is called.
 *
 * Called with no locks held, but the child cannot run until this returns.
 */

 6、设置进程标志位。

  /*
   * We set PF_STARTING at creation in case tracing wants to
   * use this to distinguish a fully live task from one that
   * hasn't gotten to tracehook_report_clone() yet.  Now we
   * clear it and set the child going.
   */
  p->flags &= ~PF_STARTING;

(PF_STARTING宏定义解释为:#define PF_STARTING 0x00000002 /* being created */,表明该进程正在创建中;这里清除该标志,表示创建过程已经完成)

7、唤醒进程。这里先判断复制标志是否为 CLONE_STOPPED状态,但是大多数情形下,并不为CLONE_STOPPED状态。

 if (unlikely(clone_flags & CLONE_STOPPED)) {
   /*
    * We'll start up with an immediate SIGSTOP.
    */
   sigaddset(&p->pending.signal, SIGSTOP);
   set_tsk_thread_flag(p, TIF_SIGPENDING);
   __set_task_state(p, TASK_STOPPED);
  } else {
   wake_up_new_task(p, clone_flags);
  }

  tracehook_report_clone_complete(trace, regs,
      clone_flags, nr, p);/*这个函数主要是报告当前之前阻塞的子进程已经开始运行*/

下面是wake_up_new_task函数功能解释。主要功能是首次唤醒创建的进程,同时完成一些初始化调度的所需的工作,并将进程放入运行队列中。

关于一个进程如何添加到队列中去,可以从这里进行研究。本文先不对此进行研究,还是放在进程的创建上来。

/*
 * wake_up_new_task - wake up a newly created task for the first time.
 *
 * This function will do some initial scheduler statistics housekeeping
 * that must be done for every newly created context, then puts the task
 * on the runqueue and wakes it.
 */

8、检查clone_flags标志位。若当前标志位包含 CLONE_VFORK(#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */),表明父进程希望子进程在释放其内存空间(mm_release)时将自己唤醒。wait_for_completion(&vfork);会使父进程进入睡眠(其内部用自旋锁保护等待队列),直到子进程执行exec或退出、释放地址空间并完成vfork_done之后,父进程才被唤醒继续执行。

if (clone_flags & CLONE_VFORK) {
   freezer_do_not_count();
   wait_for_completion(&vfork);
   freezer_count();
   tracehook_report_vfork_done(p, nr);
  }

 9、完成所有操作,返回。创建成功时返回值为新进程的pid;若copy_process()失败,则返回PTR_ERR(p)得到的错误码。


 } else {
  nr = PTR_ERR(p);
 }
 return nr;

10、下面是完整的程序。

/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 *
 * @clone_flags:   CLONE_* bits; this function itself inspects
 *                 CLONE_NEWUSER, CLONE_THREAD, CLONE_STOPPED,
 *                 CLONE_PARENT_SETTID and CLONE_VFORK, and forwards the
 *                 whole mask to copy_process() and the tracehooks.
 * @stack_start:   forwarded unchanged to copy_process().
 * @regs:          caller's register state; used for the user_mode() test
 *                 and forwarded to copy_process() and the tracehooks.
 * @stack_size:    forwarded unchanged to copy_process().
 * @parent_tidptr: user-space location that receives the child's pid when
 *                 CLONE_PARENT_SETTID is set.
 * @child_tidptr:  forwarded unchanged to copy_process().
 *
 * Returns the child's pid as given by task_pid_vnr() on success,
 * PTR_ERR() of copy_process()'s result if the copy failed, or
 * -EINVAL / -EPERM when the CLONE_NEWUSER checks reject the request.
 */
long do_fork(unsigned long clone_flags,
          unsigned long stack_start,
          struct pt_regs *regs,
          unsigned long stack_size,
          int __user *parent_tidptr,
          int __user *child_tidptr)
{
    struct task_struct *p;
    int trace = 0;      /* stays 0 unless the caller is in user mode */
    long nr;

    /*
     * Do some preliminary argument and permissions checking before we
     * actually start allocating stuff
     */
    if (clone_flags & CLONE_NEWUSER) {
        /* CLONE_NEWUSER combined with CLONE_THREAD is rejected outright. */
        if (clone_flags & CLONE_THREAD)
            return -EINVAL;
        /* hopefully this check will go away when userns support is
         * complete
         */
        /* All three capabilities are required, not just one of them. */
        if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
                !capable(CAP_SETGID))
            return -EPERM;
    }

    /*
     * We hope to recycle these flags after 2.6.26
     *
     * CLONE_STOPPED is still honoured further down; here we only log a
     * deprecation notice, rate-limited and capped at 100 messages by the
     * static counter.
     */
    if (unlikely(clone_flags & CLONE_STOPPED)) {
        static int __read_mostly count = 100;

        if (count > 0 && printk_ratelimit()) {
            char comm[TASK_COMM_LEN];

            count--;
            printk(KERN_INFO "fork(): process `%s' used deprecated "
                    "clone flags 0x%lx\n",
                get_task_comm(comm, current),
                clone_flags & CLONE_STOPPED);
        }
    }

    /*
     * When called from kernel_thread, don't do user tracing stuff.
     */
    if (likely(user_mode(regs)))
        trace = tracehook_prepare_clone(clone_flags);

    /* The result is either a valid task or an ERR_PTR-encoded error. */
    p = copy_process(clone_flags, stack_start, regs, stack_size,
             child_tidptr, NULL, trace);
    /*
     * Do this prior waking up the new thread - the thread pointer
     * might get invalid after that point, if the thread exits quickly.
     */
    if (!IS_ERR(p)) {
        /* Lives on this stack frame; only armed and waited on for CLONE_VFORK. */
        struct completion vfork;

        trace_sched_process_fork(current, p);

        /* Capture the pid now, while p is still guaranteed to be valid. */
        nr = task_pid_vnr(p);

        /* The result of put_user() is not checked here. */
        if (clone_flags & CLONE_PARENT_SETTID)
            put_user(nr, parent_tidptr);

        /*
         * Hook the completion into the child before it is woken, so the
         * parent can later sleep on it in wait_for_completion() below.
         */
        if (clone_flags & CLONE_VFORK) {
            p->vfork_done = &vfork;
            init_completion(&vfork);
        }

        audit_finish_fork(p);
        tracehook_report_clone(regs, clone_flags, nr, p);

        /*
         * We set PF_STARTING at creation in case tracing wants to
         * use this to distinguish a fully live task from one that
         * hasn't gotten to tracehook_report_clone() yet.  Now we
         * clear it and set the child going.
         */
        p->flags &= ~PF_STARTING;

        if (unlikely(clone_flags & CLONE_STOPPED)) {
            /*
             * We'll start up with an immediate SIGSTOP.
             * The child is marked TASK_STOPPED and is not passed to
             * wake_up_new_task() on this path.
             */
            sigaddset(&p->pending.signal, SIGSTOP);
            set_tsk_thread_flag(p, TIF_SIGPENDING);
            __set_task_state(p, TASK_STOPPED);
        } else {
            wake_up_new_task(p, clone_flags);
        }

        tracehook_report_clone_complete(trace, regs,
                        clone_flags, nr, p);

        /*
         * vfork semantics: block the parent on the completion armed
         * above. The freezer_do_not_count()/freezer_count() pair
         * brackets the wait.
         * NOTE(review): presumably this keeps the freezer from waiting
         * on the sleeping parent, and the child signals vfork_done when
         * it releases its mm -- neither is visible in this function;
         * confirm.
         */
        if (clone_flags & CLONE_VFORK) {
            freezer_do_not_count();
            wait_for_completion(&vfork);
            freezer_count();
            tracehook_report_vfork_done(p, nr);
        }
    } else {
        /* copy_process() failed: hand its error code back to the caller. */
        nr = PTR_ERR(p);
    }
    return nr;
}
View Code

 

你可能感兴趣的:(Linux内核进程创建do_fork()解析)