linux 2.6线程创建源码分析

上章讲到线程,现在对线程创建的代码流程分析下。来一步一步揭开她神秘的面纱

linux内核创建线程函数 kernel_thread(),最终会调用do_fork().
前面谈到线程也是用task_struct结构表示它拥有的信息,只是是共享进程的资源。
根据clone_flags标志,来调用clone()创建"线程",表示共享内存、共享文件系统访问计数、共享文件描述符表,以及共享信号处理方式。

kernel_thread定义在/arch/kernel/process.c
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
       struct pt_regs regs;   

       memset(&regs, 0, sizeof(regs));         //把该结构的变量全部清0

       regs.ebx = (unsigned long) fn;          /* ebx指向函数地址 */
       regs.edx = (unsigned long) arg;         /* edx指向参数地址 */

       regs.xds = __USER_DS;
       regs.xes = __USER_DS;
       regs.xfs = __KERNEL_PERCPU;
       regs.orig_eax = -1;
       regs.eip = (unsigned long) kernel_thread_helper;       /*eip指向回调函数*/
       regs.xcs = __KERNEL_CS | get_kernel_rpl();
       regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;


       /* 利用do_fork来产生一个新的线程,共享父进程地址空间,并且不允许调试子进程 */
       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
}


其中__USER_DS,__KERNEL_PERCPU,__KERNEL_CS都是一些宏定义。在/linux/include/asm-i386/segment.h


extern void kernel_thread_helper(void);  /* 定义成全局变量 */
__asm__(".section .text\n"
    ".align 4\n"
    "kernel_thread_helper:\n\t"
    "movl %edx,%eax\n\t"
    "pushl %edx\n\t"   /* edx指向参数,压入堆栈 */
    "call *%ebx\n\t"   /* ebx指向函数地址,执行函数 */
    "pushl %eax\n\t"
    "call do_exit\n"   /* 结束线程 */
    ".previous");
   
在kernel_thread中调用了do_fork,让我们揭开do_fork()的面纱.
long do_fork(unsigned long clone_flags,
          unsigned long stack_start,
          struct pt_regs *regs,
          unsigned long stack_size,
          int __user *parent_tidptr,
          int __user *child_tidptr)
{
    ...
    ...
    p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
    ...
    ...
}

   
接着分析do_fork(),copy_proces()是它的核心函数。重点分析一下:
static struct task_struct *copy_process(unsigned long clone_flags,
                       unsigned long stack_start,
                       struct pt_regs *regs,
                       unsigned long stack_size,
                       int __user *parent_tidptr,
                       int __user *child_tidptr,
                       struct pid *pid)
{
     int retval;
     struct task_struct *p = NULL;

     //clone_flags参数的有效性判断

     //不能同时定义CLONE_NEWNS,CLONE_FS
     if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
         return ERR_PTR(-EINVAL);
//如果定义CLONE_THREAD,则必须要定义CLONE_SIGHAND
     if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
         return ERR_PTR(-EINVAL);
//如果定义CLONE_SIGHAND,则必须要定义CLONE_VM
     if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
         return ERR_PTR(-EINVAL);

     retval = security_task_create(clone_flags);
     if (retval)
         goto fork_out;

     retval = -ENOMEM;
     //从父进程中复制出一个task
     p = dup_task_struct(current);
     if (!p)
         goto fork_out;

     rt_mutex_init_task(p);

#ifdef CONFIG_TRACE_IRQFLAGS
     DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
     DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
     retval = -EAGAIN;

     //如果用户的进程总数超过了限制
     if (atomic_read(&p->user->processes) >=
              p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
         if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
             p->user != current->nsproxy->user_ns->root_user)
              goto bad_fork_free;
     }

     //更新进程用户的相关计数
     atomic_inc(&p->user->__count);
     atomic_inc(&p->user->processes);
     get_group_info(p->group_info);
      //当前进程数是否大于系统规定的最大进程数
     if (nr_threads >= max_threads)
         goto bad_fork_cleanup_count;

     //加载进程的相关执行模块
     if (!try_module_get(task_thread_info(p)->exec_domain->module))
         goto bad_fork_cleanup_count;

     if (p->binfmt && !try_module_get(p->binfmt->module))
         goto bad_fork_cleanup_put_domain;

     //子进程还在进行初始化,没有execve
     p->did_exec = 0;
     delayacct_tsk_init(p); /* Must remain after dup_task_struct() */

     //copy父进程的所有标志,除了PF_SUPERPRIV(超级权限)
     //置子进程的PF_FORKNOEXEC标志,表示正在被FORK
     copy_flags(clone_flags, p);

     //赋值子进程的pid
     p->pid = pid_nr(pid);
     retval = -EFAULT;
     if (clone_flags & CLONE_PARENT_SETTID)
         if (put_user(p->pid, parent_tidptr))
              goto bad_fork_cleanup_delays_binfmt;

     //初始化子进程的几个链表
     INIT_LIST_HEAD(&p->children);
     INIT_LIST_HEAD(&p->sibling);
     p->vfork_done = NULL;
     spin_lock_init(&p->alloc_lock);

     //父进程的TIF_SIGPENDING被复制进了子进程,这个标志表示有末处理的信号
     //这个标志子进程是不需要的
     clear_tsk_thread_flag(p, TIF_SIGPENDING);
     init_sigpending(&p->pending);

     //初始化子进程的time
     p->utime = cputime_zero;
     p->stime = cputime_zero;
     p->prev_utime = cputime_zero;
……
……
//tgid = pid
     p->tgid = p->pid;
     if (clone_flags & CLONE_THREAD)
         p->tgid = current->tgid;

     //copy父进程的其它资源.比例打开的文件,信号,VM等等
     if ((retval = security_task_alloc(p)))
          goto bad_fork_cleanup_policy;
     if ((retval = audit_alloc(p)))
         goto bad_fork_cleanup_security;
     /* copy all the process information */
     if ((retval = copy_semundo(clone_flags, p)))
         goto bad_fork_cleanup_audit;
     if ((retval = copy_files(clone_flags, p)))
         goto bad_fork_cleanup_semundo;
     if ((retval = copy_fs(clone_flags, p)))
         goto bad_fork_cleanup_files;
     if ((retval = copy_sighand(clone_flags, p)))
         goto bad_fork_cleanup_fs;
     if ((retval = copy_signal(clone_flags, p)))
         goto bad_fork_cleanup_sighand;
     if ((retval = copy_mm(clone_flags, p)))
         goto bad_fork_cleanup_signal;
     if ((retval = copy_keys(clone_flags, p)))
         goto bad_fork_cleanup_mm;
     if ((retval = copy_namespaces(clone_flags, p)))
         goto bad_fork_cleanup_keys;
     retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
     if (retval)
         goto bad_fork_cleanup_namespaces;

     p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
     /*
      * Clear TID on mm_release()?
      */
     p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
     p->robust_list = NULL;
#ifdef CONFIG_COMPAT
     p->compat_robust_list = NULL;
#endif
     INIT_LIST_HEAD(&p->pi_state_list);
     p->pi_state_cache = NULL;

     /*
      * sigaltstack should be cleared when sharing the same VM
      */
     if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
         p->sas_ss_sp = p->sas_ss_size = 0;

     /*
      * Syscall tracing should be turned off in the child regardless
      * of CLONE_PTRACE.
      */
     clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
     clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif

     /* Our parent execution domain becomes current domain
        These must match for thread signalling to apply */
     p->parent_exec_id = p->self_exec_id;

     /* ok, now we should be set up.. */
     //exit_signal: 子进程退出时给父进程发送的信号
     p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
     //pdeath_signal:进程退出时.给其下的子进程发送的信号
     p->pdeath_signal = 0;
     p->exit_state = 0;

     ……
     ……

     if (likely(p->pid)) {
         add_parent(p);
         if (unlikely(p->ptrace & PT_PTRACED))
              __ptrace_link(p, current->parent);

         if (thread_group_leader(p)) {
              p->signal->tty = current->signal->tty;
              p->signal->pgrp = process_group(current);
              set_signal_session(p->signal, process_session(current));
              attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
              attach_pid(p, PIDTYPE_SID, task_session(current));

              list_add_tail_rcu(&p->tasks, &init_task.tasks);
              __get_cpu_var(process_counts)++;
         }
         attach_pid(p, PIDTYPE_PID, pid);
         //当前进程数递增
         nr_threads++;
     }

     //被fork的进程数计数递增
     total_forks++;
     spin_unlock(&current->sighand->siglock);
     write_unlock_irq(&tasklist_lock);
     proc_fork_connector(p);
     return p;

……
……
}

参考:深入理解linux内核





到这里为止,进程的运行内间已经设置好了。但子进程的怎么返回到用户空间呢?这是在copy_process()—> copy_thread()中完成的。
int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
     unsigned long unused,
     struct task_struct * p, struct pt_regs * regs)
{
     struct pt_regs * childregs;
     struct task_struct *tsk;
     int err;

     //子进程的内核堆栈起点
     childregs = task_pt_regs(p);
     //将父进程的regs参数赋值到子进程的内核堆栈
     //regs参数:里面存放的是父进程陷入内核后的各寄存器的值
     *childregs = *regs;
     //eax:返回值. 将其设为0,子进程返回到用户空间后,它的返回值是0
     childregs->eax = 0;
     //esp:子进程的用户堆栈指针位置
     childregs->esp = esp;

     //子进程内核堆栈位置
     p->thread.esp = (unsigned long) childregs;
     //子进程内核堆栈指针位置
     p->thread.esp0 = (unsigned long) (childregs+1);
     //子进程要执行的下一条指令.对应子进程从系统空间返回用户空间
     p->thread.eip = (unsigned long) ret_from_fork;

     savesegment(gs,p->thread.gs);

     tsk = current;
     if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
         p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
                            IO_BITMAP_BYTES, GFP_KERNEL);
         if (!p->thread.io_bitmap_ptr) {
              p->thread.io_bitmap_max = 0;
              return -ENOMEM;
         }
         set_tsk_thread_flag(p, TIF_IO_BITMAP);
     }

     /*
      * Set a new TLS for the child thread?
      */
     if (clone_flags & CLONE_SETTLS) {
         struct desc_struct *desc;
         struct user_desc info;
         int idx;

         err = -EFAULT;
         if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))
              goto out;
         err = -EINVAL;
         if (LDT_empty(&info))
              goto out;

         idx = info.entry_number;
         if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
              goto out;

         desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
         desc->a = LDT_entry_a(&info);
         desc->b = LDT_entry_b(&info);
     }

     err = 0;
out:
     if (err && p->thread.io_bitmap_ptr) {
         kfree(p->thread.io_bitmap_ptr);
         p->thread.io_bitmap_max = 0;
     }
     return err;
}



在这里把ret_from_fork的地址赋值给p->thread.eip,p->thread.eip表示当进程下一次调度时的指令开始地址,
所以当线程创建后被调度时,是从ret_from_fork地址处开始的.

到这里说明,新的线程已产生了.
ENTRY(ret_from_fork)
    pushl %eax
    call schedule_tail
    GET_THREAD_INFO(%ebp)
    popl %eax
    jmp syscall_exit
syscall_exit:
...
work_resched:
    call schedule
...
当他从ret_from_fork退出时,会从堆栈中弹出原来保存的eip,而ip指向kernel_thread_helper,
至此kernel_thread_helper被调用,他就能够运行我们的指定的函数了do_exit().
从内核空间返回到用户空间。

你可能感兴趣的:(thread,linux,cache,Security,UP)