Linux提供了几个系统调用来创建和终止进程,以及执行新程序,他们分别是Fork,vfork,clone和exec,exit;其中clone用来创建轻量级进程,必须制定要共享的资源,exec系统调用执行一个新程序,exit系统调用终止进程。不论是fork,vfork还是clone,在内核中最终都是调用了do_fork来实现进程的创建。
asmlinkage int sys_fork(unsigned long r4, unsigned long r5,
unsigned long r6, unsigned long r7,
struct pt_regs __regs)
{
#ifdef CONFIG_MMU
struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
return do_fork(SIGCHLD, regs->regs[15], regs, 0, NULL, NULL);
#else
/* fork almost works, enough to trick you into looking elsewhere :-( */
return -EINVAL;
#endif
}
asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
unsigned long parent_tidptr,
unsigned long child_tidptr,
struct pt_regs __regs)
{
struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
if (!newsp)
newsp = regs->regs[15];
return do_fork(clone_flags, newsp, regs, 0,
(int __user *)parent_tidptr,
(int __user *)child_tidptr);
}
asmlinkage int sys_vfork(unsigned long r4, unsigned long r5,
unsigned long r6, unsigned long r7,
struct pt_regs __regs)
{
struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->regs[15], regs,
0, NULL, NULL);
}
do_fork() 函数生成一个新的进程,大致分为三个步骤。
下面我们通过内核源码仔细讲解每个步骤都做了什么:
struct task_struct *p;
int trace = 0;
long nr;
/*
* Do some preliminary argument and permissions checking before we
* actually start allocating stuff
*/
if (clone_flags & CLONE_NEWUSER) {
if (clone_flags & CLONE_THREAD)
return -EINVAL;
/* hopefully this check will go away when userns support is
* complete
*/
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
!capable(CAP_SETGID))
return -EPERM;
}
/*
* We hope to recycle these flags after 2.6.26
*/
if (unlikely(clone_flags & CLONE_STOPPED)) {
static int __read_mostly count = 100;
if (count > 0 && printk_ratelimit()) {
char comm[TASK_COMM_LEN];
count--;
printk(KERN_INFO "fork(): process `%s' used deprecated "
"clone flags 0x%lx\n",
get_task_comm(comm, current),
clone_flags & CLONE_STOPPED);
}
}
/*
* When called from kernel_thread, don't do user tracing stuff.
*/
if (likely(user_mode(regs)))
trace = tracehook_prepare_clone(clone_flags);
p = copy_process(clone_flags, stack_start, regs, stack_size,
child_tidptr, NULL, trace);
首先把新进程加入到进程链表中, 把新进程加入到 pidhash 散列表中,并增加任务计数值。通过拷贝父进程的上、下文来初始化硬件的上下文(TSS段、LDT以及 GDT)。
if (!IS_ERR(p)) {//函数 IS_ERR()分析copy_process()的返回值是否正确。
struct completion vfork;//定义struct completion 类型的变量 vfork;
trace_sched_process_fork(current, p);
nr = task_pid_vnr(p);
if (clone_flags & CLONE_PARENT_SETTID)
put_user(nr, parent_tidptr);
if (clone_flags & CLONE_VFORK) {//判断clone_flags中是否有CLONE_VFORK标志
p->vfork_done = &vfork;
init_completion(&vfork);/*这个函数的作用是在进程创建的最后阶段,父进程会将自己设置为不可中断状态,然后睡眠在
等待队列上(init_waitqueue_head()函数 就是将父进程加入到子进程的等待队列),等待子进程的唤醒。*/
}
audit_finish_fork(p);
tracehook_report_clone(regs, clone_flags, nr, p);
/*
* We set PF_STARTING at creation in case tracing wants to
* use this to distinguish a fully live task from one that
* hasn't gotten to tracehook_report_clone() yet. Now we
* clear it and set the child going.
*/
p->flags &= ~PF_STARTING;
if (unlikely(clone_flags & CLONE_STOPPED)) {
/*
* We'll start up with an immediate SIGSTOP.
*/
sigaddset(&p->pending.signal, SIGSTOP);
set_tsk_thread_flag(p, TIF_SIGPENDING);
__set_task_state(p, TASK_STOPPED);
} else {
wake_up_new_task(p, clone_flags);
}
tracehook_report_clone_complete(trace, regs,
clone_flags, nr, p);
if (clone_flags & CLONE_VFORK) {
freezer_do_not_count();
wait_for_completion(&vfork);
freezer_count();
tracehook_report_vfork_done(p, nr);
}
} else {
nr = PTR_ERR(p);
}
return nr;
}
(位于 kernel/fork.c 文件中,版本号:linux-2.6.32.65)
long do_fork(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr)
{
struct task_struct *p;//在内存中分配一个 task_struct 数据结构,以代表即将产生的新进程
int trace = 0;
long nr;
/*
* Do some preliminary argument and permissions checking before we
* actually start allocating stuff
*/
if (clone_flags & CLONE_NEWUSER) { //clone and new user yes
if (clone_flags & CLONE_THREAD)
return -EINVAL;
/* hopefully this check will go away when userns support is
* complete
*/
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
!capable(CAP_SETGID))
return -EPERM;
}
/*
* We hope to recycle these flags after 2.6.26
*/
if (unlikely(clone_flags & CLONE_STOPPED)) {
static int __read_mostly count = 100;
if (count > 0 && printk_ratelimit()) {
char comm[TASK_COMM_LEN];
count--;
printk(KERN_INFO "fork(): process `%s' used deprecated "
"clone flags 0x%lx\n",
get_task_comm(comm, current),
clone_flags & CLONE_STOPPED);
}
}
/*
* When called from kernel_thread, don't do user tracing stuff.
*/
if (likely(user_mode(regs)))
trace = tracehook_prepare_clone(clone_flags);
p = copy_process(clone_flags, stack_start, regs, stack_size,
child_tidptr, NULL, trace);//把父进程 PCB 的内容复制到新进程的 PCB 中。
/*通过copy_process()函数完成具体的进程创建工作,返回值类型为task_t类型
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
*/
if (!IS_ERR(p)) {//函数 IS_ERR()分析copy_process()的返回值是否正确。
struct completion vfork;//定义struct completion 类型的变量 vfork;
trace_sched_process_fork(current, p);
nr = task_pid_vnr(p);
if (clone_flags & CLONE_PARENT_SETTID)
put_user(nr, parent_tidptr);
if (clone_flags & CLONE_VFORK) {//判断clone_flags中是否有CLONE_VFORK标志
p->vfork_done = &vfork;
init_completion(&vfork);/*这个函数的作用是在进程创建的最后阶段,父进程会将自己设置为不可中断状态,然后睡眠在
等待队列上(init_waitqueue_head()函数 就是将父进程加入到子进程的等待队列),等待子进程的唤醒。*/
}
audit_finish_fork(p);
tracehook_report_clone(regs, clone_flags, nr, p);
/*
* We set PF_STARTING at creation in case tracing wants to
* use this to distinguish a fully live task from one that
* hasn't gotten to tracehook_report_clone() yet. Now we
* clear it and set the child going.
*/
p->flags &= ~PF_STARTING;
if (unlikely(clone_flags & CLONE_STOPPED)) {
/*
* We'll start up with an immediate SIGSTOP.
*/
sigaddset(&p->pending.signal, SIGSTOP);
set_tsk_thread_flag(p, TIF_SIGPENDING);
__set_task_state(p, TASK_STOPPED);
} else {
wake_up_new_task(p, clone_flags);
}
tracehook_report_clone_complete(trace, regs,
clone_flags, nr, p);
if (clone_flags & CLONE_VFORK) {
freezer_do_not_count();
wait_for_completion(&vfork);
freezer_count();
tracehook_report_vfork_done(p, nr);
}
} else {
nr = PTR_ERR(p);
}
return nr;
}
想要看更加详细内容的可以点击下面:
引用文章