在linux c编程中,我们可以使用fork,vfork,clone三个系统调用来创建子进程。下面我们先分析下fork系统调用的实现原理。代码如下(kernel/fork.c):
/*
 * fork() system call entry point; only defined when the architecture
 * sets __ARCH_WANT_SYS_FORK.
 *
 * With CONFIG_MMU it forwards to do_fork() with SIGCHLD as the only flag,
 * zero stack_start/stack_size and NULL tid pointers, and returns whatever
 * do_fork() returns. Without CONFIG_MMU it returns -EINVAL.
 */
1  #ifdef __ARCH_WANT_SYS_FORK
2  SYSCALL_DEFINE0(fork)
3  {
4  #ifdef CONFIG_MMU
5      return do_fork(SIGCHLD, 0, 0, NULL, NULL);
6  #else
7      /* can not support in nommu mode */
8      return -EINVAL;
9  #endif
10 }
11 #endif
看见了么,linux中系统调用就是这样定义的。可以看见fork系统调用是do_fork函数的一个封装,它会通过软中断(系统门)的形式跳到内核函数do_fork中。下面我们来分析下do_fork函数(kernel/fork.c):
/*
 * do_fork - create a new task with copy_process() and start it running.
 *
 * @clone_flags:   flags; this function itself consults CLONE_UNTRACED,
 *                 CLONE_VFORK, CLONE_PARENT_SETTID and the CSIGNAL bits.
 * @stack_start:   passed through unchanged to copy_process().
 * @stack_size:    passed through unchanged to copy_process().
 * @parent_tidptr: user pointer that receives the new task's id (nr) when
 *                 CLONE_PARENT_SETTID is set.
 * @child_tidptr:  passed through unchanged to copy_process().
 *
 * Returns nr (the value of pid_vnr() for the new task) on success, or
 * PTR_ERR(p) when copy_process() returns an error pointer.
 */
1  long do_fork(unsigned long clone_flags,
2               unsigned long stack_start,
3               unsigned long stack_size,
4               int __user *parent_tidptr,
5               int __user *child_tidptr)
6  {
7      struct task_struct *p;
8      int trace = 0;
9      long nr;
10
11     /*
12      * Determine whether and which event to report to ptracer. When
13      * called from kernel_thread or CLONE_UNTRACED is explicitly
14      * requested, no event is reported; otherwise, report if the event
15      * for the type of forking is enabled.
16      */
17     if (!(clone_flags & CLONE_UNTRACED)) {
18         if (clone_flags & CLONE_VFORK)
19             trace = PTRACE_EVENT_VFORK;
20         else if ((clone_flags & CSIGNAL) != SIGCHLD)
21             trace = PTRACE_EVENT_CLONE;
22         else
23             trace = PTRACE_EVENT_FORK;
24
25         if (likely(!ptrace_event_enabled(current, trace)))
26             trace = 0; /* event not enabled for current: report nothing */
27     }
28
29     p = copy_process(clone_flags, stack_start, stack_size,
30                      child_tidptr, NULL, trace);
31     /*
32      * Do this prior waking up the new thread - the thread pointer
33      * might get invalid after that point, if the thread exits quickly.
34      */
35     if (!IS_ERR(p)) {
36         struct completion vfork;
37         struct pid *pid;
38
39         trace_sched_process_fork(current, p);
40
41         pid = get_task_pid(p, PIDTYPE_PID);
42         nr = pid_vnr(pid);
43
44         if (clone_flags & CLONE_PARENT_SETTID)
45             put_user(nr, parent_tidptr); /* hand nr back through the user pointer */
46
47         if (clone_flags & CLONE_VFORK) {
48             p->vfork_done = &vfork; /* on-stack completion, waited on at line 60 */
49             init_completion(&vfork);
50             get_task_struct(p);
51         }
52
53         wake_up_new_task(p);
54
55         /* forking complete and child started to run, tell ptracer */
56         if (unlikely(trace))
57             ptrace_event_pid(trace, pid);
58
59         if (clone_flags & CLONE_VFORK) {
60             if (!wait_for_vfork_done(p, &vfork))
61                 ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
62         }
63
64         put_pid(pid); /* pairs with get_task_pid() at line 41 */
65     } else {
66         nr = PTR_ERR(p); /* copy_process() failed: return its error code */
67     }
68     return nr;
69 }
第17-27行对函数接收到的标志进行判断,如果没有设置CLONE_UNTRACED标志,则依据创建进程所使用的系统调用,在trace变量中放入不同的事件类型值;如果当前进程的跟踪者并没有启用该事件(第25-26行),则把trace重新清0。第29行创建子进程的描述符以及其他的数据结构,返回值为子进程的描述符指针,保存在变量p中,该函数可以说是最核心的函数,待会来分析它。第35行,如果p指针没有出错,则进入if体。第41行获取子进程的pid结构体。第42行从该pid结构体中取得子进程在当前pid命名空间中的pid号,存放在nr变量中。第44-45行,如果设置了CLONE_PARENT_SETTID标志,则将子进程的pid号(nr)保存到父进程的一个用户态变量中,该变量的指针就是传进来的参数parent_tidptr。该函数中其余大部分代码是跟子进程追踪有关,我们暂且不分析这部分,主要分析下copy_process函数,代码如下(kernel/fork.c):
/*
 * copy_process - build a new task as a copy of current, without waking it.
 *
 * @clone_flags:  clone flags. Inconsistent combinations are rejected with
 *                ERR_PTR(-EINVAL) before anything is allocated (lines 11-52).
 * @stack_start:  passed to copy_thread().
 * @stack_size:   passed to copy_thread().
 * @child_tidptr: stored in p->set_child_tid / p->clear_child_tid when
 *                CLONE_CHILD_SETTID / CLONE_CHILD_CLEARTID is set.
 * @pid:          a struct pid is allocated with alloc_pid() unless the
 *                caller passed &init_struct_pid.
 * @trace:        ptrace_init_task() is called with
 *                (clone_flags & CLONE_PTRACE) || trace.
 *
 * Returns the new task_struct on success, or ERR_PTR(retval) on failure.
 * The bad_fork_* labels at the end undo the setup steps in reverse order,
 * so each failure point jumps to the label matching what has been set up
 * so far.
 */
1   static struct task_struct *copy_process(unsigned long clone_flags,
2                                           unsigned long stack_start,
3                                           unsigned long stack_size,
4                                           int __user *child_tidptr,
5                                           struct pid *pid,
6                                           int trace)
7   {
8       int retval;
9       struct task_struct *p;
10
11      if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
12          return ERR_PTR(-EINVAL);
13
14      if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
15          return ERR_PTR(-EINVAL);
16
17      /*
18       * Thread groups must share signals as well, and detached threads
19       * can only be started up within the thread group.
20       */
21      if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
22          return ERR_PTR(-EINVAL);
23
24      /*
25       * Shared signal handlers imply shared VM. By way of the above,
26       * thread groups also imply shared VM. Blocking this case allows
27       * for various simplifications in other code.
28       */
29      if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
30          return ERR_PTR(-EINVAL);
31
32      /*
33       * Siblings of global init remain as zombies on exit since they are
34       * not reaped by their parent (swapper). To solve this and to avoid
35       * multi-rooted process trees, prevent global and container-inits
36       * from creating siblings.
37       */
38      if ((clone_flags & CLONE_PARENT) &&
39                  current->signal->flags & SIGNAL_UNKILLABLE)
40          return ERR_PTR(-EINVAL);
41
42      /*
43       * If the new process will be in a different pid or user namespace
44       * do not allow it to share a thread group or signal handlers or
45       * parent with the forking task.
46       */
47      if (clone_flags & CLONE_SIGHAND) {
48          if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
49              (task_active_pid_ns(current) !=
50                      current->nsproxy->pid_ns_for_children))
51              return ERR_PTR(-EINVAL);
52      }
53
54      retval = security_task_create(clone_flags);
55      if (retval)
56          goto fork_out;
57
58      retval = -ENOMEM;
59      p = dup_task_struct(current);
60      if (!p)
61          goto fork_out;
62
63      ftrace_graph_init_task(p);
64      get_seccomp_filter(p);
65
66      rt_mutex_init_task(p);
67
68  #ifdef CONFIG_PROVE_LOCKING
69      DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
70      DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
71  #endif
72      retval = -EAGAIN;
73      if (atomic_read(&p->real_cred->user->processes) >=
74              task_rlimit(p, RLIMIT_NPROC)) {
75          if (p->real_cred->user != INIT_USER &&
76              !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
77              goto bad_fork_free;
78      }
79      current->flags &= ~PF_NPROC_EXCEEDED;
80
81      retval = copy_creds(p, clone_flags);
82      if (retval < 0)
83          goto bad_fork_free;
84
85      /*
86       * If multiple threads are within copy_process(), then this check
87       * triggers too late. This doesn't hurt, the check is only there
88       * to stop root fork bombs.
89       */
90      retval = -EAGAIN;
91      if (nr_threads >= max_threads)
92          goto bad_fork_cleanup_count;
93
94      if (!try_module_get(task_thread_info(p)->exec_domain->module))
95          goto bad_fork_cleanup_count;
96
97      delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
98      p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
99      p->flags |= PF_FORKNOEXEC;
100     INIT_LIST_HEAD(&p->children);
101     INIT_LIST_HEAD(&p->sibling);
102     rcu_copy_process(p);
103     p->vfork_done = NULL;
104     spin_lock_init(&p->alloc_lock);
105
106     init_sigpending(&p->pending);
107
108     p->utime = p->stime = p->gtime = 0;
109     p->utimescaled = p->stimescaled = 0;
110 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
111     p->prev_cputime.utime = p->prev_cputime.stime = 0;
112 #endif
113 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
114     seqlock_init(&p->vtime_seqlock);
115     p->vtime_snap = 0;
116     p->vtime_snap_whence = VTIME_SLEEPING;
117 #endif
118
119 #if defined(SPLIT_RSS_COUNTING)
120     memset(&p->rss_stat, 0, sizeof(p->rss_stat));
121 #endif
122
123     p->default_timer_slack_ns = current->timer_slack_ns;
124
125     task_io_accounting_init(&p->ioac);
126     acct_clear_integrals(p);
127
128     posix_cpu_timers_init(p);
129
130     do_posix_clock_monotonic_gettime(&p->start_time);
131     p->real_start_time = p->start_time;
132     monotonic_to_bootbased(&p->real_start_time);
133     p->io_context = NULL;
134     p->audit_context = NULL;
135     if (clone_flags & CLONE_THREAD)
136         threadgroup_change_begin(current);
137     cgroup_fork(p);
138 #ifdef CONFIG_NUMA
139     p->mempolicy = mpol_dup(p->mempolicy);
140     if (IS_ERR(p->mempolicy)) {
141         retval = PTR_ERR(p->mempolicy);
142         p->mempolicy = NULL;
143         goto bad_fork_cleanup_threadgroup_lock;
144     }
145 #endif
146 #ifdef CONFIG_CPUSETS
147     p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
148     p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
149     seqcount_init(&p->mems_allowed_seq);
150 #endif
151 #ifdef CONFIG_TRACE_IRQFLAGS
152     p->irq_events = 0;
153     p->hardirqs_enabled = 0;
154     p->hardirq_enable_ip = 0;
155     p->hardirq_enable_event = 0;
156     p->hardirq_disable_ip = _THIS_IP_;
157     p->hardirq_disable_event = 0;
158     p->softirqs_enabled = 1;
159     p->softirq_enable_ip = _THIS_IP_;
160     p->softirq_enable_event = 0;
161     p->softirq_disable_ip = 0;
162     p->softirq_disable_event = 0;
163     p->hardirq_context = 0;
164     p->softirq_context = 0;
165 #endif
166 #ifdef CONFIG_LOCKDEP
167     p->lockdep_depth = 0; /* no locks held yet */
168     p->curr_chain_key = 0;
169     p->lockdep_recursion = 0;
170 #endif
171
172 #ifdef CONFIG_DEBUG_MUTEXES
173     p->blocked_on = NULL; /* not blocked yet */
174 #endif
175 #ifdef CONFIG_MEMCG
176     p->memcg_batch.do_batch = 0;
177     p->memcg_batch.memcg = NULL;
178 #endif
179 #ifdef CONFIG_BCACHE
180     p->sequential_io = 0;
181     p->sequential_io_avg = 0;
182 #endif
183
184     /* Perform scheduler related setup. Assign this task to a CPU. */
185     retval = sched_fork(clone_flags, p);
186     if (retval)
187         goto bad_fork_cleanup_policy;
188
189     retval = perf_event_init_task(p);
190     if (retval)
191         goto bad_fork_cleanup_policy;
192     retval = audit_alloc(p);
193     if (retval)
194         goto bad_fork_cleanup_policy;
195     /* copy all the process information */
196     retval = copy_semundo(clone_flags, p);
197     if (retval)
198         goto bad_fork_cleanup_audit;
199     retval = copy_files(clone_flags, p);
200     if (retval)
201         goto bad_fork_cleanup_semundo;
202     retval = copy_fs(clone_flags, p);
203     if (retval)
204         goto bad_fork_cleanup_files;
205     retval = copy_sighand(clone_flags, p);
206     if (retval)
207         goto bad_fork_cleanup_fs;
208     retval = copy_signal(clone_flags, p);
209     if (retval)
210         goto bad_fork_cleanup_sighand;
211     retval = copy_mm(clone_flags, p);
212     if (retval)
213         goto bad_fork_cleanup_signal;
214     retval = copy_namespaces(clone_flags, p);
215     if (retval)
216         goto bad_fork_cleanup_mm;
217     retval = copy_io(clone_flags, p);
218     if (retval)
219         goto bad_fork_cleanup_namespaces;
220     retval = copy_thread(clone_flags, stack_start, stack_size, p);
221     if (retval)
222         goto bad_fork_cleanup_io;
223
224     if (pid != &init_struct_pid) {
225         retval = -ENOMEM;
226         pid = alloc_pid(p->nsproxy->pid_ns_for_children);
227         if (!pid)
228             goto bad_fork_cleanup_io;
229     }
230
231     p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
232     /*
233      * Clear TID on mm_release()?
234      */
235     p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
236 #ifdef CONFIG_BLOCK
237     p->plug = NULL;
238 #endif
239 #ifdef CONFIG_FUTEX
240     p->robust_list = NULL;
241 #ifdef CONFIG_COMPAT
242     p->compat_robust_list = NULL;
243 #endif
244     INIT_LIST_HEAD(&p->pi_state_list);
245     p->pi_state_cache = NULL;
246 #endif
247     /*
248      * sigaltstack should be cleared when sharing the same VM
249      */
250     if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
251         p->sas_ss_sp = p->sas_ss_size = 0;
252
253     /*
254      * Syscall tracing and stepping should be turned off in the
255      * child regardless of CLONE_PTRACE.
256      */
257     user_disable_single_step(p);
258     clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
259 #ifdef TIF_SYSCALL_EMU
260     clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
261 #endif
262     clear_all_latency_tracing(p);
263
264     /* ok, now we should be set up.. */
265     p->pid = pid_nr(pid);
266     if (clone_flags & CLONE_THREAD) {
267         p->exit_signal = -1;
268         p->group_leader = current->group_leader;
269         p->tgid = current->tgid;
270     } else {
271         if (clone_flags & CLONE_PARENT)
272             p->exit_signal = current->group_leader->exit_signal;
273         else
274             p->exit_signal = (clone_flags & CSIGNAL);
275         p->group_leader = p;
276         p->tgid = p->pid;
277     }
278
279     p->nr_dirtied = 0;
280     p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
281     p->dirty_paused_when = 0;
282
283     p->pdeath_signal = 0;
284     INIT_LIST_HEAD(&p->thread_group);
285     p->task_works = NULL;
286
287     /*
288      * Make it visible to the rest of the system, but dont wake it up yet.
289      * Need tasklist lock for parent etc handling!
290      */
291     write_lock_irq(&tasklist_lock);
292
293     /* CLONE_PARENT re-uses the old parent */
294     if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
295         p->real_parent = current->real_parent;
296         p->parent_exec_id = current->parent_exec_id;
297     } else {
298         p->real_parent = current;
299         p->parent_exec_id = current->self_exec_id;
300     }
301
302     spin_lock(&current->sighand->siglock);
303
304     /*
305      * Process group and session signals need to be delivered to just the
306      * parent before the fork or both the parent and the child after the
307      * fork. Restart if a signal comes in before we add the new process to
308      * it's process group.
309      * A fatal signal pending means that current will exit, so the new
310      * thread can't slip out of an OOM kill (or normal SIGKILL).
311      */
312     recalc_sigpending();
313     if (signal_pending(current)) {
314         spin_unlock(&current->sighand->siglock);
315         write_unlock_irq(&tasklist_lock);
316         retval = -ERESTARTNOINTR;
317         goto bad_fork_free_pid;
318     }
319
320     if (likely(p->pid)) {
321         ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
322
323         init_task_pid(p, PIDTYPE_PID, pid);
324         if (thread_group_leader(p)) {
325             init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
326             init_task_pid(p, PIDTYPE_SID, task_session(current));
327
328             if (is_child_reaper(pid)) {
329                 ns_of_pid(pid)->child_reaper = p;
330                 p->signal->flags |= SIGNAL_UNKILLABLE;
331             }
332
333             p->signal->leader_pid = pid;
334             p->signal->tty = tty_kref_get(current->signal->tty);
335             list_add_tail(&p->sibling, &p->real_parent->children);
336             list_add_tail_rcu(&p->tasks, &init_task.tasks);
337             attach_pid(p, PIDTYPE_PGID);
338             attach_pid(p, PIDTYPE_SID);
339             __this_cpu_inc(process_counts);
340         } else {
341             current->signal->nr_threads++;
342             atomic_inc(&current->signal->live);
343             atomic_inc(&current->signal->sigcnt);
344             list_add_tail_rcu(&p->thread_group,
345                               &p->group_leader->thread_group);
346             list_add_tail_rcu(&p->thread_node,
347                               &p->signal->thread_head);
348         }
349         attach_pid(p, PIDTYPE_PID);
350         nr_threads++;
351     }
352
353     total_forks++;
354     spin_unlock(&current->sighand->siglock);
355     syscall_tracepoint_update(p);
356     write_unlock_irq(&tasklist_lock);
357
358     proc_fork_connector(p);
359     cgroup_post_fork(p);
360     if (clone_flags & CLONE_THREAD)
361         threadgroup_change_end(current);
362     perf_event_fork(p);
363
364     trace_task_newtask(p, clone_flags);
365     uprobe_copy_process(p, clone_flags);
366
367     return p;
368
369 bad_fork_free_pid:
370     if (pid != &init_struct_pid)
371         free_pid(pid);
372 bad_fork_cleanup_io:
373     if (p->io_context)
374         exit_io_context(p);
375 bad_fork_cleanup_namespaces:
376     exit_task_namespaces(p);
377 bad_fork_cleanup_mm:
378     if (p->mm)
379         mmput(p->mm);
380 bad_fork_cleanup_signal:
381     if (!(clone_flags & CLONE_THREAD))
382         free_signal_struct(p->signal);
383 bad_fork_cleanup_sighand:
384     __cleanup_sighand(p->sighand);
385 bad_fork_cleanup_fs:
386     exit_fs(p); /* blocking */
387 bad_fork_cleanup_files:
388     exit_files(p); /* blocking */
389 bad_fork_cleanup_semundo:
390     exit_sem(p);
391 bad_fork_cleanup_audit:
392     audit_free(p);
393 bad_fork_cleanup_policy:
394     perf_event_free_task(p);
395 #ifdef CONFIG_NUMA
396     mpol_put(p->mempolicy);
397 bad_fork_cleanup_threadgroup_lock:
398 #endif
399     if (clone_flags & CLONE_THREAD)
400         threadgroup_change_end(current);
401     delayacct_tsk_free(p);
402     module_put(task_thread_info(p)->exec_domain->module);
403 bad_fork_cleanup_count:
404     atomic_dec(&p->cred->user->processes);
405     exit_creds(p);
406 bad_fork_free:
407     free_task(p);
408 fork_out:
409     return ERR_PTR(retval);
410 }
第11-52行对传递进来的clone_flags标志进行一致性检查,如果有错误,返回错误代码。第54行对clone_flags进行附加性安全检查。第59行为子进程创建进程描述符。第63行,这个版本的linux内核源码中该函数为空函数,不研究它。第64行增加子进程p的seccomp.filter结构体成员的引用数量。第66行初始化子进程中的互斥锁。第73-77行比较子进程的拥有者所拥有的进程数量是否超过限制,如果超过了限制并且该拥有者不是root用户(同时当前进程既没有CAP_SYS_RESOURCE也没有CAP_SYS_ADMIN权能),就会跳到错误处理代码。第81行将父进程的拥有者信息拷贝给子进程。第91行检查系统中的进程数量是否超过了限制,nr_threads是内核中的全局变量,存放着进程的总数。第94行检查子进程的执行域(要执行的代码)所在的模块是否已经加载进了内核,如果没有的话就进行出错处理。第98-99行对子进程flags进行初始化。第100-101行对子进程的孩子链表和兄弟链表进行初始化。第102行初始化子进程的rcu(一种锁机制)。第104行初始化子进程的alloc_lock自旋锁。第106行初始化进程的挂起信号集。第108-116行对进程的各种时间进行初始化。第125行初始化子进程的io计数。第126行对子进程的多个计数域进行清零。第128行初始化子进程的定时器。第185-222行,使用了多个copy_***函数创建了子进程要用的各种数据结构,并将父进程的相应结构体中的内容拷贝进来,或者根据flags指定的内容来初始化子进程的这些数据结构。第224-228行,如果传进来的pid指针和全局结构体变量init_struct_pid的地址不相同的话,就要为子进程分配pid结构体。第265行从pid结构体中获取到子进程的pid号,保存到子进程描述符的pid域。第266-269行,如果设置了CLONE_THREAD标志,说明子进程和父进程在同一个线程组,那么子进程将继承父进程的tgid,否则第275-276行,子进程的组领导者就是它自己,线程组号tgid是它自己的pid号。第279-281行设置一些和进程脏页限制有关的成员。下面291-356行的代码需要关掉中断并且获取写锁。第294-296行对进程亲子关系初始化,如果设置了CLONE_PARENT或CLONE_THREAD标志(两者之一即可),子进程的真实父进程设置为它的父进程的真实父进程(它的爷爷进程),否则第298-299行,子进程的真实父进程设置为它的父进程。第302-354行需要获取自旋锁。第321行初始化子进程的ptraced字段(和进程追踪有关)。第323行,对子进程的pid结构体进行初始化。第324-340行,如果子进程是线程组的组长,则对子进程进行相应处理,其中,第325-326行将子进程的进程组和会话组的组长分别设置为父进程的组领导的进程组和会话组组长,第337-338行,将子进程加入它所在组的哈希链表中。否则,子进程不是线程组组长的话,第341-347行只增加线程组的线程数和引用计数,并把子进程加入线程组链表,不做进程组和会话相关的pid操作。第349行,同样要将子进程加入到pid哈希表中。第350行全局变量nr_threads自加1,说明系统中的进程又多了一个。如果整个函数内部没有出错的话,第367行返回创建好的子进程描述符指针p。第369行到结束,均为错误处理的代码,如果函数中有出错的地方,就会跳到这些代码中。
下面我们接着分析第59行的dup_task_struct函数,代码如下(kernel/fork.c):
/*
 * dup_task_struct - allocate a task_struct and thread_info for a new task
 * and initialise them from @orig.
 *
 * @orig: the task being duplicated.
 *
 * Returns the new task_struct, or NULL if either allocation or
 * arch_dup_task_struct() fails. On failure everything allocated so far
 * is released through the free_ti / free_tsk labels.
 */
1  static struct task_struct *dup_task_struct(struct task_struct *orig)
2  {
3      struct task_struct *tsk;
4      struct thread_info *ti;
5      unsigned long *stackend;
6      int node = tsk_fork_get_node(orig);
7      int err;
8
9      tsk = alloc_task_struct_node(node);
10     if (!tsk)
11         return NULL;
12
13     ti = alloc_thread_info_node(tsk, node);
14     if (!ti)
15         goto free_tsk;
16
17     err = arch_dup_task_struct(tsk, orig);
18     if (err)
19         goto free_ti;
20
21     tsk->stack = ti; /* the new task's stack pointer refers to its own thread_info */
22
23     setup_thread_stack(tsk, orig);
24     clear_user_return_notifier(tsk);
25     clear_tsk_need_resched(tsk);
26     stackend = end_of_stack(tsk);
27     *stackend = STACK_END_MAGIC; /* for overflow detection */
28
29 #ifdef CONFIG_CC_STACKPROTECTOR
30     tsk->stack_canary = get_random_int();
31 #endif
32
33     /*
34      * One for us, one for whoever does the "release_task()" (usually
35      * parent)
36      */
37     atomic_set(&tsk->usage, 2);
38 #ifdef CONFIG_BLK_DEV_IO_TRACE
39     tsk->btrace_seq = 0;
40 #endif
41     tsk->splice_pipe = NULL;
42     tsk->task_frag.page = NULL;
43
44     account_kernel_stack(ti, 1);
45
46     return tsk;
47
48 free_ti:
49     free_thread_info(ti);
50 free_tsk:
51     free_task_struct(tsk);
52     return NULL;
53 }
在该函数中创建了子进程的进程描述符,并将描述符指针返回。第6行,如果使用到了NUMA技术的话,函数返回父进程描述符的pref_node_fork字段值,否则,返回-1。第9行为子进程创建进程描述符,将描述符指针保存在tsk变量中。第13行为子进程创建thread_info结构体(也就是创建内核栈),并返回结构体指针(该指针实际上也是子进程堆栈的内核栈指针)。第17行将父进程描述符的各个成员值全部复制给子进程描述符的成员。第21行将子进程内核栈地址ti赋给stack成员。第23行对子进程的thread_info结构体进行初始化,将父进程的该结构体成员值复制给子进程,然后将子进程的描述符指针存入thread_info结构体的task域(从thread_info就可找到进程描述符了)。第25行清除子进程thread_info结构体中的flag标志(具体而言,就是need_resched标志,即清除从父进程复制过来的重新调度请求)。第26行将子进程内核栈的末端地址(栈溢出时最先被覆盖的位置)保存到stackend变量中。第27行向子进程内核堆栈的该位置存入STACK_END_MAGIC,来进行堆栈溢出检测。第37行将子进程描述符使用计数置为2(一个表示父进程的引用,一个表示自己)。第44行将子进程的内核栈所在的zone结构体中的内核栈统计计数加1。第46行返回子进程描述符指针。接下来,我们回到do_fork函数中,第53行wake_up_new_task函数会将刚创建好的子进程加入到运行队列中并且唤醒它,下面看看该函数(kernel/sched/core.c)。
/*
 * wake_up_new_task - put a newly created task on a runqueue for the
 * first time.
 *
 * @p: the new task.
 *
 * Runs with p->pi_lock held and interrupts saved in flags. On SMP a CPU
 * is chosen with select_task_rq(..., SD_BALANCE_FORK, ...) before the
 * runqueue is locked. The task is then activated, marked on_rq, and
 * check_preempt_curr() is called with WF_FORK. task_rq_unlock() releases
 * the locks and restores flags.
 */
1  void wake_up_new_task(struct task_struct *p)
2  {
3      unsigned long flags;
4      struct rq *rq;
5
6      raw_spin_lock_irqsave(&p->pi_lock, flags);
7  #ifdef CONFIG_SMP
8      /*
9       * Fork balancing, do it here and not earlier because:
10      * - cpus_allowed can change in the fork path
11      * - any previously selected cpu might disappear through hotplug
12      */
13     set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
14 #endif
15
16     /* Initialize new task's runnable average */
17     init_task_runnable_average(p);
18     rq = __task_rq_lock(p);
19     activate_task(rq, p, 0);
20     p->on_rq = 1;
21     trace_sched_wakeup_new(p, true);
22     check_preempt_curr(rq, p, WF_FORK);
23 #ifdef CONFIG_SMP
24     if (p->sched_class->task_woken) /* optional per-class hook */
25         p->sched_class->task_woken(rq, p);
26 #endif
27     task_rq_unlock(rq, p, &flags);
28 }
该函数暂且放到这里,待我把进程调度这块的博文完成之后,再完善这里。该函数执行完后,回到do_fork中,第68行向父进程返回子进程的pid号,子进程创建的工作就完成了。此后父子进程公平地参与进程调度。