copy_process函数在进程创建的do_fork函数中调用,主要完成进程数据结构,各种资源的初始化。初始化方式可以重新分配,也可以共享父进程资源,主要根据传入CLONE参数来确定。
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; int cgroup_callbacks_done = 0; /*下面为参数有效性检查*/ if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); /* * Siblings of global init remain as zombies on exit since they are * not reaped by their parent (swapper). To solve this and to avoid * multi-rooted process trees, prevent global and container-inits * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); /*Check permission before creating a child process. See the clone(2) * manual page for definitions of the @clone_flags. * @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted.*/ retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; /*为新进程创建一个内核栈、thread_iofo和task_struct, 这里完全copy父进程的内容,所以到目前为止, 父进程和子进程是没有任何区别的。,从这里也 可以看出,这个版本的内核在创建时需要为自己 分配内核栈*/ p = dup_task_struct(current); if (!p) goto fork_out; /*task结构中ftrace_ret_stack结构变量的初始化,即函数 返回用的栈*/ ftrace_graph_init_task(p); /*task中互斥变量的初始化*/ rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING/*not set*/ DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; /*首先看前面的两个if,第一个if里面的rlim数组包含在task_sturct数组中。 对进程占用的资源数做出限制,rlim[RLIMIT_NPROC]限制了改进程用户 可以拥有的总进程数量,如果当前用户所拥有的进程数量超过了 规定的最大拥有进程数量,在内核中就直接goto bad_fork_free了。 第2个if使用了capable()函数来对权限做出检查,检查是否有权 对指定的资源进行操作,该函数返回0则代表无权操作。*/ if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } /*Copy credentials for the new process*/ retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ retval = -EAGAIN; /*检查创建的进程是否超过了系统进程总量*/ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ /*更新task_struct结构中flags成员*/ copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p);/*rcu相关变量的初始化*/ p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->gtime = cputime_zero; p->utimescaled = cputime_zero; p->stimescaled = cputime_zero; p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac);/*task中io数据记录的初始化*/ acct_clear_integrals(p);/*clear the mm integral fields in task_struct*/ posix_cpu_timers_init(p);/*timer初始化*/ p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p);/*attach newly forked task to its parents cgroup.*/ #ifdef CONFIG_NUMA/*not set*/ p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS/*yes*/ p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW/*not found*/ p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP/*yes*/ p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES/*not set */ p->blocked_on = NULL; /* not blocked yet */ #endif p->bts = NULL; p->stack_start = stack_start; /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags);/*调度相关初始化*/ /* * Initialize the perf_event context in task_struct */ retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; /*下面的操作中,根据flag中是否设置了相关标志 进行重新分配或者共享父进程的内容*/ /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; if (clone_flags & CLONE_NEWPID) { retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); if (retval < 0) goto bad_fork_free_pid; } } p->pid = pid_nr(pid); p->tgid = p->pid; /*如果设置了同在一个线程组则继承TGID。 对于普通进程来说TGID和PID相等, 对于线程来说,同一线程组内的所有线程的TGID都相等, 这使得这些多线程可以通过调用getpid()获得相同的PID。*/ if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; /*如果命名空间不相等,clone the cgroup the given subsystem is attached to*/ if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) goto bad_fork_free_pid; } /*如果设置了CLONE_CHILD_SETTID,将变量设置为参数*/ p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; #ifdef CONFIG_FUTEX/*yes*/ p->robust_list = NULL; #ifdef CONFIG_COMPAT/*not found*/ p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU/*not found*/ clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible * on the tasklist. */ cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* * The task hasn't been attached yet, so its cpus_allowed mask will * not be changed, nor will its assigned CPU. * * The cpus_allowed mask of the parent may have changed after it was * copied first time - so re-copy it here, then check the child's CPU * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids alot of nasty races. */ p->cpus_allowed = current->cpus_allowed; p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ /*如果这两个标志设定了,那么和父进程 有相同的父进程*/ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else {/*父进程为实际父进程*/ p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(¤t->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * it's process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } /*如果和父进程有相同的线程组*/ if (clone_flags & CLONE_THREAD) { atomic_inc(¤t->signal->count); atomic_inc(¤t->signal->live); p->group_leader = current->group_leader;/*和父进程相同的组领导*/ /*将进程加入组leader的链表尾部*/ list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } if (likely(p->pid)) { /*保持进程应有的关系*/ list_add_tail(&p->sibling, &p->real_parent->children); /*ptrace的相关初始化*/ tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) {/*如果进程p时线程组leader*/ if (clone_flags & CLONE_NEWPID) /*如果是新创建的pid命名空间,那么将 命名空间的child_reaper变量置为p,从这里看出 这个变量设置为创建者进程控制块*/ p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; tty_kref_put(p->signal->tty);/*释放tty引用*/ p->signal->tty = tty_kref_get(current->signal->tty);/*得到tty引用*/ /*下面两行为加入对应的pid哈希表,关于pid 的组织参考前一篇文章*/ attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks);/*加入队列*/ __get_cpu_var(process_counts)++;/*将per cpu变量加一*/ } attach_pid(p, PIDTYPE_PID, pid);/*维护pid变量*/ nr_threads++;/*线程数加一*/ } total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); /*Adds the task to the list running through its css_set if necessary.*/ cgroup_post_fork(p); perf_event_fork(p); return p; bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: put_io_context(p->io_context); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) __cleanup_signal(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
static struct task_struct *dup_task_struct(struct task_struct *orig) { struct task_struct *tsk; struct thread_info *ti; unsigned long *stackend; int err; /*fpu相关操作*/ prepare_to_copy(orig); /*通过alloc_task_struct()函数创建task_struct结构空间*/ tsk = alloc_task_struct(); if (!tsk) return NULL; /*分配thread_info结构空间,可以看到order为1,也就是两个页面*/ ti = alloc_thread_info(tsk); if (!ti) {/*如果thread info结构没申请到,释放tsk*/ free_task_struct(tsk); return NULL; } /*关于浮点结构的复制*/ err = arch_dup_task_struct(tsk, orig); if (err) goto out; tsk->stack = ti;/*task的对应栈*/ /*初始化prop_local_single结构*/ err = prop_local_init_single(&tsk->dirties); if (err) goto out; /*初始化thread info结构*/ setup_thread_stack(tsk, orig); stackend = end_of_stack(tsk);/*返回的是栈结束的地址*/ /*将这个地址设置数据为栈结束标示*/ *stackend = STACK_END_MAGIC; /* for overflow detection */ #ifdef CONFIG_CC_STACKPROTECTOR/*yes*/ tsk->stack_canary = get_random_int();/*初始化stack_canary变量*/ #endif /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); atomic_set(&tsk->fs_excl, 0);/*占有的fs资源*/ #ifdef CONFIG_BLK_DEV_IO_TRACE/*yes*/ tsk->btrace_seq = 0; #endif tsk->splice_pipe = NULL; account_kernel_stack(ti, 1); return tsk; out: free_thread_info(ti); free_task_struct(tsk); return NULL; }
/* * fork()/clone()-time setup: */ void sched_fork(struct task_struct *p, int clone_flags) { int cpu = get_cpu();/*返回cpu id*/ __sched_fork(p); /* * Revert to default priority/policy on fork if requested. */ if (unlikely(p->sched_reset_on_fork)) { if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { p->policy = SCHED_NORMAL; p->normal_prio = p->static_prio; } if (PRIO_TO_NICE(p->static_prio) < 0) { p->static_prio = NICE_TO_PRIO(0); p->normal_prio = p->static_prio; set_load_weight(p); } /* * We don't need the reset flag anymore after the fork. It has * fulfilled its duty: */ p->sched_reset_on_fork = 0; } /* * Make sure we do not leak PI boosting priority to the child. */ p->prio = current->normal_prio; if (!rt_prio(p->prio)) p->sched_class = &fair_sched_class; #ifdef CONFIG_SMP cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); #endif set_task_cpu(p, cpu);/*将进程指定到特定cpu上运行*/ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)/*yes*/ if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) p->oncpu = 0; #endif #ifdef CONFIG_PREEMPT/*not set*/ /* Want to start with kernel preemption disabled. */ task_thread_info(p)->preempt_count = 1; #endif plist_node_init(&p->pushable_tasks, MAX_PRIO); put_cpu(); }主要调用__sched_fork
/* * Perform scheduler related setup for a newly forked process p. * p is forked by current. * * __sched_fork() is basic setup used by init_idle() too: */ static void __sched_fork(struct task_struct *p) { /*初始化调度实体*/ p->se.exec_start = 0; p->se.sum_exec_runtime = 0; ... p->se.nr_wakeups_idle = 0; #endif INIT_LIST_HEAD(&p->rt.run_list);/*初始化实时调度运行队列*/ p->se.on_rq = 0; INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_PREEMPT_NOTIFIERS/*yes*/ INIT_HLIST_HEAD(&p->preempt_notifiers); #endif /* * We mark the process as running here, but have not actually * inserted it onto the runqueue yet. This guarantees that * nobody will actually run it, and a signal or other external * event cannot wake it up and insert it on the runqueue either. *//*英文注释写的很清楚了*/ p->state = TASK_RUNNING; }注释了大部分,有很多地方是很容易看懂的。另外对于进程创建,不用版本的内核差别很大,所以如果是看这块,最好先确定具体的版本,新版本中可能会加入很多新的东西。这样,需结合其他机制来理解。