copy_process

在 fork() 的实现中,最重要的是 copy_process() 函数,它负责实现进程的复制。


/*
 * fork() system-call entry.
 *
 * Hands off to do_fork() with SIGCHLD as the only flag, the stack pointer
 * saved in the caller's register frame as the child's stack, and no TID
 * pointers.  The value returned by do_fork() is returned unchanged.
 */
asmlinkage int sys_fork(struct pt_regs regs)
{
	unsigned long child_sp = regs.esp;

	return do_fork(SIGCHLD, child_sp, &regs, 0, NULL, NULL);
}

/*
 * vfork() system-call entry.
 *
 * Same hand-off to do_fork() as sys_fork(), but with CLONE_VFORK and
 * CLONE_VM added to SIGCHLD.  The child again starts on the stack pointer
 * saved in the caller's register frame.
 */
asmlinkage int sys_vfork(struct pt_regs regs)
{
	unsigned long vfork_flags = CLONE_VFORK | CLONE_VM | SIGCHLD;

	return do_fork(vfork_flags, regs.esp, &regs, 0, NULL, NULL);
}

asmlinkage int sys_clone(struct pt_regs regs)
{
unsigned long clone_flags;
unsigned long newsp;
int __user *parent_tidptr, *child_tidptr;


clone_flags = regs.ebx;
newsp = regs.ecx;
parent_tidptr = (int __user *)regs.edx;
child_tidptr = (int __user *)regs.edi;
if (!newsp)
newsp = regs.esp;
return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
}

可知regs.ecx就是传给do_fork()的参数child_stack(即stack_start):调用clone()可以为子进程设置一个独立的用户空间堆栈;如果child_stack为0,则表示沿用父进程的用户空间堆栈(regs.esp)。

在entry.s中有

EBX = 0x00
ECX = 0x04
EDX = 0x08
ESI = 0x0C
EDI = 0x10
EBP = 0x14
EAX = 0x18
DS = 0x1C
ES = 0x20
ORIG_EAX = 0x24
EIP = 0x28
CS = 0x2C
EFLAGS = 0x30
OLDESP = 0x34
OLDSS = 0x38

/*
 * Register frame found on the kernel stack after entry into the kernel.
 *
 * Field order mirrors the stack contents: the first nine fields are the
 * ones pushed by SAVE_ALL (ebx ends up at the lowest address), the last
 * five are pushed by the CPU itself.  The offsets in the comments are the
 * entry.S constants listed alongside this structure; they assume 4-byte
 * int and long (i386).
 */
struct pt_regs {
long ebx;	/* 0x00  EBX */
long ecx;	/* 0x04  ECX */
long edx;	/* 0x08  EDX */
long esi;	/* 0x0C  ESI */
long edi;	/* 0x10  EDI */
long ebp;	/* 0x14  EBP */
long eax;	/* 0x18  EAX */
int  xds;	/* 0x1C  DS */
int  xes;	/* 0x20  ES */
long orig_eax;	/* 0x24  ORIG_EAX - the slot RESTORE_ALL skips with "addl $4,%esp" */
long eip;	/* 0x28  EIP - return address, pushed by the CPU */
int  xcs;	/* 0x2C  CS */
long eflags;	/* 0x30  EFLAGS, pushed by the CPU */
long esp;	/* 0x34  OLDESP - user stack pointer, pushed only when the stack is switched */
int  xss;	/* 0x38  OLDSS */
};

在内核中,很多函数的参数是pt_regs数据结构,定义在include/i386/ptrace.h中:

  把这个结构与内核栈的内容相比较,会发现堆栈的内容是这个数据结构的一个映象。


3.保存现场的宏SAVE_ALL

   在中断发生前夕,要把所有相关寄存器的内容都保存在堆栈中,这是通过SAVE_ALL宏完成的:

/*
 * SAVE_ALL - push the registers that form the lower part of struct pt_regs.
 *
 * The push order (es, ds, eax, ebp, edi, esi, edx, ecx, ebx) is the reverse
 * of the pt_regs field order, so ebx ends up at the lowest address.  After
 * saving, %ds and %es are reloaded with the kernel data segment selector;
 * %edx is used as scratch for that, which is safe because it has already
 * been pushed.
 *
 * The continuation lines must be contiguous: a blank line after a
 * backslash ends the macro definition.
 */
#define SAVE_ALL \
	cld; \
	pushl %es; \
	pushl %ds; \
	pushl %eax; \
	pushl %ebp; \
	pushl %edi; \
	pushl %esi; \
	pushl %edx; \
	pushl %ecx; \
	pushl %ebx; \
	movl $(__KERNEL_DS),%edx; \
	movl %edx,%ds; \
	movl %edx,%es;

   该宏执行以后,堆栈内容如图3.6所示。把这个宏与图3.5 结合起来就很容易理解图3.6,在此对该宏再给予解释:

·            CPU在进入中断处理程序时自动将用户栈指针(如果更换堆栈)、EFLAGS寄存器及返回地址一同压入堆栈。

·            段寄存器DS和ES原来的内容入栈,然后装入内核数据段描述符__KERNEL_DS(定义为0x18),内核段的DPL为0。

4.恢复现场的宏RESTORE_ALL

  当从中断返回时,恢复相关寄存器的内容,这是通过RESTORE_ALL宏完成的:

    #define RESTORE_ALL     \

        popl %ebx;      \

        popl %ecx;      \

         popl %edx;      \

         popl %esi;      \

         popl %edi;      \

         popl %ebp;      \

         popl %eax;      \

  1:      popl %ds;       \

  2:      popl %es;       \

          addl $4,%esp;   \

  3:      iret;          

    可以看出,RESTORE_ALL与SAVE_ALL遥相呼应。当执行到iret指令时,内核栈又恢复到刚进入中断门时的状态,并使CPU从中断返回。

5.将当前进程的task_struct 结构的地址放在寄存器中

 #define GET_CURRENT(reg) \

         movl $-8192, reg; \

         andl %esp, reg   





 long do_fork(unsigned long clone_flags,
              unsigned long stack_start,    /* 用户态堆栈指针,将被赋值给子进程的esp寄存器;
                                               父进程应该总是为子进程分配新的堆栈 */
              struct pt_regs *regs,         /* 指向通用寄存器值的指针;通用寄存器的值在从用户态
                                               切换到内核态时会被保存到内核态堆栈中 */
              unsigned long stack_size,     /* 未使用,总被设置为0 */
              int __user *parent_tidptr,    /* 父进程的用户态变量地址 */
              int __user *child_tidptr)     /* 新进程的用户态变量地址 */


/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 */
static task_t *copy_process(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr,
int pid)
{
int retval;
struct task_struct *p = NULL;
    
/*
 * 首先检查参数clone_flags所传递标志的一致性;在下列情况下返回错误码-EINVAL:
 *
 * 1、CLONE_NEWNS和CLONE_FS标志同时被设置
 */


if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
return ERR_PTR(-EINVAL);
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
*/

/* 2、CLONE_THREAD标志被设置,但CLONE_SIGHAND标志被清0 */

if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
return ERR_PTR(-EINVAL);
/*
* Shared signal handlers imply shared VM. By way of the above,
* thread groups also imply shared VM. Blocking this case allows
* for various simplifications in other code.
*/

/* 3、CLONE_SIGHAND标志被设置,但CLONE_VM标志被清0 */
if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
return ERR_PTR(-EINVAL);

/* 安全检查(security_task_create),详见可执行文件部分的讲解 */
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;


retval = -ENOMEM;

为子进程描述符分配两个连续的物理页面,低端用作task_struct高端作为内核空间
p = dup_task_struct(current);

static struct task_struct *dup_task_struct(struct task_struct *orig)
{
	struct task_struct *tsk;
	struct thread_info *ti;

	prepare_to_copy(orig);实质上调用__unlazy_fpu()把fpu、mmx、sse/sse2寄存器的内容保存到父进程的thread_info中
                                    在稍后的dup_task_struct()把这些值复制到子进程的thread_info中

	tsk = alloc_task_struct();//分配内存
	if (!tsk)
		return NULL;
             获取一块内存,用来存放新进程的thread_info结构和内核栈,
	ti = alloc_thread_info(tsk);
	if (!ti) {
		free_task_struct(tsk);
		return NULL;
	}
       将current进程的描述符copy到子进程的thread_info中
	*ti = *orig->thread_info;
	*tsk = *orig; //将current进程copy到子进程中
	tsk->thread_info = ti;将子进程的task_struct->thread_inf0指向ti(刚刚创建的thread_info)
	ti->task = tsk;//子进程的thread_info->task指向子进程的task

	/* One for us, one for whoever does the "release_task()" (usually parent) */
	atomic_set(&tsk->usage,2);//子进程描述符的使用计数器设置为2,
                                   /*用来表示进程的描述符正在被使用而其相应的进程处于活动状态*/
	return tsk;
}


if (!p)
goto fork_out;


retval = -EAGAIN;

//检查该用户已拥有的进程数是否达到或超过资源限制RLIMIT_NPROC(rlim_cur)
if (atomic_read(&p->user->processes) >=
p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
p->user != &root_user)
goto bad_fork_free;
}


atomic_inc(&p->user->__count);
atomic_inc(&p->user->processes);
get_group_info(p->group_info);


/*
* If multiple threads are within copy_process(), then this check
* triggers too late. This doesn't hurt, the check is only there
* to stop root fork bombs.对于不属于任何用户的内核线程,设置内核线程最大数
*/
if (nr_threads >= max_threads)
goto bad_fork_cleanup_count;


if (!try_module_get(p->thread_info->exec_domain->module))
goto bad_fork_cleanup_count;


if (p->binfmt && !try_module_get(p->binfmt->module))
goto bad_fork_cleanup_put_domain;
    /*

          task_struct结构中有个user指针用来指向一个user_struct结构;一个用户通常拥有多个进程,所以有关用户的一些信息并不专属于某一个进程;属于同一个用户的进程就可以通过指针user共享这些信息。每个用户有且只有一个user_struct结构,结构中有个计数器__count,对属于该用户

/*
 * Some day this will be a full-fledged user tracking system..
 */
/*
 * Per-user accounting record.  The task-creation code in this file reaches
 * it through task_struct->user and bumps both __count and processes for
 * every new task.
 */
struct user_struct {
	atomic_t __count;	/* reference count */
	atomic_t processes;	/* How many processes does this user have? */
	atomic_t files;		/* How many open files does this user have? */
	atomic_t sigpending;	/* How many pending signals does this user have? */
	/* protected by mq_lock	*/
	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
	unsigned long locked_shm; /* How many pages of mlocked shm ? */

#ifdef CONFIG_KEYS
	struct key *uid_keyring;	/* UID specific keyring */
	struct key *session_keyring;	/* UID's default session keyring */
#endif

	/* Hash table maintenance information */
	struct list_head uidhash_list;
	uid_t uid;	/* presumably the user ID this record describes - confirm */
};

的进程数计数。可想而知,内核线程不属于某个用户,所以其task_struct的user指针为0。

怎样分析问题

   */

p->did_exec = 0;
copy_flags(clone_flags, p);
p->pid = pid;
retval = -EFAULT;
if (clone_flags & CLONE_PARENT_SETTID)
if (put_user(p->pid, parent_tidptr))
goto bad_fork_cleanup;


p->proc_dentry = NULL;


INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock);
spin_lock_init(&p->proc_lock);

/*  对子进程待处理信号的队列以及有关结构的初始化        进程间通信会用到*/
clear_tsk_thread_flag(p, TIF_SIGPENDING);
init_sigpending(&p->pending);

/*  各种计时变量的初始化,进程调度会用到*/
p->it_real_value = 0;
p->it_real_incr = 0;
p->it_virt_value = cputime_zero;
p->it_virt_incr = cputime_zero;
p->it_prof_value = cputime_zero;
p->it_prof_incr = cputime_zero;
init_timer(&p->real_timer);
p->real_timer.data = (unsigned long) p;


p->utime = cputime_zero;
p->stime = cputime_zero;
p->rchar = 0; /* I/O counter: bytes read */
p->wchar = 0; /* I/O counter: bytes written */
p->syscr = 0; /* I/O counter: read syscalls */
p->syscw = 0; /* I/O counter: write syscalls */
acct_clear_integrals(p);


p->lock_depth = -1; /* -1 = no lock */
do_posix_clock_monotonic_gettime(&p->start_time);
p->security = NULL;
p->io_context = NULL;
p->io_wait = NULL;
p->audit_context = NULL;
#ifdef CONFIG_NUMA
  p->mempolicy = mpol_copy(p->mempolicy);
  if (IS_ERR(p->mempolicy)) {
  retval = PTR_ERR(p->mempolicy);
  p->mempolicy = NULL;
  goto bad_fork_cleanup;
  }
#endif


p->tgid = p->pid;
if (clone_flags & CLONE_THREAD)
p->tgid = current->tgid;


if ((retval = security_task_alloc(p)))
goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
goto bad_fork_cleanup_security;
/* copy all the process information */
if ((retval = copy_semundo(clone_flags, p)))
goto bad_fork_cleanup_audit;

/*  有选择copy父进程的已打开文件,



因为是当前进程在创建子进程,是从当前进程复制到子进程,所以把当前进程task_struct结构中的files_struct结构指针作为oldf。
  再看复制的条件。如果参数clone_flags中的CLONE_FILES标志位为1,就只是通过atomic_inc()
递增当前进程的files_struct结构中的共享计数,表示这个数据结构现在多了一个“用户”,就返回了。
由于在此之前已通过数据结构赋值将当前进程的整个task_struct结构都复制给了子进程,结构中的指
针files自然也复制到了子进程的task_struct结构中,使子进程通过这个指针共享当前进程的files_struct
数据结构。否则,如果CLONE_FILES标志位为0,那就要复制了。首先通过kmem_cache_alloc()为子
进程分配一个files_struct数据结构作为newf,然后从oldf把内容复制到newf。在files_struct数据结构
中有三个主要的“部件”。其一是个位图,名为close_on_exec_init;其二也是位图,名为open_fds_init;
其三则是file结构数组fd_array[]。这三个部件都是固定大小的,如果打开的文件数量超过其容量,就

得通过expand_fdset()和expand_fd_array()在files_struct数据结构以外另行分配空间作为替换。不管是
采用files_struct数据结构内部的这三个部件或是采用外部的替换空间,指针close_on_exec、open_fds
和fd总是分别指向这三组信息。所以,如何复制取决于已打开文件的数量。
    显而易见,共享比复制要简单得多。那么这二者在效果上到底有什么区别呢?如用共享就可以达
到目的,为什么还要不辞辛劳地复制呢?区别在于子进程(以及父进程本身)是否能“独立自主”。当
复制完成之初,子进程有了一份副本,它的内容与父进程的“正本”在内容上基本是相同的,在这一
点上似乎与共享没有什么区别。可是,随后区别就来了。在共享的情况下,两个进程是互相牵制的。
如果子进程对某个已打开文件调用了一次lseek(),则父进程对这个文件的读写位置也随着改变了,因
为两个进程共享着对文件的同一个读写上下文。而在复制的情况下就不一样了,由于子进程有自己的
副本,就有了对同一文件的另一个读写上下文,以后就可以各走各的路,互不干扰了。

*/

/*
 * copy_files - set up the open-file table (files_struct) of a new task.
 * @clone_flags: clone flags; only CLONE_FILES is examined here.
 * @tsk: the task being set up.
 *
 * With CLONE_FILES the caller's table is shared by taking one more
 * reference on it.  Otherwise a new files_struct is allocated and the
 * caller's open descriptors are copied into it, taking a reference on
 * every open file.
 *
 * Returns 0 on success (including the case where the caller has no
 * files_struct at all), -ENOMEM if the new table cannot be allocated, or
 * the negative error returned by expand_files().
 */
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
	struct files_struct *oldf, *newf;
	struct file **old_fds, **new_fds;
	int open_files, size, i, error = 0, expand;

	/*
	 * A background process may not have any files ...
	 */
	oldf = current->files;
	if (!oldf)
		goto out;

	/*
	 * Sharing: only the reference count changes.  tsk->files is not
	 * written on this path; presumably it already equals oldf from the
	 * whole-structure copy of the task_struct - confirm against caller.
	 */
	if (clone_flags & CLONE_FILES) {
		atomic_inc(&oldf->count);
		goto out;
	}

	/*
	 * Note: we may be using current for both targets (See exec.c)
	 * This works because we cache current->files (old) as oldf. Don't
	 * break this.
	 */
	tsk->files = NULL;
	error = -ENOMEM;
	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
	if (!newf) 
		goto out;

	atomic_set(&newf->count, 1);

	/* Start with the bitmaps and fd array embedded in the structure itself. */
	spin_lock_init(&newf->file_lock);
	newf->next_fd	    = 0;
	newf->max_fds	    = NR_OPEN_DEFAULT;
	newf->max_fdset	    = __FD_SETSIZE;
	newf->close_on_exec = &newf->close_on_exec_init;
	newf->open_fds	    = &newf->open_fds_init;
	newf->fd	    = &newf->fd_array[0];

	spin_lock(&oldf->file_lock);

	open_files = count_open_files(oldf, oldf->max_fdset);
	expand = 0;

	/*
	 * Check whether we need to allocate a larger fd array or fd set.
	 * Note: we're not a clone task, so the open count won't  change.
	 */
	if (open_files > newf->max_fdset) {
		newf->max_fdset = 0;
		expand = 1;
	}
	if (open_files > newf->max_fds) {
		newf->max_fds = 0;
		expand = 1;
	}

	/* if the old fdset gets grown now, we'll only copy up to "size" fds */
	if (expand) {
		/*
		 * oldf's lock is dropped while newf is grown under its own
		 * lock, and re-taken afterwards for the copy below.
		 */
		spin_unlock(&oldf->file_lock);
		spin_lock(&newf->file_lock);
		error = expand_files(newf, open_files-1);
		spin_unlock(&newf->file_lock);
		if (error < 0)
			goto out_release;
		spin_lock(&oldf->file_lock);
	}

	old_fds = oldf->fd;
	new_fds = newf->fd;

	/* open_files is a count of descriptors (bits); /8 converts it to bytes. */
	memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
	memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);

	/* Copy the file pointers, taking a reference on each open file. */
	for (i = open_files; i != 0; i--) {
		struct file *f = *old_fds++;
		if (f) {
			get_file(f);
		} else {
			/*
			 * The fd may be claimed in the fd bitmap but not yet
			 * instantiated in the files array if a sibling thread
			 * is partway through open().  So make sure that this
			 * fd is available to the new process.
			 */
			FD_CLR(open_files - i, newf->open_fds);
		}
		*new_fds++ = f;
	}
	spin_unlock(&oldf->file_lock);

	/* compute the remainder to be cleared */
	size = (newf->max_fds - open_files) * sizeof(struct file *);

	/* This is long word aligned thus could use a optimized version */ 
	memset(new_fds, 0, size); 

	/* Zero the part of both bitmaps that lies beyond the copied descriptors. */
	if (newf->max_fdset > open_files) {
		int left = (newf->max_fdset-open_files)/8;	/* bytes left to clear */
		int start = open_files / (8 * sizeof(unsigned long));	/* index of first word to clear */

		memset(&newf->open_fds->fds_bits[start], 0, left);
		memset(&newf->close_on_exec->fds_bits[start], 0, left);
	}

	tsk->files = newf;
	error = 0;
out:
	return error;

	/* Expansion failed: release whatever newf holds, then newf itself. */
out_release:
	free_fdset (newf->close_on_exec, newf->max_fdset);
	free_fdset (newf->open_fds, newf->max_fdset);
	free_fd_array(newf->fd, newf->max_fds);
	kmem_cache_free(files_cachep, newf);
	goto out;
}


if ((retval = copy_files(clone_flags, p)))
goto bad_fork_cleanup_semundo;

/*

    除files_struct数据结构外,还有个fs_struct数据结构也是与文件系统有关的,也要通过共享或复
制遗传给子进程。类似地,copy_fs()也是只有在clone_flags中CLONE_FS标志位为0时才加以复制。
task_struct结构中的指针fs指向一个fs_struct数据结构,结构中记录的是进程的根目录root、当前工作目
录pwd、一个用于文件操作权限管理的umask,还有一个计数器,其定义在include/linux/fs_struct.h中
(详查询“文件系统”)。函数copy_fs()连同几个有关低层函数的代码也在kernel/fork.c中。

*/

/*
 * copy_fs - give a new task its filesystem context (fs_struct).
 * @clone_flags: clone flags; only CLONE_FS is examined here.
 * @tsk: the task being set up.
 *
 * With CLONE_FS the caller's fs_struct is shared: only its reference
 * count is raised and tsk->fs is left as it is.  Otherwise tsk->fs is set
 * to a private copy made by __copy_fs_struct().
 *
 * Returns 0 on success, -ENOMEM if the copy could not be allocated.
 */
static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
	if (clone_flags & CLONE_FS) {
		atomic_inc(&current->fs->count);
		return 0;
	}
	tsk->fs = __copy_fs_struct(current->fs);
	if (!tsk->fs)
		return -ENOMEM;
	return 0;
}

/*
 * __copy_fs_struct - allocate a new fs_struct initialised from @old.
 * @old: the fs_struct to copy from.
 *
 * The new structure gets a reference count of 1, its own lock and @old's
 * umask.  The root, pwd and (if present) altroot entries are not
 * duplicated: the new structure points at the same objects and takes an
 * extra reference on each through mntget()/dget().  @old is read under
 * its read lock.
 *
 * Returns the new fs_struct, or NULL if the allocation fails.
 */
static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
{
	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
	/* We don't need to lock fs - think why ;-) */
	if (fs) {
		atomic_set(&fs->count, 1);
		rwlock_init(&fs->lock);
		fs->umask = old->umask;
		read_lock(&old->lock);
		fs->rootmnt = mntget(old->rootmnt);
		fs->root = dget(old->root);
		fs->pwdmnt = mntget(old->pwdmnt);
		fs->pwd = dget(old->pwd);
		if (old->altroot) {
			fs->altrootmnt = mntget(old->altrootmnt);
			fs->altroot = dget(old->altroot);
		} else {
			fs->altrootmnt = NULL;
			fs->altroot = NULL;
		}
		read_unlock(&old->lock);
	}
	return fs;
}
    代码中的mntget()和dget()都是用来递增相应数据结构中共享计数的,因为这些数据结构现在多了
一个用户。注意,在这里要复制的是fs_struct数据结构,而并不复制更深层的数据结构。复制了fs_struct
数据结构,就在这一层上有了自主性,至于对更深层的数据结构则还是共享,所以要递增它们的共享
计数。

*/
if ((retval = copy_fs(clone_flags, p)))
goto bad_fork_cleanup_files;


/*

 是否复制父进程对信号的处理是由标志位CLONE_SIGHAND控制的。信号基本上是一种进程间通信手段,信号之于一个进程就好像中断之于一个处理器。进程可以为各种信号设置用于该信号的处理程序,就好像系统可以为各个中断源设置相应的中断服务程序一样。如果一个进程设置了信号处理程序,其task_struct结构中的指针sighand就指向一个sighand_struct数据结构。这种结构是在include/linux/sched.h中定义的:

/*
 * Signal-handler table of a task.  copy_sighand() either shares it
 * between parent and child or gives the child a private copy.
 */
struct sighand_struct {
	atomic_t		count;		/* share count: 1 for a fresh copy, incremented when shared */
	struct k_sigaction	action[_NSIG];	/* one entry per signal; copied wholesale by copy_sighand() */
	spinlock_t		siglock;	/* taken by the fork path around its thread-group checks */
};
    其中的数组action[]确定了一个进程对各种信号(以信号的数值为下标)的反应和处理,子进程可
以通过复制或共享把它从父进程继承下来。函数copy_sighand()的代码如下(kernel/fork.c):
/*
 * copy_sighand - set up the signal-handler table of a new task.
 * @clone_flags: clone flags; CLONE_SIGHAND and CLONE_THREAD are examined.
 * @tsk: the task being set up.
 *
 * If either flag is set the caller's sighand_struct is shared: only its
 * reference count is raised and tsk->sighand is left as it is.  Otherwise
 * a new table is allocated and the caller's action[] array is copied
 * into it.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails (tsk->sighand is
 * NULL in that case).
 */
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
	struct sighand_struct *sig;

	if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
		atomic_inc(&current->sighand->count);
		return 0;
	}
	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
	/* Assigned before the NULL check, so a failed allocation leaves NULL here. */
	tsk->sighand = sig;
	if (!sig)
		return -ENOMEM;
	spin_lock_init(&sig->siglock);
	atomic_set(&sig->count, 1);
	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
	return 0;
}

<span style="font-size:18px;">static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
{
	struct signal_struct *sig;

	if (clone_flags & CLONE_THREAD) {
		atomic_inc(¤t->signal->count);
		atomic_inc(¤t->signal->live);
		return 0;
	}
	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
	tsk->signal = sig;
	if (!sig)
		return -ENOMEM;
	atomic_set(&sig->count, 1);
	atomic_set(&sig->live, 1);
	</span><span style="font-size:24px;">init_waitqueue_head(&sig->wait_chldexit);</span>
<span style="font-size:24px;">wait4()和w ait3 U,一个进程可以停下来等待其了进程完成使命。为此,task_struct中设置了一个队列头部wait_chldexit,</span>
	sig->flags = 0;
	sig->group_exit_code = 0;
	sig->group_exit_task = NULL;
	sig->group_stop_count = 0;
	sig->curr_target = NULL;
	init_sigpending(&sig->shared_pending);
	INIT_LIST_HEAD(&sig->posix_timers);

	sig->tty = current->signal->tty;
	sig->pgrp = process_group(current);
	sig->session = current->signal->session;
	sig->leader = 0;	/* session leadership doesn't inherit */
	sig->tty_old_pgrp = 0;

	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;

	task_lock(current->group_leader);
	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
	task_unlock(current->group_leader);

	return 0;
}


*/
if ((retval = copy_sighand(clone_flags, p)))
goto bad_fork_cleanup_fs;
if ((retval = copy_signal(clone_flags, p)))
goto bad_fork_cleanup_sighand;

/*

进程的task_struct结构中有个指针mm,它指向一个代表着进程的用户空间的mm_struct数据结构。由于内核线程并不拥有用户空间,所以在内核线程的task_struct结构中该指针为0。有关mm_struct及其下属的vm_area_struct等数据结构详见内存管理。函数copy_mm()的代码还是在kernel/fork.c中:

*/
if ((retval = copy_mm(clone_flags, p)))
goto bad_fork_cleanup_signal;
if ((retval = copy_keys(clone_flags, p)))
goto bad_fork_cleanup_mm;
if ((retval = copy_namespace(clone_flags, p)))
goto bad_fork_cleanup_keys;

/*

 前面已通过alloc_task_struct()分配了两个连续的页面,其低端用作task_struct结构,已经基本上复制好了;而用作系统空间堆栈的高端,却还没有复制。现在就由copy_thread()来做这件事了。这个函数的代码在arch/i386/kernel/process.c中:

    名为copy_thread(),实际上却只是复制父进程的系统空间堆栈。堆栈中的内容说明了父进程从通过系统调用进入系统空间开始到进入copy_thread()的来历,子进程将要循相同的路线返回,所以要把它复制给子进程。但是,如果子进程的系统空间堆栈与父进程的完全相同,那返回以后就无从区分谁是子进程了,所以复制以后还要略作调整。当一个进程因系统调用或中断而进入内核时,其系统空间堆栈的顶部保存着CPU进入内核前夕各个寄存器的内容,并形成一个pt_regs数据结构。代码中的p为子进程的task_struct指针,指向两个连续物理页面的起始地址;而THREAD_SIZE+(unsigned long)p则指向这两个页面的顶端。将其变换成struct pt_regs *,再从中减1,就指向了子进程系统空间堆栈中的pt_regs结构,如下图


  得到了指向子进程系统空间堆栈中pt_regs结构的指针childregs以后,就先将当前进程系统空间堆栈中的pt_regs结构复制过去,再来作少量的调整。什么样的调整呢?首先,将该结构中的eax置成0。当子进程受调度而“恢复”运行,从系统调用“返回”时,这就是返回值。如前所述,子进程的返回值为0。其次,还要将结构中的esp置成这里的参数esp,它决定了进程在用户空间的堆栈位置。在clone()调用中,这个参数是由调用者给定的。而在fork()和vfork()中,则来自调用do_fork()前夕的regs.esp,所以实际上并没有改变,还是指向父进程原来在用户空间的堆栈。
    在进程的task_struct结构中有个重要的成分thread,它本身是一个数据结构thread_struct,里面记录着进程在切换时的(系统空间)堆栈指针,取指令地址(也就是“返回地址”)等关键性的信息。在复制task_struct数据结构的时候,这些信息也原封不动地复制了过来。可是,子进程有自己的系统空间堆栈,所以也要相应加以调整。具体地说,
将p->thread.esp设置成子进程系统空间堆栈中pt_regs结构的起始地址,就好像这个子进程以前曾经运行过,而在进入内核以后正要返回用户空间时被切换了一样。而p->thread.esp0则应该指向子进程的系统空间堆栈的顶端。当一个进程被调度运行时,内核会将这个变量的值写入TSS的esp0字段,表示这个进程进入0级运行时其堆栈的位置。此外,

p->thread.eip的值表示当进程下一次被切换进入运行时的切入点,类似于函数调用或中断的返回地址。将此地址设置成ret_from_fork(这段汇编代码会调用schedule_tail()函数,后者进而调用finish_task_switch()来完成切换),使创建的子进程在首次被调度运行时就从那儿开始,这一点以后在阅读有关进程切换的代码时还要讲到。这里的savesegment是个宏操作,

/*
 * Save a segment register away
 *
 * savesegment(seg, value) emits a single "movl %<seg>,<mem>" that stores
 * segment register @seg into the memory occupied by @value, accessed as
 * an int.  @seg is pasted into the instruction text by stringification,
 * so it must be a bare register name such as fs or gs.
 */
#define savesegment(seg, value) \
asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))

即:

asm volatile("movl %%fs,%0":"=m" (*(int *)&p->thread.fs))

也就是把当前的段寄存器fs的值保存在p->thread.fs中。

/*
 * copy_thread - build the kernel-stack register frame and thread state of
 * a new task.
 * @nr: not referenced in this function.
 * @clone_flags: clone flags; only CLONE_SETTLS is examined here.
 * @esp: value stored as the child's saved user stack pointer.
 * @unused: not referenced in this function.
 * @p: the task being set up.
 * @regs: the parent's saved register frame, copied as the starting point.
 *
 * Returns 0 on success, -ENOMEM if the I/O bitmap copy cannot be
 * allocated, -EFAULT if the TLS descriptor cannot be read from user
 * space, or -EINVAL if that descriptor is empty or its entry number is
 * outside the TLS range.
 */
int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
	unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	struct pt_regs * childregs;
	struct task_struct *tsk;
	int err;

	/*
	 * The child's frame is the last pt_regs-sized slot below the top of
	 * the THREAD_SIZE area that starts at p->thread_info.
	 */
	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
	*childregs = *regs;
	childregs->eax = 0;	/* saved eax differs from the parent's: the child sees 0 */
	childregs->esp = esp;

	/* Saved kernel stack pointer: the frame itself; esp0: just above it. */
	p->thread.esp = (unsigned long) childregs;
	p->thread.esp0 = (unsigned long) (childregs+1);

	/* Saved instruction pointer for the child: ret_from_fork. */
	p->thread.eip = (unsigned long) ret_from_fork;

	/* Record the fs and gs segment registers as they are right now. */
	savesegment(fs,p->thread.fs);
	savesegment(gs,p->thread.gs);

	/* If the current task has an I/O bitmap, the child gets its own copy. */
	tsk = current;
	if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
			IO_BITMAP_BYTES);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
		struct desc_struct *desc;
		struct user_desc info;
		int idx;

		/* The descriptor is read from the user address held in the saved esi. */
		err = -EFAULT;
		if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))
			goto out;
		err = -EINVAL;
		if (LDT_empty(&info))
			goto out;

		idx = info.entry_number;
		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
			goto out;

		/* Store the descriptor in the matching slot of the child's tls_array. */
		desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
		desc->a = LDT_entry_a(&info);
		desc->b = LDT_entry_b(&info);
	}

	err = 0;
 out:
	/* On failure, release the I/O bitmap copy made above, if any. */
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}


*/
retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
if (retval)
goto bad_fork_cleanup_namespace;


p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
/*
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;


/*
* Syscall tracing should be turned off in the child regardless
* of CLONE_PTRACE.
*/

/*

清除tif_syscall_trace标志,使ret_from_fork函数不会把系统调用结束的消息通知给调试进程;


*/
clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);


/* Our parent execution domain becomes current domain
  These must match for thread signalling to apply */
  
p->parent_exec_id = p->self_exec_id;


/* ok, now we should be set up.. */
p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
p->pdeath_signal = 0;
p->exit_state = 0;


/* Perform scheduler related setup完成对新进程调度的数据结构初始化,新进程设为task_running 

将preempt_count设置为1,禁止内核抢占 */
sched_fork(p);


/*
* Ok, make it visible to the rest of the system.
* We dont wake it up yet.
*/
p->group_leader = p;
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);


/* Need tasklist lock for parent etc handling! */
write_lock_irq(&tasklist_lock);


/*
* The task hasn't been attached yet, so cpus_allowed mask cannot
* have changed. The cpus_allowed mask of the parent may have
* changed after it was copied first time, and it may then move to
* another CPU - so we re-copy it here and set the child's CPU to
* the parent's CPU. This avoids alot of nasty races.
*/
p->cpus_allowed = current->cpus_allowed;
set_task_cpu(p, smp_processor_id()); /* 把子进程p的CPU设置为当前CPU(即父进程所在的CPU) */


/*
* Check for pending SIGKILL! The new thread should not be allowed
* to slip out of an OOM kill. (or normal SIGKILL.)
*/
if (sigismember(&current->pending.signal, SIGKILL)) {
write_unlock_irq(&tasklist_lock);
retval = -EINTR;
goto bad_fork_cleanup_namespace;
}


/* CLONE_PARENT re-uses the old parent */
if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
p->real_parent = current->real_parent;
else
p->real_parent = current;
p->parent = p->real_parent;


if (clone_flags & CLONE_THREAD) {
spin_lock(&current->sighand->siglock);
/*
* Important: if an exit-all has been started then
* do not create this new thread - the whole thread
* group is supposed to exit anyway.
*/
if (current->signal->flags & SIGNAL_GROUP_EXIT) {
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -EAGAIN;
goto bad_fork_cleanup_namespace;
}
p->group_leader = current->group_leader;


if (current->signal->group_stop_count > 0) {
/*
* There is an all-stop in progress for the group.
* We ourselves will stop as soon as we check signals.
* Make the new thread part of that group stop too.
*/
current->signal->group_stop_count++;
set_tsk_thread_flag(p, TIF_SIGPENDING);
}


spin_unlock(&current->sighand->siglock);
}


SET_LINKS(p); /* 把新进程描述符插入进程链表 */

/*
 * SET_LINKS(p) - link a new task into the task lists.
 *
 * A thread-group leader is appended to the list headed by
 * init_task.tasks; every task, leader or not, is passed to add_parent()
 * together with its ->parent.  Wrapped in do { } while (0) so that it
 * behaves as a single statement.
 */
#define SET_LINKS(p) do {					\
	if (thread_group_leader(p))				\
		list_add_tail(&(p)->tasks,&init_task.tasks);	\
	add_parent(p, (p)->parent);				\
	} while (0)


if (unlikely(p->ptrace & PT_PTRACED))
__ptrace_link(p, current->parent);


attach_pid(p, PIDTYPE_PID, p->pid);
attach_pid(p, PIDTYPE_TGID, p->tgid);
if (thread_group_leader(p)) {
attach_pid(p, PIDTYPE_PGID, process_group(p));
attach_pid(p, PIDTYPE_SID, p->signal->session);
if (p->pid)
__get_cpu_var(process_counts)++;
}


nr_threads++;
total_forks++;
write_unlock_irq(&tasklist_lock);
retval = 0;


fork_out:
if (retval)
return ERR_PTR(retval);
return p;


bad_fork_cleanup_namespace:
exit_namespace(p);
bad_fork_cleanup_keys:
exit_keys(p);
bad_fork_cleanup_mm:
if (p->mm)
mmput(p->mm);
bad_fork_cleanup_signal:
exit_signal(p);
bad_fork_cleanup_sighand:
exit_sighand(p);
bad_fork_cleanup_fs:
exit_fs(p); /* blocking */
bad_fork_cleanup_files:
exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
exit_sem(p);
bad_fork_cleanup_audit:
audit_free(p);
bad_fork_cleanup_security:
security_task_free(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
mpol_free(p->mempolicy);
#endif
bad_fork_cleanup:
if (p->binfmt)
module_put(p->binfmt->module);
bad_fork_cleanup_put_domain:
module_put(p->thread_info->exec_domain->module);
bad_fork_cleanup_count:
put_group_info(p->group_info);
atomic_dec(&p->user->processes);
free_uid(p->user);
bad_fork_free:
free_task(p);
goto fork_out;

}

你可能感兴趣的:(copy_prodess)