u012681083

copy_prodess

在 fork（）函数中最重要的是 copy_peocess函数实现进程的复制

asmlinkage int sys_fork(struct pt_regs regs)
{
return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
}

asmlinkage int sys_vfork(struct pt_regs regs)
{
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
}

asmlinkage int sys_clone(struct pt_regs regs)
{
unsigned long clone_flags;
unsigned long newsp;
int __user *parent_tidptr, *child_tidptr;

clone_flags = regs.ebx;
newsp = regs.ecx;
parent_tidptr = (int __user *)regs.edx;
child_tidptr = (int __user *)regs.edi;
if (!newsp)
newsp = regs.esp;
return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
}

可知regs.ecx就是调用do_fork（）参数child_stack，调用clone可以为子进程设置一个独立的用户空间堆栈如果chld_stack为0，表示使用父进程的用户空间堆栈；

在entry.s中有

EBX = 0x00
ECX = 0x04
EDX = 0x08
ESI = 0x0C
EDI = 0x10
EBP = 0x14
EAX = 0x18
DS = 0x1C
ES = 0x20
ORIG_EAX = 0x24
EIP = 0x28
CS = 0x2C
EFLAGS = 0x30
OLDESP = 0x34
OLDSS = 0x38

struct pt_regs {
long ebx;
long ecx;
long edx;
long esi;
long edi;
long ebp;
long eax;
int xds;
int xes;
long orig_eax;
long eip;
int xcs;
long eflags;
long esp;
int xss;
};

在内核中，很多函数的参数是pt_regs数据结构，定义在include/i386/ptrace.h中：

把这个结构与内核栈的内容相比较，会发现堆栈的内容是这个数据结构的一个映象。

3．保存现场的宏SAVE_ALL

在中断发生前夕，要把所有相关寄存器的内容都保存在堆栈中,这是通过SAVE_ALL宏完成的：

#define SAVE_ALL \

cld; \

pushl %es; \

pushl %ds; \

pushl %eax; \

pushl %ebp; \

pushl %edi; \

pushl %esi; \

pushl %edx; \

pushl %ecx; \

pushl %ebx; \

movl $(__KERNEL_DS),%edx; \

movl %edx,%ds; \

movl %edx,%es;

该宏执行以后，堆栈内容如图3.6所示。把这个宏与图3.5 结合起来就很容易理解图3.6，在此对该宏再给予解释：

· CPU在进入中断处理程序时自动将用户栈指针（如果更换堆栈）、EFLAGS寄存器及返回地址一同压入堆栈。

· 段寄存器DS和ES原来的内容入栈，然后装入内核数据段描述符__KERNEL_DS（定义为0x18）,内核段的DPL为0。

4．恢复现场的宏RESTORE_ALL

当从中断返回时，恢复相关寄存器的内容，这是通过RESTORE_ALL宏完成的：

#define RESTORE_ALL \

popl %ebx; \

popl %ecx; \

popl %edx; \

popl %esi; \

popl %edi; \

popl %ebp; \

popl %eax; \

1: popl %ds; \

2: popl %es; \

addl $4,%esp; \

3: iret;

可以看出，RESTORE_ALL与SAVE_ALL遥相呼应。当执行到iret指令时，内核栈又恢复到刚进入中断门时的状态，并使CPU从中断返回。

5．将当前进程的task_struct 结构的地址放在寄存器中

#define GET_CURRENT(reg) \

movl $-8192, reg; \

andl %esp, reg

long do_fork(unsigned long clone_flags,

unsigned long stack_start,表示把用户态堆栈指针赋值给子进程的esp寄存器；

父进程应该总为子进程分配新的堆栈

struct pt_regs *regs,指向通用寄存器值得指针，通用寄存器的值在从用户态切换到

内核态时会被保存到内核态堆栈中

unsigned long stack_size, 未使用总被设置为0
int __user *parent_tidptr, 表示父进程的用户态变量地址

int __user *child_tidptr 表示新的进程的用户态变量地址

)

/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
*
* It copies the registers, and all the appropriate
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*/
static task_t *copy_process(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr,
int pid)
{
int retval;sys_clone
struct task_struct *p = NULL;

首先检查参数clone_flags所传递标志的一致性；在下列情况下

返回错误代号

1、clone_newns和clone_fs标志被设置

if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
return ERR_PTR(-EINVAL);
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
*/

2、clone_thread标志被设置但是clone_sighand标志被清0

if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
return ERR_PTR(-EINVAL);
/*
* Shared signal handlers imply shared VM. By way of the above,
* thread groups also imply shared VM. Blocking this case allows
* for various simplifications in other code.
*/

3、clone_sighand标志被设置，但clone_vm清0
if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
return ERR_PTR(-EINVAL);

安全检查见可执行文件讲解
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;

retval = -ENOMEM;

为子进程描述符分配两个连续的物理页面，低端用作task_struct高端作为内核空间
p = dup_task_struct(current);

static struct task_struct *dup_task_struct(struct task_struct *orig)
{
	struct task_struct *tsk;
	struct thread_info *ti;

	prepare_to_copy(orig);实质上调用__unlazy_fpu()把fpu、mmx、sse/sse2寄存器的内容保存到父进程的thread_info中

                                    在稍后的dup_task_struct（）把这些值复制到子进程的thread_info中

	tsk = alloc_task_struct();//分配内存
	if (!tsk)
		return NULL;
             获取一块内存，用来存放新进程的thread_info结构和内核栈，
	ti = alloc_thread_info(tsk);
	if (!ti) {
		free_task_struct(tsk);
		return NULL;
	}
       将current进程的描述符copy到子进程的thread_info中
	*ti = *orig->thread_info;
	*tsk = *orig; //将current进程copy到子进程中
	tsk->thread_info = ti;将子进程的task_struct->thread_inf0指向ti（刚刚创建的thread_info）
	ti->task = tsk;//子进程的thread_info->task指向子进程的task

	/* One for us, one for whoever does the "release_task()" (usually parent) */
	atomic_set(&tsk->usage,2);//子进程描述符的使用计数器设置为2，

                                   /*用来表示进程的描述符正在被使用而其相应的进程处于活动状态*/
	return tsk;
}

if (!p)
goto fork_out;

retval = -EAGAIN;

//检查进程数是否小于或等于用户所拥有的进程数
if (atomic_read(&p->user->processes) >=
p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
p->user != &root_user)
goto bad_fork_free;
}

atomic_inc(&p->user->__count);
atomic_inc(&p->user->processes);
get_group_info(p->group_info);

/*
* If multiple threads are within copy_process(), then this check
* triggers too late. This doesn't hurt, the check is only there
* to stop root fork bombs.对于不属于任何用户的内核线程，设置内核线程最大数
*/
if (nr_threads >= max_threads)
goto bad_fork_cleanup_count;

if (!try_module_get(p->thread_info->exec_domain->module))
goto bad_fork_cleanup_count;

if (p->binfmt && !try_module_get(p->binfmt->module))
goto bad_fork_cleanup_put_domain;
/*

task_struct结构中有个user指针用来指向一个user_struct结构；一个用户通常属于多个进程，所以有关用户的一些信息并不专属某一个进程；属于同一个用户的进程就可以通过指针user共享这些信息。每个用户有且只有一个user_struct结构，结构中有个计数器_count,对属于该用户

/*
 * Some day this will be a full-fledged user tracking system..
 */
struct user_struct {
	atomic_t __count;	/* reference count */
	atomic_t processes;	/* How many processes does this user have? */
	atomic_t files;		/* How many open files does this user have? */
	atomic_t sigpending;	/* How many pending signals does this user have? */
	/* protected by mq_lock	*/
	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
	unsigned long locked_shm; /* How many pages of mlocked shm ? */

#ifdef CONFIG_KEYS
	struct key *uid_keyring;	/* UID specific keyring */
	struct key *session_keyring;	/* UID's default session keyring */
#endif

	/* Hash table maintenance information */
	struct list_head uidhash_list;
	uid_t uid;
};

的进程数计数，可想而知。可想而知内核线程不属于某个用户；所以其task_struct的user指针为0；

怎样分析问题

*/

p->did_exec = 0;
copy_flags(clone_flags, p);
p->pid = pid;
retval = -EFAULT;
if (clone_flags & CLONE_PARENT_SETTID)
if (put_user(p->pid, parent_tidptr))
goto bad_fork_cleanup;

p->proc_dentry = NULL;

INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock);
spin_lock_init(&p->proc_lock);

/* 对子进程待处理信号的队列以及有关结构的初始化 进程间通信会用到*/
clear_tsk_thread_flag(p, TIF_SIGPENDING);
init_sigpending(&p->pending);

/* 各种计时变量的初始化，进程调度会用到*/
p->it_real_value = 0;
p->it_real_incr = 0;
p->it_virt_value = cputime_zero;
p->it_virt_incr = cputime_zero;
p->it_prof_value = cputime_zero;
p->it_prof_incr = cputime_zero;
init_timer(&p->real_timer);
p->real_timer.data = (unsigned long) p;

p->utime = cputime_zero;
p->stime = cputime_zero;
p->rchar = 0; /* I/O counter: bytes read */
p->wchar = 0; /* I/O counter: bytes written */
p->syscr = 0; /* I/O counter: read syscalls */
p->syscw = 0; /* I/O counter: write syscalls */
acct_clear_integrals(p);

p->lock_depth = -1; /* -1 = no lock */
do_posix_clock_monotonic_gettime(&p->start_time);
p->security = NULL;
p->io_context = NULL;
p->io_wait = NULL;
p->audit_context = NULL;
#ifdef CONFIG_NUMA
p->mempolicy = mpol_copy(p->mempolicy);
if (IS_ERR(p->mempolicy)) {
retval = PTR_ERR(p->mempolicy);
p->mempolicy = NULL;
goto bad_fork_cleanup;
}
#endif

p->tgid = p->pid;
if (clone_flags & CLONE_THREAD)
p->tgid = current->tgid;

if ((retval = security_task_alloc(p)))
goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
goto bad_fork_cleanup_security;
/* copy all the process information */
if ((retval = copy_semundo(clone_flags, p)))
goto bad_fork_cleanup_audit;

/* 有选择copy父进程的已打开文件，

因为是当前进程在创建了进程，是从当前进程复制到了进程，所以把当前进程task struct结构中的files_ struct结构指针作为oldf。
再看复制的条件。如果参数clone_ flags中的CLONE_ FILES标志位为1，就只是通过atomic_ inc()
递增当前进程的files_ struct结构中的共享计数，表示这个数据结构现在多了一个“用户”，就返回了。
由于在此之前已通过数据结构赋值将当前进程的整个task struct结构都复制给了了进程，结构中的指
针files白然也复制到了了进程的task struct结构中，使了进程通过这个指针共享当前进程的files_ struct
数据结构。否则，如果CLONE_ FILES标志位为0，那就要复制了。首先通过kmem_cache_ alloc()为了
进程分配一个files_ struct数据结构作为newf，然后从oldf把内容复制到newf。在files_ struct数据结构
中有三个主要的“部件’，。其一是个位图，名为close_ on_ exec_ init;其二也是位图，名为open fds一 init;
其三则是file结构数组fd_array[]。这三个部件都是固定大小的，如果打开的文件数量超过其容量，就

得通过expand_ fdset()和expand_ fd_array()在files_ struct数据结构以外另行分配空问作为替换。不管是
采用files_ struct数据结构内部的这三个部件或是采用外部的替换空问，指针close_ on_ exec, open fds
和fd总是分别指向这三组信息。所以，如何复制取决于已打开文件的数量。
显而易见，共享比复制要简单得多。那么这二者在效果上到底有什么区别呢?如用共享就可以达
到目的，为什么还要不辞辛劳地复制呢?区别在于了进程(以及父进程本身)是否能“独立白主”。当
复制完成之初，了进程有了一份副本，它的内容与父进程的“正本”在内容上基本是相同的，在这一
点上似乎与共享没有什么区别。可是，随后区别就来了。在共享的情况下，两个进程是互相牵制的。
如果了进程对某个已打开文件调用了一次lseek()，则父进程对这个文件的读写位置也随着改变了，因
为两个进程共享着对文件的同一个读写上下文。而在复制的情况下就不一样了，由于了进程有白己的
副本，就有了对同一文件的另一个读写上下文，以后就可以各走各的路，互不干扰了。

static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
	struct files_struct *oldf, *newf;
	struct file **old_fds, **new_fds;
	int open_files, size, i, error = 0, expand;

	/*
	 * A background process may not have any files ...
	 */
	oldf = current->files;
	if (!oldf)
		goto out;

	if (clone_flags & CLONE_FILES) {
		atomic_inc(&oldf->count);
		goto out;
	}

	/*
	 * Note: we may be using current for both targets (See exec.c)
	 * This works because we cache current->files (old) as oldf. Don't
	 * break this.
	 */
	tsk->files = NULL;
	error = -ENOMEM;
	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
	if (!newf) 
		goto out;

	atomic_set(&newf->count, 1);

	spin_lock_init(&newf->file_lock);
	newf->next_fd	    = 0;
	newf->max_fds	    = NR_OPEN_DEFAULT;
	newf->max_fdset	    = __FD_SETSIZE;
	newf->close_on_exec = &newf->close_on_exec_init;
	newf->open_fds	    = &newf->open_fds_init;
	newf->fd	    = &newf->fd_array[0];

	spin_lock(&oldf->file_lock);

	open_files = count_open_files(oldf, oldf->max_fdset);
	expand = 0;

	/*
	 * Check whether we need to allocate a larger fd array or fd set.
	 * Note: we're not a clone task, so the open count won't  change.
	 */
	if (open_files > newf->max_fdset) {
		newf->max_fdset = 0;
		expand = 1;
	}
	if (open_files > newf->max_fds) {
		newf->max_fds = 0;
		expand = 1;
	}

	/* if the old fdset gets grown now, we'll only copy up to "size" fds */
	if (expand) {
		spin_unlock(&oldf->file_lock);
		spin_lock(&newf->file_lock);
		error = expand_files(newf, open_files-1);
		spin_unlock(&newf->file_lock);
		if (error < 0)
			goto out_release;
		spin_lock(&oldf->file_lock);
	}

	old_fds = oldf->fd;
	new_fds = newf->fd;

	memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
	memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);

	for (i = open_files; i != 0; i--) {
		struct file *f = *old_fds++;
		if (f) {
			get_file(f);
		} else {
			/*
			 * The fd may be claimed in the fd bitmap but not yet
			 * instantiated in the files array if a sibling thread
			 * is partway through open().  So make sure that this
			 * fd is available to the new process.
			 */
			FD_CLR(open_files - i, newf->open_fds);
		}
		*new_fds++ = f;
	}
	spin_unlock(&oldf->file_lock);

	/* compute the remainder to be cleared */
	size = (newf->max_fds - open_files) * sizeof(struct file *);

	/* This is long word aligned thus could use a optimized version */ 
	memset(new_fds, 0, size); 

	if (newf->max_fdset > open_files) {
		int left = (newf->max_fdset-open_files)/8;
		int start = open_files / (8 * sizeof(unsigned long));

		memset(&newf->open_fds->fds_bits[start], 0, left);
		memset(&newf->close_on_exec->fds_bits[start], 0, left);
	}

	tsk->files = newf;
	error = 0;
out:
	return error;

out_release:
	free_fdset (newf->close_on_exec, newf->max_fdset);
	free_fdset (newf->open_fds, newf->max_fdset);
	free_fd_array(newf->fd, newf->max_fds);
	kmem_cache_free(files_cachep, newf);
	goto out;
}

if ((retval = copy_files(clone_flags, p)))
goto bad_fork_cleanup_semundo;

除files_ struct数据结构外，还有个fs_ struct数据结构也是与文件系统有关的，也要通过共享或复
制遗传给了进程。类似地，copy_ fsQ也是只有在clone flags中CLONE_ FS标志位为。时才加以复制。
task struct结构中的指针指向一个fs_ struct数据结构，结构中记录的是进程的根目录root、当前工作目
录pwd、一个用于文件操作权限管理的umask，还有一个计数器，其定义在include/linux/fs_ struct.h中
(详查询“文件系统”)。函数copy_ fsQ连同儿个有关低层函数的代码也在kernel/fork.c中。

static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
	if (clone_flags & CLONE_FS) {
		atomic_inc(¤t->fs->count);
		return 0;
	}
	tsk->fs = __copy_fs_struct(current->fs);
	if (!tsk->fs)
		return -ENOMEM;
	return 0;
}

tatic inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
{
	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
	/* We don't need to lock fs - think why ;-) */
	if (fs) {
		atomic_set(&fs->count, 1);
		rwlock_init(&fs->lock);
		fs->umask = old->umask;
		read_lock(&old->lock);
		fs->rootmnt = mntget(old->rootmnt);
		fs->root = dget(old->root);
		fs->pwdmnt = mntget(old->pwdmnt);
		fs->pwd = dget(old->pwd);
		if (old->altroot) {
			fs->altrootmnt = mntget(old->altrootmnt);
			fs->altroot = dget(old->altroot);
		} else {
			fs->altrootmnt = NULL;
			fs->altroot = NULL;
		}
		read_unlock(&old->lock);
	}
	return fs;
}

代码中的mntget()和dgetQ都是用来递增相应数据结构中共享计数的，因为这些数据结构现在多了
一个用户。注意，在这里要复制的是fs_ struct数据结构，而并不复制更深层的数据结构。复制了fs_ struct
数据结构，就在这一层上有了白主性，至于对更深层的数据结构则还是共享，所以要递增它们的共享
计数。

*/
if ((retval = copy_fs(clone_flags, p)))
goto bad_fork_cleanup_files;

是否复制父进程对信号的处理是由标志位CLONE_ SIGHAND控制的。信号基本上是一种进程问通信手段，信号之于一个进程就好像中断之于一个处理器。进程可以为各种信号设置用于该信的处理程序，就好像系统可以为各个中断源设置相应的中断服务程序一样。如果一个进程设置了信号处理程序，其task struct结构中的指针sig就指向一个signal_ struct数据结构。这种结构是在include/linux/sched.h中定义的:

struct sighand_struct {
	atomic_t		count;
	struct k_sigaction	action[_NSIG];
	spinlock_t		siglock;
};

其中的数组action[]确定了一个进程对各种信号(以信号的数值为下标)的反应和处理，了进程可
以通过复制或共享把它从父进程继承下来。函数copy_ sighand()的代码如下(kernel/fork.c

static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
	struct sighand_struct *sig;

	if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
		atomic_inc(¤t->sighand->count);
		return 0;
	}
	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
	tsk->sighand = sig;
	if (!sig)
		return -ENOMEM;
	spin_lock_init(&sig->siglock);
	atomic_set(&sig->count, 1);
	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
	return 0;
}

<span style="font-size:18px;">static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
{
	struct signal_struct *sig;

	if (clone_flags & CLONE_THREAD) {
		atomic_inc(¤t->signal->count);
		atomic_inc(¤t->signal->live);
		return 0;
	}
	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
	tsk->signal = sig;
	if (!sig)
		return -ENOMEM;
	atomic_set(&sig->count, 1);
	atomic_set(&sig->live, 1);
	</span><span style="font-size:24px;">init_waitqueue_head(&sig->wait_chldexit);</span>

<span style="font-size:24px;">wait4()和w ait3 U，一个进程可以停下来等待其了进程完成使命。为此，task_struct中设置了一个队列头部wait_chldexit，</span>
	sig->flags = 0;
	sig->group_exit_code = 0;
	sig->group_exit_task = NULL;
	sig->group_stop_count = 0;
	sig->curr_target = NULL;
	init_sigpending(&sig->shared_pending);
	INIT_LIST_HEAD(&sig->posix_timers);

	sig->tty = current->signal->tty;
	sig->pgrp = process_group(current);
	sig->session = current->signal->session;
	sig->leader = 0;	/* session leadership doesn't inherit */
	sig->tty_old_pgrp = 0;

	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;

	task_lock(current->group_leader);
	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
	task_unlock(current->group_leader);

	return 0;
}

*/
if ((retval = copy_sighand(clone_flags, p)))
goto bad_fork_cleanup_fs;
if ((retval = copy_signal(clone_flags, p)))
goto bad_fork_cleanup_sighand;

进程的task struct结构中有个指针mm，它指向一个代表着进程的用户空问的mm_struct数据结构。由于内核线程并不拥有用户空问，所以在内核线程的task struct结构中该指针为0。有关mm- struct及其下属的vm_area struct等数据结构详见内存管理。函数copy_mm()的代码还是在kernel/fork.c中:

*/
if ((retval = copy_mm(clone_flags, p)))
goto bad_fork_cleanup_signal;
if ((retval = copy_keys(clone_flags, p)))
goto bad_fork_cleanup_mm;
if ((retval = copy_namespace(clone_flags, p)))
goto bad_fork_cleanup_keys;

前面已通过alloc_ task struct()分配了两个连续的页面，其低端用作task struct结构，已经基本上复制好了;而用作系统空问堆栈的高端，却还没有复制。现在就由copy_ thread()来做这件事了。这个函数的代码在arch/i 3 86/kernel/process. c中:

名为copy_ thread()，实际上却只是复制父进程的系统空问堆栈。堆栈中的内容说明了父进程从通过系统调用进入系统空问开始到进入copy_ thread()的来历，了进程将要循相同的路线返回，所以要把它复制给了进程。但是，如果了进程的系统空问堆栈与父进程的完全相同，那返回以后就无从区分谁是了进程了，所以复制以后还要略作调整。当一个进程因系统调用或中断而进入内核时，其系统空问堆栈的顶部保存着CPU进入内核前夕各个寄存器的内容，并形成一个pt regs数据结构。的p为了进程的task struct指针，指向两个连续物理页面的起始地址;而THREAD_ SIZE+(unsigned long)p则指向这两个页面的顶端。将其变换成struct pt regs*，再从中减1，就指向了了进程系统空问堆栈中的pt regs结构，如下图

得到了指向了进程系统空问堆栈中pt regs结构的指针childregs以后，就先将当前进程系统空问堆栈中的pt regs结构复制过去，再来作少量的调整。什么样的调整呢?首先，将该结构中的eax置成0。当了进程受调度而“恢复”运行，从系统调用“返回”时，这就是返回值。如前所述，了进程的返回值为0。其次，还要将结构中的esp置成这里的参esp，它决定了进程在用户空问的堆栈位置。在_clone()调用中，这个参数是由调用者给定的。而在fork()和vfork()中，则来白调用do_ fork()前夕的regs.esp所以实际上并没有改变，还是指向父进程原来在用户空问的堆栈。
在进程的task struct结构中有个重要的成分thread，它本身是一个数据结构thread_ struct，里面记录着进程在切换时的(系统空问)堆栈指针，取指令地址(也就是“返回地址”)等关键性的信息。在复制task struct数据结构的时候，这些信息也原封不动地复制了过来。可是，了进程有白己的系统空问堆栈，所以也要相应加以调整。具体地说，将p->thread.esp设置成了进程系统空问堆栈中pt regs结构的起始地址，就好像这个了进程以前曾经运行过，而在进入内核以后正要返回用户空问时被切换了一样。而p->thread.esp0则应该指向了进程的系统空问堆栈的顶端。当一个进程被调度运行时，内核会将这个变量的值写入TSS的esp0字段，表示这个进程进入。级运行时其堆栈的位置。此外，

p->thread.eip的值表示当进程下一次被切换进入运行时的切入点，类似于函数调用或中断的返回地址。将此地址设置成ret_from_fork()此汇编会调用schedule_tail（）函数，它一次调用finish_task_switch来完成切换；，使创建的了进程在首次被调度运行时就从那儿开始，这一点以后在阅读有关进程切换的代码时还要讲到。的savesegment是个宏操作，

*
* Save a segment register away
*/
#define savesegment(seg, value) \
asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))

即：

asm volatile ("movl }}fs，}0“:“=m"(*(int*)&p->thread.fs))

也就是把当前的段寄存器fs的值保存在p->thread.fs中。

int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
	unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	struct pt_regs * childregs;
	struct task_struct *tsk;
	int err;

	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
	*childregs = *regs;
	childregs->eax = 0;
	childregs->esp = esp;

	p->thread.esp = (unsigned long) childregs;
	p->thread.esp0 = (unsigned long) (childregs+1);

	p->thread.eip = (unsigned long) ret_from_fork;

	savesegment(fs,p->thread.fs);
	savesegment(gs,p->thread.gs);

	tsk = current;
	if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
			IO_BITMAP_BYTES);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
		struct desc_struct *desc;
		struct user_desc info;
		int idx;

		err = -EFAULT;
		if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))
			goto out;
		err = -EINVAL;
		if (LDT_empty(&info))
			goto out;

		idx = info.entry_number;
		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
			goto out;

		desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
		desc->a = LDT_entry_a(&info);
		desc->b = LDT_entry_b(&info);
	}

	err = 0;
 out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

*/
retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
if (retval)
goto bad_fork_cleanup_namespace;

p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
/*
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;

/*
* Syscall tracing should be turned off in the child regardless
* of CLONE_PTRACE.
*/

清除tif_syscall_trace标志，使ret_from_fork函数不会把系统调用结束的消息通知给调试进程；

*/
clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);

/* Our parent execution domain becomes current domain
These must match for thread signalling to apply */

p->parent_exec_id = p->self_exec_id;

/* ok, now we should be set up.. */
p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
p->pdeath_signal = 0;
p->exit_state = 0;

/* Perform scheduler related setup完成对新进程调度的数据结构初始化，新进程设为task_running

将preempt_count设置为1，禁止内核抢占 */
sched_fork(p);

/*
* Ok, make it visible to the rest of the system.
* We dont wake it up yet.
*/
p->group_leader = p;
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);

/* Need tasklist lock for parent etc handling! */
write_lock_irq(&tasklist_lock);

/*
* The task hasn't been attached yet, so cpus_allowed mask cannot
* have changed. The cpus_allowed mask of the parent may have
* changed after it was copied first time, and it may then move to
* another CPU - so we re-copy it here and set the child's CPU to
* the parent's CPU. This avoids alot of nasty races.
*/
p->cpus_allowed = current->cpus_allowed;
set_task_cpu(p, smp_processor_id());设置当前进程的CPU

/*
* Check for pending SIGKILL! The new thread should not be allowed
* to slip out of an OOM kill. (or normal SIGKILL.)
*/
if (sigismember(&current->pending.signal, SIGKILL)) {
write_unlock_irq(&tasklist_lock);
retval = -EINTR;
goto bad_fork_cleanup_namespace;
}

/* CLONE_PARENT re-uses the old parent */
if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
p->real_parent = current->real_parent;
else
p->real_parent = current;
p->parent = p->real_parent;

if (clone_flags & CLONE_THREAD) {
spin_lock(&current->sighand->siglock);
/*
* Important: if an exit-all has been started then
* do not create this new thread - the whole thread
* group is supposed to exit anyway.
*/
if (current->signal->flags & SIGNAL_GROUP_EXIT) {
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -EAGAIN;
goto bad_fork_cleanup_namespace;
}
p->group_leader = current->group_leader;

if (current->signal->group_stop_count > 0) {
/*
* There is an all-stop in progress for the group.
* We ourselves will stop as soon as we check signals.
* Make the new thread part of that group stop too.
*/
current->signal->group_stop_count++;
set_tsk_thread_flag(p, TIF_SIGPENDING);
}

spin_unlock(&current->sighand->siglock);
}

SET_LINKS(p);把新进程描述符插入进程链表

#define SET_LINKS(p) do {					\
	if (thread_group_leader(p))				\
		list_add_tail(&(p)->tasks,&init_task.tasks);	\
	add_parent(p, (p)->parent);				\
	} while (0)

if (unlikely(p->ptrace & PT_PTRACED))
__ptrace_link(p, current->parent);

attach_pid(p, PIDTYPE_PID, p->pid);
attach_pid(p, PIDTYPE_TGID, p->tgid);
if (thread_group_leader(p)) {
attach_pid(p, PIDTYPE_PGID, process_group(p));
attach_pid(p, PIDTYPE_SID, p->signal->session);
if (p->pid)
__get_cpu_var(process_counts)++;
}

nr_threads++;
total_forks++;
write_unlock_irq(&tasklist_lock);
retval = 0;

fork_out:
if (retval)
return ERR_PTR(retval);
return p;

bad_fork_cleanup_namespace:
exit_namespace(p);
bad_fork_cleanup_keys:
exit_keys(p);
bad_fork_cleanup_mm:
if (p->mm)
mmput(p->mm);
bad_fork_cleanup_signal:
exit_signal(p);
bad_fork_cleanup_sighand:
exit_sighand(p);
bad_fork_cleanup_fs:
exit_fs(p); /* blocking */
bad_fork_cleanup_files:
exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
exit_sem(p);
bad_fork_cleanup_audit:
audit_free(p);
bad_fork_cleanup_security:
security_task_free(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
mpol_free(p->mempolicy);
#endif
bad_fork_cleanup:
if (p->binfmt)
module_put(p->binfmt->module);
bad_fork_cleanup_put_domain:
module_put(p->thread_info->exec_domain->module);
bad_fork_cleanup_count:
put_group_info(p->group_info);
atomic_dec(&p->user->processes);
free_uid(p->user);
bad_fork_free:
free_task(p);
goto fork_out;

}

你可能感兴趣的:(copy_prodess)

copy_prodess u012681083
在fork（）函数中最重要的是copy_peocess函数实现进程的复制asmlinkageintsys_fork(structpt_regsregs){returndo_fork(SIGCHLD,regs.esp,®s,0,NULL,NULL);}asmlinkageintsys_vfork(structpt_regsregs){returndo_fork(CLONE_VFORK|CLON
Nginx负载均衡 510888780 nginx 应用服务器
Nginx负载均衡一些基础知识: nginx 的 upstream目前支持 4 种方式的分配 1)、轮询（默认）每个请求按时间顺序逐一分配到不同的后端服务器，如果后端服务器down掉，能自动剔除。 2)、weight 指定轮询几率，weight和访问比率成正比
RedHat 6.4 安装 rabbitmq bylijinnan erlang rabbitmq redhat
在 linux 下安装软件就是折腾，首先是测试机不能上外网要找运维开通，开通后发现测试机的 yum 不能使用于是又要配置 yum 源，最后安装 rabbitmq 时也尝试了两种方法最后才安装成功机器版本： [root@redhat1 rabbitmq]# lsb_release LSB Version: :base-4.0-amd64:base-4.0-noarch:core
FilenameUtils工具类 eksliang FilenameUtils common-io
转载请出自出处：http://eksliang.iteye.com/blog/2217081 一、概述这是一个Java操作文件的常用库，是Apache对java的IO包的封装，这里面有两个非常核心的类FilenameUtils跟FileUtils，其中FilenameUtils是对文件名操作的封装;FileUtils是文件封装，开发中对文件的操作，几乎都可以在这个框架里面找到。非常的好用。
xml文件解析SAX 不懂事的小屁孩 xml
xml文件解析:xml文件解析有四种方式， 1.DOM生成和解析XML文档(SAX是基于事件流的解析) 2.SAX生成和解析XML文档(基于XML文档树结构的解析) 3.DOM4J生成和解析XML文档 4.JDOM生成和解析XML 本文章用第一种方法进行解析，使用android常用的DefaultHandler import org.xml.sax.Attributes;
通过定时任务执行mysql的定期删除和新建分区，此处是按日分区酷的飞上天空 mysql
使用python脚本作为命令脚本，linux的定时任务来每天定时执行 #!/usr/bin/python # -*- coding: utf8 -*- import pymysql import datetime import calendar #要分区的表 table_name = 'my_table' #连接数据库的信息 host,user,passwd,db =
如何搭建数据湖架构？听听专家的意见蓝儿唯美架构
Edo Interactive在几年前遇到一个大问题：公司使用交易数据来帮助零售商和餐馆进行个性化促销，但其数据仓库没有足够时间去处理所有的信用卡和借记卡交易数据 “我们要花费27小时来处理每日的数据量，”Edo主管基础设施和信息系统的高级副总裁Tim Garnto说道：“所以在2013年，我们放弃了现有的基于PostgreSQL的关系型数据库系统，使用了Hadoop集群作为公司的数
spring学习——控制反转与依赖注入 a-john spring
控制反转（Inversion of Control，英文缩写为IoC）是一个重要的面向对象编程的法则来削减计算机程序的耦合问题，也是轻量级的Spring框架的核心。控制反转一般分为两种类型，依赖注入（Dependency Injection，简称DI）和依赖查找（Dependency Lookup）。依赖注入应用比较广泛。
用spool+unixshell生成文本文件的方法 aijuans xshell
例如我们把scott.dept表生成文本文件的语句写成dept.sql,内容如下: 　　set pages 50000; 　　set lines 200; 　　set trims on; 　　set heading off; 　　spool /oracle_backup/log/test/dept.lst; 　　select deptno||','||dname||','||loc
1、基础--名词解析(OOA/OOD/OOP) asia007 学习基础知识
OOA:Object-Oriented Analysis（面向对象分析方法）是在一个系统的开发过程中进行了系统业务调查以后，按照面向对象的思想来分析问题。OOA与结构化分析有较大的区别。OOA所强调的是在系统调查资料的基础上，针对OO方法所需要的素材进行的归类分析和整理，而不是对管理业务现状和方法的分析。　　OOA（面向对象的分析）模型由5个层次（主题层、对象类层、结构层、属性层和服务层）
浅谈java转成json编码格式技术百合不是茶 json编码 java转成json编码
json编码;是一个轻量级的数据存储和传输的语言在java中需要引入json相关的包,引包方式在工程的lib下就可以了 JSON与JAVA数据的转换（JSON 即 JavaScript Object Natation，它是一种轻量级的数据交换格式，非常适合于服务器与 JavaScript 之间的数据的交
web.xml之Spring配置(基于Spring+Struts+Ibatis) bijian1013 java web.xml SSI spring配置
指定Spring配置文件位置 <context-param> <param-name>contextConfigLocation</param-name> <param-value> /WEB-INF/spring-dao-bean.xml,/WEB-INF/spring-resources.xml, /WEB-INF/
Installing SonarQube（Fail to download libraries from server） sunjing Install Sonar
1. Download and unzip the SonarQube distribution 2. Starting the Web Server The default port is "9000" and the context path is "/". These values can be changed in &l
【MongoDB学习笔记十一】Mongo副本集基本的增删查 bit1129 mongodb
一、创建复本集假设mongod,mongo已经配置在系统路径变量上，启动三个命令行窗口，分别执行如下命令： mongod --port 27017 --dbpath data1 --replSet rs0 mongod --port 27018 --dbpath data2 --replSet rs0 mongod --port 27019 -
Anychart图表系列二之执行Flash和HTML5渲染白糖_ Flash
今天介绍Anychart的Flash和HTML5渲染功能 HTML5 Anychart从6.0第一个版本起，已经逐渐开始支持各种图的HTML5渲染效果了，也就是说即使你没有安装Flash插件，只要浏览器支持HTML5，也能看到Anychart的图形（不过这些是需要做一些配置的）。这里要提醒下大家，Anychart6.0版本对HTML5的支持还不算很成熟，目前还处于
Laravel版本更新异常4.2.8-> 4.2.9 Declaration of ... CompilerEngine ... should be compa bozch laravel
昨天在为了把laravel升级到最新的版本，突然之间就出现了如下错误： ErrorException thrown with message "Declaration of Illuminate\View\Engines\CompilerEngine::handleViewException() should be compatible with Illuminate\View\Eng
编程之美-NIM游戏分析-石头总数为奇数时如何保证先动手者必胜 bylijinnan 编程之美
import java.util.Arrays; import java.util.Random; public class Nim { /**编程之美 NIM游戏分析问题：有N块石头和两个玩家A和B，玩家A先将石头随机分成若干堆，然后按照BABA...的顺序不断轮流取石头，能将剩下的石头一次取光的玩家获胜，每次取石头时，每个玩家只能从若干堆石头中任选一堆，
lunce创建索引及简单查询 chengxuyuancsdn 查询创建索引 lunce
import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Docume
[IT与投资]坚持独立自主的研究核心技术 comsci it
和别人合作开发某项产品....如果互相之间的技术水平不同,那么这种合作很难进行,一般都会成为强者控制弱者的方法和手段..... 所以弱者,在遇到技术难题的时候,最好不要一开始就去寻求强者的帮助,因为在我们这颗星球上,生物都有一种控制其
flashback transaction闪回事务查询 daizj oracle sql 闪回事务
闪回事务查询有别于闪回查询的特点有以下3个：（1）其正常工作不但需要利用撤销数据，还需要事先启用最小补充日志。（2）返回的结果不是以前的“旧”数据，而是能够将当前数据修改为以前的样子的撤销SQL（Undo SQL）语句。（3）集中地在名为flashback_transaction_query表上查询，而不是在各个表上通过“as of”或“vers
Java I/O之FilenameFilter类列举出指定路径下某个扩展名的文件游其是你 FilenameFilter
这是一个FilenameFilter类用法的例子，实现的列举出“c:\\folder“路径下所有以“.jpg”扩展名的文件。 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
C语言学习五函数，函数的前置声明以及如何在软件开发中合理的设计函数来解决实际问题 dcj3sjt126com c
# include <stdio.h> int f(void) //括号中的void表示该函数不能接受数据，int表示返回的类型为int类型 { return 10; //向主调函数返回10 } void g(void) //函数名前面的void表示该函数没有返回值 { //return 10; //error 与第8行行首的void相矛盾 } in
今天在测试环境使用yum安装，遇到一个问题： Error: Cannot retrieve metalink for repository: epel. Pl dcj3sjt126com centos
今天在测试环境使用yum安装，遇到一个问题： Error: Cannot retrieve metalink for repository: epel. Please verify its path and try again 处理很简单，修改文件“/etc/yum.repos.d/epel.repo”，将baseurl的注释取消， mirrorlist注释掉。即可。 &n
单例模式 shuizhaosi888 单例模式
单例模式懒汉式 public class RunMain { /** * 私有构造 */ private RunMain() { } /** * 内部类，用于占位，只有 */ private static class SingletonRunMain { priv
Spring Security（09）——Filter 234390216 Spring Security
Filter 目录 1.1 Filter顺序 1.2 添加Filter到FilterChain 1.3 DelegatingFilterProxy 1.4 FilterChainProxy 1.5
公司项目NODEJS实践0.1 逐行分析JS源代码 mongodb nginx ubuntu nodejs
一、前言前端如何独立用nodeJs实现一个简单的注册、登录功能，是不是只用nodejs+sql就可以了？其实是可以实现，但离实际应用还有距离，那要怎么做才是实际可用的。网上有很多nod
java.lang.Math liuhaibo_ljf java Math lang
System.out.println(Math.PI); System.out.println(Math.abs(1.2)); System.out.println(Math.abs(1.2)); System.out.println(Math.abs(1)); System.out.println(Math.abs(111111111)); System.out.println(Mat
linux下时间同步 nonobaba ntp
今天在linux下做hbase集群的时候，发现hmaster启动成功了，但是用hbase命令进入shell的时候报了一个错误 PleaseHoldException: Master is initializing，查看了日志，大致意思是说master和slave时间不同步，没办法，只好找一种手动同步一下，后来发现一共部署了10来台机器，手动同步偏差又比较大，所以还是从网上找现成的解决方
ZooKeeper3.4.6的集群部署 roadrunners zookeeper 集群部署
ZooKeeper是Apache的一个开源项目，在分布式服务中应用比较广泛。它主要用来解决分布式应用中经常遇到的一些数据管理问题，如：统一命名服务、状态同步、集群管理、配置文件管理、同步锁、队列等。这里主要讲集群中ZooKeeper的部署。 1、准备工作我们准备3台机器做ZooKeeper集群，分别在3台机器上创建ZooKeeper需要的目录。数据存储目录
Java高效读取大文件 tomcat_oracle java
　　读取文件行的标准方式是在内存中读取，Guava 和Apache Commons IO都提供了如下所示快速读取文件行的方法：　　Files.readLines(new File(path), Charsets.UTF_8); 　　FileUtils.readLines(new File(path)); 　　这种方法带来的问题是文件的所有行都被存放在内存中，当文件足够大时很快就会导致
微信支付api返回的xml转换为Map的方法 xu3508620 xml map 微信api
举例如下： <xml> <return_code><![CDATA[SUCCESS]]></return_code> <return_msg><![CDATA[OK]]></return_msg> <appid><