由于 Linux 0.11 的很多模块尚未跟进,而 Linux 1.0 相比于现在的版本,基本功能已较为完善,所以后续将研究 Linux 1.0 的源码。事实上,阅读早期版本的源码更有利于理解其工作过程。
关于进程,前面已经介绍过进程基础编程和进程管理(Linux 2.6)。
Linux 1.0\kernel\fork.c
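Unix 系统通过 fork 系统调用创建一个进程,fork.c 的主要任务是为新进程填写数据结构,相关步骤有:取得一个空闲内存页面来保存 task_struct;找到一个空闲的进程槽(find_empty_process());为内核栈 kernel_stack_page 取得另一个空闲内存页面;复制父进程的 LDT;复制父进程的内存映射(mmap)信息。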
/* * linux/kernel/fork.c * * Copyright (C) 1991, 1992 Linus Torvalds */ /* * 'fork.c' contains the help-routines for the 'fork' system call * (see also system_call.s). * Fork is rather simple, once you get the hang of it, but the memory * management can be a bitch. See 'mm/mm.c': 'copy_page_tables()' */ #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/segment.h> #include <linux/ptrace.h> #include <linux/malloc.h> #include <linux/ldt.h> #include <asm/segment.h> #include <asm/system.h> asmlinkage void ret_from_sys_call(void) __asm__("ret_from_sys_call"); /* These should maybe be in <linux/tasks.h> */ #define MAX_TASKS_PER_USER (NR_TASKS/2) #define MIN_TASKS_LEFT_FOR_ROOT 4 extern int shm_fork(struct task_struct *, struct task_struct *); long last_pid=0; //为新进程取得不重复的进程号,并返回在任务数组中的任务编号 //进程号即进程id,跟任务编号是两个概念 static int find_empty_process(void) { int free_task; int i, tasks_free; int this_user_tasks; repeat: //系统最大进程号为0x8000-1 //如果恰好这么大,则让进程号从1开始,0为init进程 if ((++last_pid) & 0xffff8000) last_pid=1; this_user_tasks = 0; tasks_free = 0; free_task = -EAGAIN; i = NR_TASKS; //i为系统的最大任务数 while (--i > 0) { //如果对应i的任务号没有被占用,让free_task等于该值,同时自增可用的任务数 if (!task[i]) { free_task = i; tasks_free++; continue; } //用户id拷贝 if (task[i]->uid == current->uid) this_user_tasks++; //进程id、进程组id、会话id 都等于该进程id if (task[i]->pid == last_pid || task[i]->pgrp == last_pid || task[i]->session == last_pid) goto repeat; } //任务数溢出 if (tasks_free <= MIN_TASKS_LEFT_FOR_ROOT ||//可用任务数少于最低需要 this_user_tasks > MAX_TASKS_PER_USER)// if (current->uid) return -EAGAIN;//当前进程数达到系统规定的上限,errno设置为EAGAIN //返回找到的任务号 return free_task; } //拷贝父进程打开的所有文件描述符,共享一个文件表项 static struct file * copy_fd(struct file * old_file) { //从全局文件描述符中取个新的描述符 struct file * new_file = get_empty_filp(); int error; if (new_file) { //拷贝老的文件描述符到新的 memcpy(new_file,old_file,sizeof(struct file)); new_file->f_count = 1;//文件引用计数 
//若新描述符占用了i节点,增加其引用计数 if (new_file->f_inode) new_file->f_inode->i_count++; //调用对应的open函数,并打开,子进程同样打开父进程对应的文件 if (new_file->f_op && new_file->f_op->open) { error = new_file->f_op->open(new_file->f_inode,new_file); //出错处理:如果失败,则释放其i节点 if (error) { iput(new_file->f_inode); new_file->f_count = 0; new_file = NULL; } } } //返回新的文件描述符 return new_file; } //拷贝父进程的mmap信息,tsk是子进程,current是父进程 //因为linux采用了copy-on-write 技术,所以在执行exec之前 //父子进程两者的虚拟空间不同,但其映射的物理空间是同一个,存储映射是一样的 int dup_mmap(struct task_struct * tsk) { //虚拟地址空间 struct vm_area_struct * mpnt, **p, *tmp; tsk->mmap = NULL; tsk->stk_vma = NULL; p = &tsk->mmap;//物理内存页面 //链表形式组织数据,vm_next指向下一个vm_area_struct结构 for (mpnt = current->mmap ; mpnt ; mpnt = mpnt->vm_next) { //寻找空闲的满足要求的一段连续的虚拟地址空间,这样父子进程具备不同的虚拟地址空间 tmp = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); if (!tmp) return -ENOMEM;//系统内存不足,errno设置为ENOMEM *tmp = *mpnt;//指向了同一个mmap 物理内存 tmp->vm_task = tsk;//绑定到子进程tsk tmp->vm_next = NULL;//子进程默认下一个vm_area_struct结构为空 //vm_inode指向虚拟地址映射的磁盘文件或设备文件的内容的inode结构体 if (tmp->vm_inode) tmp->vm_inode->i_count++;//引用计数加1 *p = tmp;//子进程的第一个vm_area_struct的物理内存映射 p = &tmp->vm_next;//指向下一个vm_area_struct,然后循环让子进程与父进程的虚拟空间映射到同一物理页面 //指向vm_area_struct 链表的链表头 if (current->stk_vma == mpnt) tsk->stk_vma = tmp; } return 0; } #define IS_CLONE (regs.orig_eax == __NR_clone) #define copy_vm(p) ((clone_flags & COPYVM)?copy_page_tables(p):clone_page_tables(p)) /* * Ok, this is the main fork-routine. It copies the system process * information (task[nr]) and sets up the necessary registers. It * also copies the data segment in its entirety. 
*/ /*fork的主程序,拷贝系统进程的信息(task[nr])并且设置必要的寄存器,同时完全拷贝数据段 regs为用户态进入内核态时的堆栈排布结构*/ asmlinkage int sys_fork(struct pt_regs regs) { struct pt_regs * childregs; struct task_struct *p; int i,nr; struct file *f; unsigned long clone_flags = COPYVM | SIGCHLD; //获取一个空闲内存页面来保存task_struct if(!(p = (struct task_struct*)__get_free_page(GFP_KERNEL))) goto bad_fork; //查找空闲的进程槽并设置全局进程号(last_pid) nr = find_empty_process(); if (nr < 0) goto bad_fork_free; //复制父进程的信息到获取的内存中 task[nr] = p; *p = *current;//复制当前进程内容,共享 p->did_exec = 0; p->kernel_stack_page = 0; p->state = TASK_UNINTERRUPTIBLE;//置子进程为不可中断状态 p->flags &= ~(PF_PTRACED|PF_TRACESYS); p->pid = last_pid;//进程pid p->swappable = 1; //把该任务加入任务双向链表中 p->p_pptr = p->p_opptr = current; p->p_cptr = NULL; SET_LINKS(p);//include/linux/sched.h p->signal = 0;//信号位 p->it_real_value = p->it_virt_value = p->it_prof_value = 0; p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; p->leader = 0; /* process leadership doesn't inherit */ p->utime = p->stime = 0;//初始化用户态时间和核心态时间 p->cutime = p->cstime = 0;//初始化子进程用户态和核心态时间 p->min_flt = p->maj_flt = 0; p->cmin_flt = p->cmaj_flt = 0; p->start_time = jiffies; /* * set up new TSS and kernel stack */ //为核心堆栈页kernel_stack_page取得另一个空闲的内存页面 if (!(p->kernel_stack_page = __get_free_page(GFP_KERNEL))) goto bad_fork_cleanup; //各类寄存器设置 p->tss.es = KERNEL_DS; p->tss.cs = KERNEL_CS; p->tss.ss = KERNEL_DS; p->tss.ds = KERNEL_DS; p->tss.fs = USER_DS; p->tss.gs = KERNEL_DS; p->tss.ss0 = KERNEL_DS; p->tss.esp0 = p->kernel_stack_page + PAGE_SIZE; //设置TSS(任务状态段) p->tss.tr = _TSS(nr); //childregs指向核心堆栈栈顶 childregs = ((struct pt_regs *) (p->kernel_stack_page + PAGE_SIZE)) - 1; p->tss.esp = (unsigned long) childregs; p->tss.eip = (unsigned long) ret_from_sys_call; //传入的堆栈寄存器内容复制到childregs中(子进程寄存器) *childregs = regs; //新进程返回0的原因 childregs->eax = 0; p->tss.back_link = 0; p->tss.eflags = regs.eflags & 0xffffcfff; /* iopl is always 0 for a new process */ //如果当前进程是调用sys_clone系统调用的话 if (IS_CLONE) { if (regs.ebx) childregs->esp = 
regs.ebx; clone_flags = regs.ecx; if (childregs->esp == regs.esp) clone_flags |= COPYVM; } p->exit_signal = clone_flags & CSIGNAL; //局部描述符传传入任务状态段的idt p->tss.ldt = _LDT(nr); if (p->ldt) {//当前任务的ldt不为空 //让其指向新分配的内存 p->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); if (p->ldt != NULL) //如果分配成功,则拷贝当前进程的ldt memcpy(p->ldt, current->ldt, LDT_ENTRIES*LDT_ENTRY_SIZE); } //计算io_bitmap在tss_struct中的偏移量 p->tss.bitmap = offsetof(struct tss_struct,io_bitmap); //置位 for (i = 0; i < IO_BITMAP_SIZE+1 ; i++) /* IO bitmap is actually SIZE+1 */ p->tss.io_bitmap[i] = ~0; //如果当前进程使用了协处理器,就保存其上下文 if (last_task_used_math == current) __asm__("clts ; fnsave %0 ; frstor %0":"=m" (p->tss.i387)); p->semun = NULL; p->shm = NULL; if (copy_vm(p) || shm_fork(current, p)) goto bad_fork_cleanup; //如果是克隆的 if (clone_flags & COPYFD) { //且当前有文件是打开的,就把该文件描述符拷贝到新创建的进程中 for (i=0; i<NR_OPEN;i++) if ((f = p->filp[i]) != NULL) p->filp[i] = copy_fd(f); } else { //否则,将对应文件的打开次数加1 for (i=0; i<NR_OPEN;i++) if ((f = p->filp[i]) != NULL) f->f_count++; } //下面是引用计数加1 if (current->pwd) current->pwd->i_count++; if (current->root) current->root->i_count++; if (current->executable) current->executable->i_count++; //复制父进程的 mmap 信息,即物理内存页面信息 dup_mmap(p); //在gdt中设置新任务的tss set_tss_desc(gdt+(nr<<1)+FIRST_TSS_ENTRY,&(p->tss)); //如果已经设置了当前任务的ldt,则在gdt设置该ldt if (p->ldt) set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,p->ldt, 512); else //否则,采用默认ldt set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,&default_ldt, 1); // p->counter = current->counter >> 1; //进程状态设置为可运行态 p->state = TASK_RUNNING; /* do this last, just in case */ return p->pid;//父进程返回:返回子进程id //下面为异常出错处理 bad_fork_cleanup: task[nr] = NULL; REMOVE_LINKS(p); free_page(p->kernel_stack_page); bad_fork_free: free_page((long) p); bad_fork: return -EAGAIN; }