用户空间的缺页异常可以分为两种情况--
1.触发异常的线性地址处于用户空间的vma中,但还未分配物理页,如果访问权限OK的话内核就给进程分配相应的物理页了
2.触发异常的线性地址不处于用户空间的vma中,这种情况得判断是不是因为用户进程的栈空间消耗完而触发的缺页异常,如果是的话则在用户空间对栈区域进行扩展,并且分配相应的物理页,如果不是则作为一次非法地址访问来处理,内核将终结进程
下面来看do_page_fault()函数对用户空间缺页异常的处理
dotraplinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct vm_area_struct *vma; struct task_struct *tsk; unsigned long address; struct mm_struct *mm; int write; int fault; tsk = current; //获取当前进程 mm = tsk->mm; //获取当前进程的地址空间 /* Get the faulting address: */ address = read_cr2(); //读取CR2寄存器获取触发异常的访问地址 ... ... ... ... vma = find_vma(mm, address);//试图寻找到一个离address最近的vma,vma包含address或在address之后 /*没有找到这样的vma则说明address之后没有虚拟内存区域,因此该address肯定是无效的, 通过bad_area()路径来处理,bad_area()的主体就是__bad_area()-->bad_area_nosemaphore()*/ if (unlikely(!vma)) { bad_area(regs, error_code, address); return; } /*如果该地址包含在vma之中,则跳转到good_area处进行处理*/ if (likely(vma->vm_start <= address)) goto good_area; /*不是前面两种情况的话,则判断是不是由于用户堆栈所占的页框已经使用完,而一个PUSH指令 引用了一个尚未和页框绑定的虚拟内存区域导致的一个异常,属于堆栈的虚拟内存区,其VM_GROWSDOWN位 被置位*/ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { bad_area(regs, error_code, address);//不是堆栈区域,则用bad_area()来处理 return; } if (error_code & PF_USER) {//必须处于用户空间 /* * Accessing the stack below %sp is always a bug. * The large cushion allows instructions like enter * and pusha to work. ("enter $65535, $31" pushes * 32 pointers and then decrements %sp by 65535.) */ /*这里检查address,只有该地址足够高(和堆栈指针的差不大于65536+32*sizeof(unsigned long)), 才能允许用户进程扩展它的堆栈地址空间,否则bad_area()处理*/ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { bad_area(regs, error_code, address); return; } } if (unlikely(expand_stack(vma, address))) {//堆栈扩展不成功同样由bad_area()处理 bad_area(regs, error_code, address); return; } /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: write = error_code & PF_WRITE; /*访问权限不够则通过bad_area_access_error()处理,该函数是对__bad_area()的封装,只不过 发送给用户进程的信号为SEGV_ACCERR*/ if (unlikely(access_error(error_code, write, vma))) { bad_area_access_error(regs, error_code, address); return; } /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault: */ /*分配新的页表和页框*/ fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); if (unlikely(fault & VM_FAULT_ERROR)) { mm_fault_error(regs, error_code, address, fault); return; } if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } check_v8086_mode(regs, address, tsk); up_read(&mm->mmap_sem); }
bad_area()函数的主体函数为__bad_area()-->__bad_area_nosemaphore(),这个函数在上一篇博文中分析了其对内核的非法访问的处理,现在看其对用户空间的非法访问的处理
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, int si_code) { struct task_struct *tsk = current; /* User mode accesses just cause a SIGSEGV */ /*错误发生在用户态,则向用户进程发送一个SIGSEG信号V*/ if (error_code & PF_USER) { /* * It's possible to have interrupts off here: */ local_irq_enable(); /* * Valid to do another page fault here because this one came * from user space: */ if (is_prefetch(regs, error_code, address)) return; if (is_errata100(regs, address)) return; if (unlikely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); /* Kernel addresses are always protection faults: */ tsk->thread.cr2 = address; tsk->thread.error_code = error_code | (address >= TASK_SIZE); tsk->thread.trap_no = 14; force_sig_info_fault(SIGSEGV, si_code, address, tsk); return; } ... ... }
在确定了这次异常是因为物理页没分配而导致后,就通过good_area路径来处理,可想而知,该路径在确定了访问权限足够后,将完成页表和物理页的分配,这个任务有handle_mm_fault()函数来完成
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; __set_current_state(TASK_RUNNING); count_vm_event(PGFAULT); if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, flags); pgd = pgd_offset(mm, address); pud = pud_alloc(mm, pgd, address);//分配pud目录 if (!pud) return VM_FAULT_OOM; pmd = pmd_alloc(mm, pud, address);//分配pmd目录 if (!pmd) return VM_FAULT_OOM; pte = pte_alloc_map(mm, pmd, address);//分配pte表 if (!pte) return VM_FAULT_OOM; /*handle_pte_fault()的任务就是为pte绑定新的页框,它会根据pte页表项的情况来做不同的处理*/ return handle_pte_fault(mm, vma, address, pte, pmd, flags); }
handle_pte_fault()函数的处理比较复杂,因为它要根据pte页表项对应的物理页的不同状态来做各种不同的处理,具体的分析以后再给出。