慢慢来吧~~
/*
 * Page-fault entry stub.
 *
 * Saves the caller-clobbered registers and segment registers, switches
 * %ds/%es/%fs to selector 0x10, then calls one of the C handlers with
 * (error_code, address) on the stack:
 *   bit 0 of the error code clear -> _do_no_page (page not present)
 *   bit 0 of the error code set   -> _do_wp_page (fault on a present page)
 */
.globl _page_fault

_page_fault:
	xchgl %eax,(%esp)	/* swap: %eax = value on top of stack (the error code), old %eax saved in its place */
	pushl %ecx
	pushl %edx
	push %ds
	push %es
	push %fs
	movl $0x10,%edx		/* selector 0x10: presumably the kernel data segment - confirm against the GDT setup */
	mov %dx,%ds
	mov %dx,%es
	mov %dx,%fs
	movl %cr2,%edx		/* %cr2 supplies the faulting address */
	pushl %edx		/* 2nd C argument: address */
	pushl %eax		/* 1st C argument: error_code */
	testl $1,%eax		/* test bit 0 of the error code */
	jne 1f
	call _do_no_page	/* bit 0 clear: the page is missing */
	jmp 2f
1:	call _do_wp_page	/* bit 0 set: present page, handled by the write-protect path */
2:	addl $8,%esp		/* drop the two C arguments */
	pop %fs
	pop %es
	pop %ds
	popl %edx
	popl %ecx
	popl %eax
	iret
1. 缺页处理接口 do_no_page
/*
 * do_no_page() - handle a fault on a page that is not present.
 *
 * error_code: fault error code (not used by this routine).
 * address:    faulting address; rounded down to its page boundary.
 *
 * If the task has no executable, or the page lies at or beyond end_data,
 * an empty page is mapped via get_empty_page().  Otherwise the routine
 * first tries share_page(); failing that it takes a free page, fills it
 * from four consecutive blocks of the executable, clears the tail of the
 * page that extends past end_data, and maps it with put_page().
 */
void do_no_page(unsigned long error_code,unsigned long address)
{
	int blocks[4];
	unsigned long offset;
	unsigned long page;
	char *tail;
	int first_block, pad, k;

	address &= 0xfffff000;
	offset = address - current->start_code;

	if (!current->executable || offset >= current->end_data) {
		get_empty_page(address);
		return;
	}
	if (share_page(offset))
		return;

	page = get_free_page();
	if (!page)
		oom();

	/* remember that 1 block is used for header */
	first_block = 1 + offset / BLOCK_SIZE;
	for (k = 0; k < 4; k++)
		blocks[k] = bmap(current->executable, first_block + k);
	bread_page(page, current->executable->i_dev, blocks);

	/* Number of bytes in this page that lie past end_data; zero them,
	 * walking backwards from the end of the page. */
	pad = offset + 4096 - current->end_data;
	tail = (char *) (page + 4096);
	while (pad-- > 0)
		*--tail = 0;

	if (!put_page(page, address)) {
		free_page(page);
		oom();
	}
}
2.写时复制的复制函数: do_wp_page
/*
 * un_wp_page() - make the page behind a page-table entry writable.
 *
 * table_entry: pointer to the page-table entry to fix up.
 *
 * If the page is above LOW_MEM and its mem_map count is exactly 1, the
 * entry simply gets bit 1 set (the write bit) and the TLB is invalidated.
 * Otherwise a fresh page is taken, the count of the old page is dropped
 * (only for pages above LOW_MEM), the entry is pointed at the new page
 * with flags 7, and the old contents are copied over.
 */
void un_wp_page(unsigned long * table_entry)
{
	unsigned long src = *table_entry & 0xfffff000;
	unsigned long dst;

	if (src >= LOW_MEM && mem_map[MAP_NR(src)] == 1) {
		*table_entry |= 2;
		invalidate();
		return;
	}

	dst = get_free_page();
	if (!dst)
		oom();
	if (src >= LOW_MEM)
		mem_map[MAP_NR(src)]--;
	*table_entry = dst | 7;
	invalidate();
	copy_page(src, dst);
}

/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * If it's in code space we exit with a segment error.
 *
 * error_code: fault error code (not used here).
 * address:    faulting address; used to locate the page-table entry.
 */
void do_wp_page(unsigned long error_code,unsigned long address)
{
	unsigned long dir_entry;
	unsigned long table_base;
	unsigned long entry_offset;

#if 0
/* we cannot do this yet: the estdio library writes to code space */
/* stupid, stupid. I really want the libc.a from GNU */
	if (CODE_SPACE(address))
		do_exit(SIGSEGV);
#endif
	/* Directory entry for this address, then the entry inside the
	 * page table that the directory entry points to. */
	dir_entry = *((unsigned long *) ((address >> 20) & 0xffc));
	table_base = dir_entry & 0xfffff000;
	entry_offset = (address >> 10) & 0xffc;

	un_wp_page((unsigned long *) (entry_offset + table_base));
}
/*
 * write_verify() - un-write-protect the page containing an address, if needed.
 *
 * address: address whose page is about to be written.
 *
 * Does nothing when the page-directory entry is not present.  When the
 * page-table entry is present but not writable, un_wp_page() is called
 * on that entry.
 */
void write_verify(unsigned long address)
{
	unsigned long dir_entry = *((unsigned long *) ((address >> 20) & 0xffc));
	unsigned long *pte;

	/* page table must be valid (present bit set in the directory entry) */
	if (!(dir_entry & 1))
		return;

	/* page-table base plus the offset of this address's entry */
	pte = (unsigned long *) ((dir_entry & 0xfffff000) + ((address >> 10) & 0xffc));

	if ((*pte & 3) == 1)	/* non-writeable, present */
		un_wp_page(pte);
}
1. 对do_wp_page 添加了参数有效性检测:
/*
 * do_wp_page() - handle a write fault on a present page.
 *
 * error_code: fault error code (not used here).
 * address:    faulting address.
 *
 * Logs a warning when the address is below TASK_SIZE, kills the task
 * with SIGSEGV when the address falls outside the TASK_SIZE-byte window
 * that starts at current->start_code, and otherwise hands the matching
 * page-table entry to un_wp_page().
 */
void do_wp_page(unsigned long error_code,unsigned long address)
{
	if (address < TASK_SIZE)
		printk("\n\rBAD! KERNEL MEMORY WP-ERR!\n\r");
	/* Valid offsets are 0 .. TASK_SIZE-1, so an offset equal to
	 * TASK_SIZE is already out of range: compare with >=, not >. */
	if (address - current->start_code >= TASK_SIZE) {
		printk("Bad things happen: page error in do_wp_page\n\r");
		do_exit(SIGSEGV);
	}
	/* page-table entry = table base (from the directory entry) +
	 * offset of this address's entry inside the table */
	un_wp_page((unsigned long *)
		(((address>>10) & 0xffc) + (0xfffff000 &
		*((unsigned long *) ((address>>20) &0xffc)))));
}
源码:
void do_no_page(unsigned long error_code,unsigned long address) { int nr[4]; unsigned long tmp; unsigned long page; int block,i; struct m_inode * inode; if (address < TASK_SIZE) printk("\n\rBAD!! KERNEL PAGE MISSING\n\r"); if (address - current->start_code > TASK_SIZE) { printk("Bad things happen: nonexistent page error in do_no_page\n\r"); do_exit(SIGSEGV); } page = *(unsigned long *) ((address >> 20) & 0xffc); if (page & 1) { page &= 0xfffff000; page += (address >> 10) & 0xffc; tmp = *(unsigned long *) page; if (tmp && !(1 & tmp)) { swap_in((unsigned long *) page); return; } } address &= 0xfffff000; tmp = address - current->start_code; if (tmp >= LIBRARY_OFFSET ) { inode = current->library; block = 1 + (tmp-LIBRARY_OFFSET) / BLOCK_SIZE; } else if (tmp < current->end_data) { inode = current->executable; block = 1 + tmp / BLOCK_SIZE; } else { inode = NULL; block = 0; } if (!inode) { get_empty_page(address); return; } if (share_page(inode,tmp)) return; if (!(page = get_free_page())) oom(); /* remember that 1 block is used for header */ for (i=0 ; i<4 ; block++,i++) nr[i] = bmap(inode,block); bread_page(page,inode->i_dev,nr); i = tmp + 4096 - current->end_data; if (i>4095) i = 0; tmp = page + 4096; while (i-- > 0) { tmp--; *(char *)tmp = 0; } if (put_page(page,address)) return; free_page(page); oom(); }
1. 首先, 缺页中断的响应代码从 page.s 移除 (page.s 不存在了), 加入到了 kernel/asm.s, 且直接调用 do_page_fault
/* Page-fault entry: push the address of the C handler and continue in
 * the common error_code path (defined outside this excerpt). */
_page_fault:
	pushl $_do_page_fault
	jmp error_code
/*
 * do_page_fault() - top-level page-fault dispatcher.
 *
 * esp:        pointer supplied by the assembly entry path (not used here).
 * error_code: fault error code; bit 0 selects the handler.
 *
 * Reads the faulting address from %cr2, then calls do_wp_page() when
 * bit 0 of the error code is set and do_no_page() when it is clear.
 */
void do_page_fault (unsigned long *esp, unsigned long error_code)
{
	unsigned long address;

	/* get the address */
	__asm__ ("movl %%cr2,%0":"=r" (address));

	if (error_code & 1) {
		do_wp_page(error_code, address);
		return;
	}
	/* NOTE(review): three arguments are passed here; confirm this matches
	 * the do_no_page() prototype of the same kernel version. */
	do_no_page(error_code, address, current);
}
3. 写时复制调用的接口 un_wp_page 对于内存耗尽的情况做了循环和更多的保护, 不再直接操作 page 数组的 count, 改用 free_page 来释放一个引用.
void un_wp_page(unsigned long * table_entry) { unsigned long old_page; unsigned long new_page = 0; unsigned long dirty; repeat: old_page = *table_entry; dirty = old_page & PAGE_DIRTY; if (!(old_page & 1)) { if (new_page) free_page(new_page); return; } old_page &= 0xfffff000; if (old_page >= HIGH_MEMORY) { if (new_page) free_page(new_page); printk("bad page address\n\r"); do_exit(SIGSEGV); } if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) { *table_entry |= 2; invalidate(); if (new_page) free_page(new_page); return; } if (!new_page) { if (!(new_page=get_free_page())) oom(); // 防止 oom 没有推出程序 goto repeat; // 再来一遍 } copy_page(old_page,new_page); *table_entry = new_page | dirty | 7 ; free_page(old_page); // 释放一个引用 invalidate(); }
加入更多的合法性检测
/* Excerpt (per the log message, from put_dirty_page): the page-table slot
 * is expected to be empty.  If it already holds a value, report it, clear
 * the entry and call invalidate(). */
if (*page_table) {
	printk("put_dirty_page: page already exists\n");
	*page_table = 0;
	invalidate();
}
加入一点对进程的操作
例如在do_wp_page中
/* Bump the current task's min_flt counter (presumably its minor-fault
 * count - confirm against the task_struct definition). */
++current->min_flt;
将获取空内存页的接口进行了一层包装, 处理了内存不足的问题. 从而像接口 do_no_page 之类的可以不再考虑内存不足.
/*
 * fill in an empty page or directory if none exists
 *
 * p: pointer to the entry to fill.
 *
 * Returns the value of *p: the existing entry when it is already
 * present, otherwise a newly allocated page or'ed with flags 7.  A
 * non-zero entry without the present bit is reported and cleared.  The
 * entry is re-examined after the allocation; if it has become present
 * in the meantime the freshly allocated page is released again.
 */
static unsigned long get_empty(unsigned long * p)
{
	unsigned long spare = 0;

	for (;;) {
		if (*p & 1) {
			/* NOTE(review): on the first pass spare is still 0, so this
			 * is free_page(0) - confirm free_page() tolerates that. */
			free_page(spare);
			return *p;
		}
		if (*p != 0) {
			printk("get_empty: bad page entry \n");
			*p = 0;
		}
		if (spare != 0) {
			*p = spare | 7;
			return *p;
		}
		spare = get_free_page();
		if (spare == 0)
			oom();
	}
}
接口do_no_page
void do_no_page(unsigned long error_code, unsigned long address, struct task_struct *tsk, unsigned long user_esp) { static unsigned int last_checked = 0; int nr[4]; unsigned long tmp; unsigned long page; unsigned int block,i; struct inode * inode; /* Thrashing ? Make it interruptible, but don't penalize otherwise */ for (i = 0; i < CHECK_LAST_NR; i++) if ((address & 0xfffff000) == last_pages[i]) { current->counter = 0; schedule(); } last_checked++; if (last_checked >= CHECK_LAST_NR) last_checked = 0; last_pages[last_checked] = address & 0xfffff000; if (address < TASK_SIZE) { printk("\n\rBAD!! KERNEL PAGE MISSING\n\r"); do_exit(SIGSEGV); } if (address - tsk->start_code >= TASK_SIZE) { printk("Bad things happen: nonexistent page error in do_no_page\n\r"); do_exit(SIGSEGV); } page = get_empty((unsigned long *) ((address >> 20) & 0xffc)); // 这里直接获取新内存页, 不再考虑内存不足 page &= 0xfffff000; page += (address >> 10) & 0xffc; tmp = *(unsigned long *) page; if (tmp & 1) { printk("bogus do_no_page\n"); return; } ++tsk->rss; if (tmp) { ++tsk->maj_flt; swap_in((unsigned long *) page); return; } address &= 0xfffff000; tmp = address - tsk->start_code; inode = NULL; block = 0; if (tmp < tsk->end_data) { inode = tsk->executable; block = 1 + tmp / BLOCK_SIZE; } else { i = tsk->numlibraries; while (i-- > 0) { if (tmp < tsk->libraries[i].start) continue; block = tmp - tsk->libraries[i].start; if (block >= tsk->libraries[i].length) continue; inode = tsk->libraries[i].library; block = 1 + block / BLOCK_SIZE; break; } } if (!inode) { ++tsk->min_flt; get_empty_page(address); if (tsk != current) return; if (tmp >= LIBRARY_OFFSET || tmp < tsk->brk) return; if (tmp+8192 >= (user_esp & 0xfffff000)) return; send_sig(SIGSEGV,tsk,1); return; } if (tsk == current) if (share_page(inode,tmp)) { ++tsk->min_flt; return; } ++tsk->maj_flt; if (!(page = get_free_page())) oom(); for (i=0 ; i<4 ; block++,i++) nr[i] = bmap(inode,block); bread_page(page,inode->i_dev,nr); i = tmp + 4096 - tsk->end_data; if (i>4095) 
i = 0; tmp = page + 4096; while (i--) { tmp--; *(char *)tmp = 0; } if (put_page(page,address)) return; free_page(page); oom(); }
越来越复杂了