Linux 缺页中断发展历史

慢慢来吧~~


Linux V0.11




缺页中断响应代码:

// Page-fault entry stub: saves registers, collects the error code and the
// faulting address, and calls do_no_page or do_wp_page with
// (error_code, address) as C arguments.
.globl _page_fault

_page_fault:
	xchgl %eax,(%esp)     // eax <- error code from top of stack; old eax saved in that slot
	pushl %ecx
	pushl %edx
	push %ds
	push %es
	push %fs
	movl $0x10,%edx       // selector 0x10 - presumably the kernel data segment (confirm against the GDT)
	mov %dx,%ds
	mov %dx,%es
	mov %dx,%fs
	movl %cr2,%edx        // CR2 holds the linear address that caused the fault
	pushl %edx            // 2nd C argument: address
	pushl %eax            // 1st C argument: error_code
	testl $1,%eax         // bit 0 of the error code: 0 = page not present, 1 = protection violation
	jne 1f                // bit set -> the page is present, so this is a write-protect fault
	call _do_no_page      // page not present: handle the missing page
	jmp 2f
1:	call _do_wp_page      // page present but write-protected: copy-on-write path
2:	addl $8,%esp          // drop the two arguments pushed above
	pop %fs
	pop %es
	pop %ds
	popl %edx
	popl %ecx
	popl %eax             // restores the original eax from the slot that held the error code
	iret

核心接口:

1. 缺页处理接口  do_no_page

/*
 * do_no_page() - handle a fault on a page that is not present.
 *
 * error_code: fault error code (not used here).
 * address:    faulting address; rounded down to its page boundary.
 *
 * If the task has no executable, or the page lies at or beyond end_data,
 * an empty page is mapped. Otherwise the page is either shared via
 * share_page() or read from the executable (four consecutive blocks,
 * skipping the header block), with the bytes past end_data zeroed.
 */
void do_no_page(unsigned long error_code,unsigned long address)
{
	int blocks[4];
	unsigned long offset;
	unsigned long page;
	unsigned long fill;
	int first_block;
	int n;
	int tail;

	address &= 0xfffff000;
	offset = address - current->start_code;
	if (!current->executable || offset >= current->end_data) {
		get_empty_page(address);
		return;
	}
	if (share_page(offset))
		return;
	page = get_free_page();
	if (!page)
		oom();
	/* block 0 of the file is the header, so file data starts at block 1 */
	first_block = 1 + offset/BLOCK_SIZE;
	for (n = 0; n < 4; n++)
		blocks[n] = bmap(current->executable, first_block + n);
	bread_page(page, current->executable->i_dev, blocks);
	/* clear the part of the page that lies past end_data, back to front */
	tail = offset + 4096 - current->end_data;
	for (fill = page + 4096; tail > 0; tail--) {
		fill--;
		*(char *)fill = 0;
	}
	if (!put_page(page, address)) {
		free_page(page);
		oom();
	}
}

流程图

Linux 缺页中断发展历史_第1张图片



2.写时复制的复制函数: do_wp_page

/*
 * Make the page referenced by *table_entry writable.
 *
 * table_entry: pointer to the page-table entry of the write-protected page.
 *
 * If the page is at or above LOW_MEM and its mem_map count is 1, the
 * entry is simply made writable. Otherwise a new page is allocated, the
 * entry is pointed at it and the old contents are copied over.
 */
void un_wp_page(unsigned long * table_entry)
{
	unsigned long old_page,new_page;

	/* strip the low 12 flag bits to get the page address */
	old_page = 0xfffff000 & *table_entry;
	if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) {
		/* count of 1: no copy needed, just set bit 1 (writable) */
		*table_entry |= 2;
		invalidate();	/* presumably flushes the TLB - confirm */
		return;
	}
	if (!(new_page=get_free_page()))
		oom();
	/* drop one count on the old page; pages below LOW_MEM are not counted here */
	if (old_page >= LOW_MEM)
		mem_map[MAP_NR(old_page)]--;
	/* low bits 7 = present | writable | user */
	*table_entry = new_page | 7;
	invalidate();
	copy_page(old_page,new_page);
}	

/*
 * do_wp_page() - handle a write to a present but write-protected page.
 *
 * error_code: fault error code (not used here).
 * address:    faulting address.
 *
 * Locates the page-table entry for the address and passes it to
 * un_wp_page(). The code-space check that would exit with a segment
 * error is compiled out (#if 0).
 */
void do_wp_page(unsigned long error_code,unsigned long address)
{
	unsigned long *dir_entry;
	unsigned long table_base;
	unsigned long entry_offset;

#if 0
/* we cannot do this yet: the estdio library writes to code space */
/* stupid, stupid. I really want the libc.a from GNU */
	if (CODE_SPACE(address))
		do_exit(SIGSEGV);
#endif
	/* directory entry: top 10 address bits times 4, used as an absolute address */
	dir_entry = (unsigned long *) ((address>>20) & 0xffc);
	/* page-table base taken from the directory entry */
	table_base = 0xfffff000 & *dir_entry;
	/* byte offset of the entry inside that page table */
	entry_offset = (address>>10) & 0xffc;
	un_wp_page((unsigned long *) (entry_offset + table_base));
}

3. 写前验证函数 write_verify: 若目标页存在但不可写, 则提前调用 un_wp_page 完成写时复制

/*
 * write_verify() - make sure the page containing address can be written.
 *
 * Does nothing if the page-directory entry for the address is not
 * present. If the page-table entry is present but not writable,
 * un_wp_page() is called on it.
 */
void write_verify(unsigned long address)
{
	unsigned long dir_entry;
	unsigned long *pte;

	dir_entry = *((unsigned long *) ((address>>20) & 0xffc));
	if (!(dir_entry & 1))	/* directory entry not present: nothing to do */
		return;
	/* page-table base plus the entry's byte offset within the table */
	pte = (unsigned long *) ((dir_entry & 0xfffff000) + ((address>>10) & 0xffc));
	if ((*pte & 3) == 1)	/* present, but not writable */
		un_wp_page(pte);
}



Linux V0.12

1. 对do_wp_page 添加了参数有效性检测:


/*
 * do_wp_page() - handle a write to a present but write-protected page.
 *
 * error_code: fault error code (not used here).
 * address:    faulting address.
 *
 * An address below TASK_SIZE only produces a warning. An address more
 * than TASK_SIZE past the task's start_code ends the task with SIGSEGV.
 * Otherwise the page-table entry is located and passed to un_wp_page().
 */
void do_wp_page(unsigned long error_code,unsigned long address)
{
	unsigned long *dir_entry;
	unsigned long *pte;

	if (address < TASK_SIZE)
		printk("\n\rBAD! KERNEL MEMORY WP-ERR!\n\r");
	if (address - current->start_code > TASK_SIZE) {
		printk("Bad things happen: page error in do_wp_page\n\r");
		do_exit(SIGSEGV);
	}

	/* directory entry: top 10 address bits times 4, used as an absolute address */
	dir_entry = (unsigned long *) ((address>>20) & 0xffc);
	/* entry offset inside the page table plus the page-table base */
	pte = (unsigned long *) (((address>>10) & 0xffc) + (0xfffff000 & *dir_entry));
	un_wp_page(pte);
}

2. do_no_page 加入了地址有效性检测, 以及对交换 (swap_in) 和库文件 (library) 的支持:

源码:

/*
 * do_no_page() - handle a fault on a page that is not present (0.12).
 *
 * error_code: fault error code (not used here).
 * address:    faulting address.
 *
 * Order of handling: address sanity checks, swap-in of a page whose
 * table entry is non-zero but not present, then either an empty page,
 * a shared page, or a page read from the executable or the library.
 */
void do_no_page(unsigned long error_code,unsigned long address)
{
	int nr[4];
	unsigned long tmp;
	unsigned long page;
	int block,i;
	struct m_inode * inode;

	/* an address below TASK_SIZE is only reported, not treated as fatal */
	if (address < TASK_SIZE)
		printk("\n\rBAD!! KERNEL PAGE MISSING\n\r");
	if (address - current->start_code > TASK_SIZE) {
		printk("Bad things happen: nonexistent page error in do_no_page\n\r");
		do_exit(SIGSEGV);
	}
	/* read the page-directory entry; its address is derived directly from the top address bits */
	page = *(unsigned long *) ((address >> 20) & 0xffc);
	if (page & 1) {
		/* directory entry present: locate the page-table entry */
		page &= 0xfffff000;
		page += (address >> 10) & 0xffc;
		tmp = *(unsigned long *) page;
		/* non-zero but not present: hand the entry to swap_in() */
		if (tmp && !(1 & tmp)) {
			swap_in((unsigned long *) page);
			return;
		}
	}
	address &= 0xfffff000;
	tmp = address - current->start_code;
	/* choose the backing file and first block; "1 +" skips the header block */
	if (tmp >= LIBRARY_OFFSET ) {
		inode = current->library;
		block = 1 + (tmp-LIBRARY_OFFSET) / BLOCK_SIZE;
	} else if (tmp < current->end_data) {
		inode = current->executable;
		block = 1 + tmp / BLOCK_SIZE;
	} else {
		inode = NULL;
		block = 0;
	}
	/* no backing file: map an empty page */
	if (!inode) {
		get_empty_page(address);
		return;
	}
	if (share_page(inode,tmp))
		return;
	if (!(page = get_free_page()))
		oom();
/* remember that 1 block is used for header */
	for (i=0 ; i<4 ; block++,i++)
		nr[i] = bmap(inode,block);
	bread_page(page,inode->i_dev,nr);
	/* number of bytes of this page that lie past end_data */
	i = tmp + 4096 - current->end_data;
	/* more than a page past end_data: nothing is cleared */
	if (i>4095)
		i = 0;
	/* zero the last i bytes of the page, back to front */
	tmp = page + 4096;
	while (i-- > 0) {
		tmp--;
		*(char *)tmp = 0;
	}
	if (put_page(page,address))
		return;
	/* put_page() returned 0: release the page and report out of memory */
	free_page(page);
	oom();
}

流程图:

Linux 缺页中断发展历史_第2张图片


Linux V0.95

1. 首先, 缺页中断的响应代码从 page.s 中移除 (page.s 不存在了), 加入到了 kernel/asm.s, 且直接调用 do_page_fault

// Page-fault entry: push the address of the C handler and jump to the
// error_code label (not shown here - presumably a shared exception stub).
_page_fault:
	pushl $_do_page_fault
	jmp error_code

2. 对于引起缺页中断的原因的判断, 移到了 do_page_fault 内

/*
 * do_page_fault() - dispatch a page fault to the matching handler.
 *
 * esp:        not used in this routine.
 * error_code: fault error code; bit 0 selects the handler.
 *
 * Reads the faulting address from CR2. Bit 0 clear goes to
 * do_no_page(); bit 0 set goes to do_wp_page().
 */
void do_page_fault (unsigned long *esp, unsigned long error_code)
{
	unsigned long fault_addr;

	/* fetch the faulting address from CR2 */
	__asm__ ("movl %%cr2,%0":"=r" (fault_addr));
	if (error_code & 1)
		do_wp_page(error_code, fault_addr);
	else
		do_no_page(error_code, fault_addr, current);
}

3. 写时复制调用的接口 un_wp_page 对内存耗尽的情况增加了重试循环和更多的保护, 不再直接操作 mem_map 数组中的引用计数, 改用 free_page 来释放一个引用.

/*
 * Make the page referenced by *table_entry writable (0.95).
 *
 * table_entry: pointer to the page-table entry of the write-protected page.
 *
 * The entry is re-read and re-checked after a new page has been
 * obtained. Every early exit frees the new page if one is already held.
 */
void un_wp_page(unsigned long * table_entry)
{
	unsigned long old_page;
	unsigned long new_page = 0;
	unsigned long dirty;

repeat:
	old_page = *table_entry;
	/* remember the dirty flag so it can be carried over to the new entry */
	dirty = old_page & PAGE_DIRTY;
	/* entry not present: nothing to un-protect */
	if (!(old_page & 1)) {
		if (new_page)
			free_page(new_page);
		return;
	}
	old_page &= 0xfffff000;
	if (old_page >= HIGH_MEMORY) {
		if (new_page)
			free_page(new_page);
		printk("bad page address\n\r");
		do_exit(SIGSEGV);
	}
	/* mem_map count of 1: just set bit 1 (writable), no copy needed */
	if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) {
		*table_entry |= 2;
		invalidate();
		if (new_page)
			free_page(new_page);
		return;
	}
	if (!new_page) {
		if (!(new_page=get_free_page()))
			oom();                           /* no free page could be obtained */
		/* NOTE(review): presumably get_free_page() may sleep, so the entry is re-checked - confirm */
		goto repeat;                             /* run all checks again with new_page held */
	}
	copy_page(old_page,new_page);
	/* low bits 7 = present | writable | user, plus the saved dirty flag */
	*table_entry = new_page | dirty | 7 ;
	free_page(old_page);                             /* per the article's note, releases one reference to the old page */
	invalidate();
}	

4. do_no_page 加入更多检测合法性的代码.


Linux V0.95a

加入更多的合法性检测

/* Excerpt: if the page-table slot is already non-zero, report it and clear it. */
if (*page_table) {
	printk("put_dirty_page: page already exists\n");
	*page_table = 0;	/* clear the existing entry */
	invalidate();	/* presumably flushes the TLB - confirm */
}

Linux V0.95c

加入一点对进程的操作

例如在do_wp_page中

	/* bump the current task's min_flt counter (presumably its minor-fault count - confirm) */
	++current->min_flt;

Linux V0.96b

将获取空内存页的接口进行了一层包装, 处理了内存不足的问题. 从而像接口  do_no_page 之类的可以不再考虑内存不足.

/*
 * get_empty() - make sure the entry at *p is present, filling it with a
 * newly obtained page if it is not.
 *
 * Returns the resulting value of *p. The entry is checked again after a
 * page has been obtained. If it is present by then, the new page is
 * freed and the existing entry is returned. A non-zero entry that is
 * not present is reported and cleared.
 */
static unsigned long get_empty(unsigned long * p)
{
	unsigned long fresh = 0;

	for (;;) {
		if (*p & 1) {
			/* already present: give back whatever we obtained */
			free_page(fresh);
			return *p;
		}
		if (*p) {
			printk("get_empty: bad page entry \n");
			*p = 0;
		}
		if (fresh) {
			/* low bits 7 = present | writable | user */
			*p = fresh | 7;
			return *p;
		}
		fresh = get_free_page();
		if (!fresh)
			oom();
	}
}

接口do_no_page

/*
 * do_no_page() - handle a fault on a page that is not present (0.96b).
 *
 * error_code: fault error code (not used here).
 * address:    faulting address.
 * tsk:        task whose address space is being filled in.
 * user_esp:   user stack pointer, used for the stack-growth check.
 *
 * Updates tsk's rss, min_flt and maj_flt counters along the way.
 */
void do_no_page(unsigned long error_code, unsigned long address,
	struct task_struct *tsk, unsigned long user_esp)
{
	static unsigned int last_checked = 0;
	int nr[4];
	unsigned long tmp;
	unsigned long page;
	unsigned int block,i;
	struct inode * inode;

	/* Thrashing ? Make it interruptible, but don't penalize otherwise */
	/* a fault on a recently recorded page gives up the CPU */
	for (i = 0; i < CHECK_LAST_NR; i++)
		if ((address & 0xfffff000) == last_pages[i]) {
			current->counter = 0;
			schedule();
		}
	/* record this page in the next slot of last_pages[], wrapping around */
	last_checked++;
	if (last_checked >= CHECK_LAST_NR)
		last_checked = 0;
	last_pages[last_checked] = address & 0xfffff000;
	if (address < TASK_SIZE) {
		printk("\n\rBAD!! KERNEL PAGE MISSING\n\r");
		do_exit(SIGSEGV);
	}
	if (address - tsk->start_code >= TASK_SIZE) {
		printk("Bad things happen: nonexistent page error in do_no_page\n\r");
		do_exit(SIGSEGV);
	}
	page = get_empty((unsigned long *) ((address >> 20) & 0xffc));                /* directory entry is filled in by get_empty(), which handles allocation failure itself */
	/* locate the page-table entry for the address */
	page &= 0xfffff000;
	page += (address >> 10) & 0xffc;
	tmp = *(unsigned long *) page;
	/* entry already present: this fault should not have reached here */
	if (tmp & 1) {
		printk("bogus do_no_page\n");
		return;
	}
	++tsk->rss;
	/* non-zero but not present: hand the entry to swap_in() */
	if (tmp) {
		++tsk->maj_flt;
		swap_in((unsigned long *) page);
		return;
	}
	address &= 0xfffff000;
	tmp = address - tsk->start_code;
	inode = NULL;
	block = 0;
	if (tmp < tsk->end_data) {
		inode = tsk->executable;
		block = 1 + tmp / BLOCK_SIZE;	/* "1 +" skips the first block of the file */
	} else {
		/* search the task's libraries, last to first, for one covering tmp */
		i = tsk->numlibraries;
		while (i-- > 0) {
			if (tmp < tsk->libraries[i].start)
				continue;
			block = tmp - tsk->libraries[i].start;
			if (block >= tsk->libraries[i].length)
				continue;
			inode = tsk->libraries[i].library;
			block = 1 + block / BLOCK_SIZE;
			break;
		}
	}
	/* no backing file: map an empty page, then decide whether the access was legitimate */
	if (!inode) {
		++tsk->min_flt;
		get_empty_page(address);
		if (tsk != current)
			return;
		if (tmp >= LIBRARY_OFFSET || tmp < tsk->brk)
			return;
		/* accept offsets no more than 8192 bytes below the page of user_esp */
		if (tmp+8192 >= (user_esp & 0xfffff000))
			return;
		send_sig(SIGSEGV,tsk,1);
		return;
	}
	/* sharing is only attempted when tsk is the current task */
	if (tsk == current)
		if (share_page(inode,tmp)) {
			++tsk->min_flt;
			return;
		}
	++tsk->maj_flt;
	if (!(page = get_free_page()))
		oom();
	for (i=0 ; i<4 ; block++,i++)
		nr[i] = bmap(inode,block);
	bread_page(page,inode->i_dev,nr);
	/* number of bytes of this page past end_data; i is unsigned here */
	i = tmp + 4096 - tsk->end_data;
	/* anything outside 0..4095 (including a wrapped value) means nothing is cleared */
	if (i>4095)
		i = 0;
	/* zero the last i bytes of the page, back to front */
	tmp = page + 4096;
	while (i--) {
		tmp--;
		*(char *)tmp = 0;
	}
	if (put_page(page,address))
		return;
	/* put_page() returned 0: release the page and report out of memory */
	free_page(page);
	oom();
}

之后的版本不再默认一级页表总是在地址0

越来越复杂了

你可能感兴趣的:(linux,内核)