Linux内核源代码情景分析-系统调用brk()

     首先看下进程地址空间示意图:



    我们简单的说,从低地址到高地址,代码区和数据区,空洞,堆栈区。
    在Linux内核源代码情景分析-内存管理之用户堆栈的扩展,我们申请了从堆栈区往下,数据区上面的页面。
    在Linux内核源代码情景分析-内存管理之用户页面的换入,我们申请了用于换入/换出的页面。
    在本文中,我们申请的是从数据区往上,堆栈区下面的页面。
    我们通过一个实例来分析,brk(),见下图:

    


    1、由于新边界比旧边界地址高,我们申请旧边界和新边界之间的页面。就是把对应的虚拟地址映射到物理页面。
    brk对应的系统调用是sys_brk,代码如下:

asmlinkage unsigned long sys_brk(unsigned long brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;

	down(&mm->mmap_sem);

	if (brk < mm->end_code)//brk不能大于代码段末端地址
		goto out;
	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {//目前新边界大于旧边界
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against rlimit.. */
	rlim = current->rlim[RLIMIT_DATA].rlim_cur;
	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)//不能超过限制
		goto out;

	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))//看看是否与已经存在的虚拟空间有冲突
		goto out;

	/* Check if we have enough memory.. */
	if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))//是否有足够的空闲内存页面
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)//新边界大于旧边界,建立映射
		goto out;
set_brk:
	mm->brk = brk;//brk设置为新的边界
out:
	retval = mm->brk;
	up(&mm->mmap_sem);
	return retval;
}


    本例中,新边界大于旧边界,我们建立映射。do_brk代码如下:

unsigned long do_brk(unsigned long addr, unsigned long len)
{
	struct mm_struct * mm = current->mm;
	struct vm_area_struct * vma;
	unsigned long flags, retval;

	len = PAGE_ALIGN(len);
	if (!len)
		return addr;

	/*
	 * mlock MCL_FUTURE?
	 */
	if (mm->def_flags & VM_LOCKED) {
		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
		locked += len;
		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
			return -EAGAIN;
	}

	/*
	 * Clear old maps.  this also does some error checking for us
	 */
	retval = do_munmap(mm, addr, len);//find_vm_intersection对冲突的检查,实际上检查的只是新区的高端,没有检查低端。对于低端的冲突是允许的,解决的办法是以新的映射为准,先通过do_munmap把原来的映射解除,再来建立映射
	if (retval != 0)
		return retval;

	/* Check against address space limits *after* clearing old maps... */
	if ((mm->total_vm << PAGE_SHIFT) + len   
	    > current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;

	if (mm->map_count > MAX_MAP_COUNT)
		return -ENOMEM;

	if (!vm_enough_memory(len >> PAGE_SHIFT))
		return -ENOMEM;

	flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC,
				MAP_FIXED|MAP_PRIVATE) | mm->def_flags;

	flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
	

	/* Can we just expand an old anonymous mapping? */
	if (addr) {//先看看是否可以跟原有的区间合并
		struct vm_area_struct * vma = find_vma(mm, addr-1);
		if (vma && vma->vm_end == addr && !vma->vm_file && 
		    vma->vm_flags == flags) {
			vma->vm_end = addr + len;
			goto out;
		}
	}	


	/*
	 * create a vma struct for an anonymous mapping
	 */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);//如果不能跟原有的区间合并,则分配vm_area_struct结构
	if (!vma)
		return -ENOMEM;

	vma->vm_mm = mm;
	vma->vm_start = addr;//起始地址
	vma->vm_end = addr + len;//结束地址
	vma->vm_flags = flags;
	vma->vm_page_prot = protection_map[flags & 0x0f];
	vma->vm_ops = NULL;
	vma->vm_pgoff = 0;
	vma->vm_file = NULL;
	vma->vm_private_data = NULL;

	insert_vm_struct(mm, vma);

out:
	mm->total_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED) {//仅在对区间加锁时才调用make_pages_present
		mm->locked_vm += len >> PAGE_SHIFT;
		make_pages_present(addr, addr + len);//为新增的虚拟空间建立起对内存页面的映射
	}
	return addr;
}

    make_pages_present,代码如下:
int make_pages_present(unsigned long addr, unsigned long end)
{
	int write;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct * vma;

	vma = find_vma(mm, addr);
	write = (vma->vm_flags & VM_WRITE) != 0;
	if (addr >= end)
		BUG();
	do {
		if (handle_mm_fault(mm, vma, addr, write) < 0)
			return -1;
		addr += PAGE_SIZE;
	} while (addr < end);
	return 0;
}

   这里所用的方法很有趣,那就是对新区间中的每一个页面模拟一次缺页异常。


    最后返回sys_brk,mm->brk设置为新的边界。

    2、由于新边界比旧边界地址低,我们释放新边界和旧边界之前的页面。如下图:

    

你可能感兴趣的:(Linux内核源代码情景分析-系统调用brk())