下面再看函数do_brk,事实上do_brk和do_mmap几乎一样,因为它们的本质都是创建(或扩展)vma;这里多说一下,do_brk由brk系统调用调用,该系统调用在mm/mmap.c文件中通过宏SYSCALL_DEFINE1定义。系统调用的问题如前几篇文章一样,后续会有专题讨论,这里直接看SYSCALL_DEFINE1(brk, ...)的源码:
/*
 * brk system call: set the end of the calling process's heap to `brk`.
 *
 * mm->mmap_sem is held for writing across the whole operation. The return
 * value is always mm->brk as it stands on exit: the requested address if the
 * change was applied, or the previous, unchanged break if any check or helper
 * failed. No error code is returned.
 */
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
unsigned long rlim, retval;
unsigned long newbrk, oldbrk;
struct mm_struct *mm = current->mm;
unsigned long min_brk;
down_write(&mm->mmap_sem);
/* Lowest permitted break: mm->end_code under CONFIG_COMPAT_BRK, otherwise mm->start_brk. */
#ifdef CONFIG_COMPAT_BRK
min_brk = mm->end_code;
#else
min_brk = mm->start_brk;
#endif
if (brk < min_brk)
goto out;
/*
* Check against rlimit here. If this check is done later after the test
* of oldbrk with newbrk then it can escape the test and let the data
* segment grow beyond its set limit the in case where the limit is
* not page aligned -Ram Gupta
*/
/*
 * When RLIMIT_DATA is finite, refuse the request if the heap size it implies
 * (brk - mm->start_brk) plus the data segment size
 * (mm->end_data - mm->start_data) would exceed the limit.
 */
rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
(mm->end_data - mm->start_data) > rlim)
goto out;
/*
 * newbrk is the requested break after PAGE_ALIGN; oldbrk is the current
 * break (mm->brk) after PAGE_ALIGN. If both land on the same page boundary
 * no mapping has to change and only mm->brk is updated.
 */
newbrk = PAGE_ALIGN(brk);
oldbrk = PAGE_ALIGN(mm->brk);
if (oldbrk == newbrk)
goto set_brk;
/* Always allow shrinking brk. */
/*
 * A request at or below the current break is a shrink: unmap the
 * oldbrk - newbrk bytes starting at newbrk. do_munmap returning 0 is
 * treated as success and the new break is recorded; otherwise the break is
 * left unchanged.
 */
if (brk <= mm->brk) {
if (!do_munmap(mm, newbrk, oldbrk-newbrk))
goto set_brk;
goto out;
}
/* Check against existing mmap mappings. */
/*
 * Growing: if find_vma_intersection reports an existing vma for the range
 * oldbrk .. newbrk + PAGE_SIZE, the heap cannot be extended there, so bail
 * out with the break unchanged. This is a failure path, not a "nothing to
 * do" shortcut.
 * NOTE(review): the extra PAGE_SIZE presumably keeps a one-page gap between
 * the heap and the next mapping; confirm.
 */
if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
goto out;
/* Ok, looks good - let it rip. */
/*
 * Extend the heap by newbrk - oldbrk bytes starting at oldbrk. Any return
 * value other than oldbrk is treated as failure.
 */
if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
goto out;
set_brk:
/* Record the exact (unaligned) requested address as the new break. */
mm->brk = brk;
out:
/* Common exit: report whatever the break is now and release the semaphore. */
retval = mm->brk;
up_write(&mm->mmap_sem);
return retval;
}
首先看看这个系统调用的参数,它的参数实际就是unsigned long brk,指定希望设置的堆顶(堆结束)地址;进程的mm的成员brk记录了当前进程的堆顶地址。在本函数中oldbrk和newbrk分别是当前堆顶地址和希望的堆顶地址按页对齐后的值。如果请求的brk不大于当前的mm->brk(即新边界更小),说明是收缩操作,即释放堆空间,将调用函数do_munmap解除[newbrk, oldbrk)这段区间的映射;
如果不是释放操作,那就是申请(扩大)堆空间:首先调用函数find_vma_intersection检查区间[oldbrk, newbrk+PAGE_SIZE)内是否已经存在其他vma;如果存在,说明要扩展的位置已被占用,无法扩大堆,函数直接跳到out返回当前(未改变的)mm->brk;如果不存在,则调用函数do_brk扩大堆空间范围,实际上就是创建新的vma(或与相邻vma合并),成功后把mm->brk更新为参数brk;
以上就是SYSCALL_DEFINE1的内容,下面看下函数do_brk,和do_mmap非常相似,源码如下:
/*
 * do_brk - set up an anonymous mapping of `len` bytes at the fixed address
 * `addr`, either by merging into a neighbouring vma or by creating a new one.
 *
 * @addr: start address of the region.
 * @len:  length in bytes; it is page-aligned with PAGE_ALIGN before use.
 *
 * Returns `addr` on success (including when the aligned length is 0), or a
 * negative errno value (-ENOMEM, -EAGAIN, or whatever a helper reported)
 * converted to unsigned long on failure. mm->mmap_sem must be held for
 * writing (see the verify_mm_writelocked call below).
 */
unsigned long do_brk(unsigned long addr, unsigned long len)
{
struct mm_struct * mm = current->mm;
struct vm_area_struct * vma, * prev;
unsigned long flags;
struct rb_node ** rb_link, * rb_parent;
/* Page number of addr; stored as vm_pgoff of the vma and passed to vma_merge. */
pgoff_t pgoff = addr >> PAGE_SHIFT;
int error;
len = PAGE_ALIGN(len);
if (!len)
return addr;
/* Security hook: check whether a mapping at addr is permitted. */
error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
if (error)
return error;
flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
/*
 * Validate the placement of len bytes at addr (MAP_FIXED is passed, so the
 * address is a request, not a hint). A result with any bit set outside
 * PAGE_MASK is treated as an error code and returned.
 * NOTE(review): the result is stored in an int; get_unmapped_area presumably
 * returns an address-sized value, so confirm truncation is harmless here.
 */
error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
if (error & ~PAGE_MASK)
return error;
/*
* mlock MCL_FUTURE?
*/
/*
 * If new mappings are locked by default, the pages already locked plus this
 * request must stay within RLIMIT_MEMLOCK (converted to pages) unless the
 * task has CAP_IPC_LOCK.
 */
if (mm->def_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
locked = len >> PAGE_SHIFT;
locked += mm->locked_vm;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
lock_limit >>= PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
/*
* mm->mmap_sem is required to protect against another thread
* changing the mappings in case we sleep.
*/
verify_mm_writelocked(mm);
/*
* Clear old maps. this also does some error checking for us
*/
munmap_back:
/*
 * find_vma_prepare looks up the vma relevant to addr and also fills in prev,
 * rb_link and rb_parent, which are used below for merging and linking. If
 * the vma found starts before addr + len, the requested range is already
 * mapped: unmap it and look again.
 */
vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
if (vma && vma->vm_start < addr + len) {
if (do_munmap(mm, addr, len))
return -ENOMEM;
goto munmap_back;
}
/* Check against address space limits *after* clearing old maps... */
/*
 * Three resource checks, each failing with -ENOMEM: address-space growth
 * allowance, the per-mm mapping count limit, and the memory-commit check.
 * NOTE(review): security_vm_enough_memory presumably also charges the pages,
 * which would be why the allocation-failure path below calls
 * vm_unacct_memory; confirm.
 */
if (!may_expand_vm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
if (security_vm_enough_memory(len >> PAGE_SHIFT))
return -ENOMEM;
/* Can we just expand an old private anonymous mapping? */
/*
 * Try to merge the new range with the neighbouring vma(s) that have
 * compatible flags; if that works no new vma needs to be allocated.
 */
vma = vma_merge(mm, prev, addr, addr + len, flags,
NULL, NULL, pgoff, NULL);
/* Merge succeeded: skip vma creation and go straight to the accounting at out. */
if (vma)
goto out;
/*
* create a vma struct for an anonymous mapping
*/
/*
 * Merge was not possible: allocate a zeroed vma from vm_area_cachep, fill in
 * its range, offset, flags and page protection, then link it into the mm.
 * On allocation failure the pages are un-accounted before returning -ENOMEM.
 */
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;
}
vma->vm_mm = mm;
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_pgoff = pgoff;
vma->vm_flags = flags;
vma->vm_page_prot = vm_get_page_prot(flags);
vma_link(mm, vma, prev, rb_link, rb_parent);
out:
/* Reached after either a merge or a fresh vma: add the new pages to mm->total_vm. */
mm->total_vm += len >> PAGE_SHIFT;
/*
 * For a locked mapping, process the range with mlock_vma_pages_range right
 * away (presumably faulting the pages in); mm->locked_vm is increased only
 * when that call returns 0.
 */
if (flags & VM_LOCKED) {
if (!mlock_vma_pages_range(vma, addr, addr + len))
mm->locked_vm += (len >> PAGE_SHIFT);
}
return addr;
}
do_brk和do_mmap非常相似:首先校验能否在指定地址addr处建立映射(以MAP_FIXED方式调用get_unmapped_area),然后去除该范围内可能已存在的vma映射,并检查各项资源限制,接着创建(或与相邻vma合并)vma,最后根据flags是否带有锁定标志(VM_LOCKED)决定是否立即分配物理页;