1. 简介
mmap在用户态通过以下函数进行调用:
- void *mmap(void *addr, size_t size, int prot, int flags, int fd, long offset)
然后进入系统调用。
2. Kernel mmap实现
1)然后进入系统调用,其系统调用号为:
kernel/arch/arm/include/asm/unistd.h
#define __NR_mmap2 (__NR_SYSCALL_BASE+192)
2)触发软中断
其ISR 代码位于kernel/arch/arm/kernel/entry-common.S的ENTRY(vector_swi), __NR_mmap2对应的函数为:sys_mmap2(位于linux/arch/arm/kernel/calls.S)
3)sys_mmap2的实现
位于kernel/arch/arm/kernel/entry-common.S,实现代码如下:
-
-
-
-
- sys_mmap2:
- #if PAGE_SHIFT > 12
-         tst     r5, #PGOFF_MASK
-         moveq   r5, r5, lsr #PAGE_SHIFT - 12
-         streq   r5, [sp, #4]
-         beq     sys_mmap_pgoff
-         mov     r0, #-EINVAL
-         mov     pc, lr
- #else
-         str     r5, [sp, #4]
-         b       sys_mmap_pgoff
- #endif
4) 调用sys_mmap_pgoff
在kernel/include/linux/syscalls.h中定义如下:
- asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
-         unsigned long prot, unsigned long flags,
-         unsigned long fd, unsigned long pgoff);
5)sys_mmap_pgoff实现
在kernel/mm/mmap.c中实现如下:
- SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
-         unsigned long, prot, unsigned long, flags,
-         unsigned long, fd, unsigned long, pgoff)
- {
-     struct file *file = NULL;
-     unsigned long retval = -EBADF;
-
-     if (!(flags & MAP_ANONYMOUS)) {
-         audit_mmap_fd(fd, flags);
-         if (unlikely(flags & MAP_HUGETLB))
-             return -EINVAL;
-         file = fget(fd);
-         if (!file)
-             goto out;
-     } else if (flags & MAP_HUGETLB) {
-         struct user_struct *user = NULL;
-
-
-
-
-
-         len = ALIGN(len, huge_page_size(&default_hstate));
-         file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
-                 &user, HUGETLB_ANONHUGE_INODE);
-         if (IS_ERR(file))
-             return PTR_ERR(file);
-     }
-
-     flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-     down_write(&current->mm->mmap_sem);
-     retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-     up_write(&current->mm->mmap_sem);
-
-     if (file)
-         fput(file);
- out:
-     return retval;
- }
其功能为:从当前进程中获取用户态可用的虚拟地址空间(vm_area_struct *vma),在mmap_region中真正获取vma,然后调用file->f_op->mmap(file, vma),调用具体的支持mmap的驱动来处理。
下面以binder驱动为例。
3. binder mmap实现
binder驱动的mmap函数为:binder_mmap,其实现代码如下:
- static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
- {
-     int ret;
-     struct vm_struct *area;
-     struct binder_proc *proc = filp->private_data;
-     const char *failure_string;
-     struct binder_buffer *buffer;
-
-     if ((vma->vm_end - vma->vm_start) > SZ_4M)
-         vma->vm_end = vma->vm_start + SZ_4M;
-
-     binder_debug(BINDER_DEBUG_OPEN_CLOSE,
-             "binder_mmap: %d %lx-%lx (%ld K) vma %lx pagep %lx\n",
-             proc->pid, vma->vm_start, vma->vm_end,
-             (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
-             (unsigned long)pgprot_val(vma->vm_page_prot));
-
-     if (vma->vm_flags & FORBIDDEN_MMAP_FLAGS) {
-         ret = -EPERM;
-         failure_string = "bad vm_flags";
-         goto err_bad_arg;
-     }
-     vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE;
-
-     if (proc->buffer) {
-         ret = -EBUSY;
-         failure_string = "already mapped";
-         goto err_already_mapped;
-     }
-
-     area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP);
-     if (area == NULL) {
-         ret = -ENOMEM;
-         failure_string = "get_vm_area";
-         goto err_get_vm_area_failed;
-     }
-     proc->buffer = area->addr;
-     proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer;
-
- #ifdef CONFIG_CPU_CACHE_VIPT
-     if (cache_is_vipt_aliasing()) {
-         while (CACHE_COLOUR((vma->vm_start ^ (uint32_t)proc->buffer))) {
-             printk(KERN_INFO "binder_mmap: %d %lx-%lx maps %p bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer);
-             vma->vm_start += PAGE_SIZE;
-         }
-     }
- #endif
-     proc->pages = kzalloc(sizeof(proc->pages[0]) * ((vma->vm_end - vma->vm_start) / PAGE_SIZE), GFP_KERNEL);
-     if (proc->pages == NULL) {
-         ret = -ENOMEM;
-         failure_string = "alloc page array";
-         goto err_alloc_pages_failed;
-     }
-     proc->buffer_size = vma->vm_end - vma->vm_start;
-
-     vma->vm_ops = &binder_vm_ops;
-     vma->vm_private_data = proc;
-
-     if (binder_update_page_range(proc, 1, proc->buffer, proc->buffer + PAGE_SIZE, vma)) {
-         ret = -ENOMEM;
-         failure_string = "alloc small buf";
-         goto err_alloc_small_buf_failed;
-     }
-     buffer = proc->buffer;
-     INIT_LIST_HEAD(&proc->buffers);
-     list_add(&buffer->entry, &proc->buffers);
-     buffer->free = 1;
-     binder_insert_free_buffer(proc, buffer);
-     proc->free_async_space = proc->buffer_size / 2;
-     barrier();
-     proc->files = get_files_struct(current);
-     proc->vma = vma;
-
-
-     return 0;
-
- err_alloc_small_buf_failed:
-     kfree(proc->pages);
-     proc->pages = NULL;
- err_alloc_pages_failed:
-     vfree(proc->buffer);
-     proc->buffer = NULL;
- err_get_vm_area_failed:
- err_already_mapped:
- err_bad_arg:
-     printk(KERN_ERR "binder_mmap: %d %lx-%lx %s failed %d\n",
-             proc->pid, vma->vm_start, vma->vm_end, failure_string, ret);
-     return ret;
- }
1)获取kernel态虚拟地址空间:
struct vm_struct *area;
area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP);
根据传过来的vma(数据结构为vm_area_struct,属于进程的一段空间,用于与内核空间映射用的),调用get_vm_area在内核的vmalloc区域获得一个相同大小的连续空间,数据结构为vm_struct,同时将该结构加入到vm_list统一管理
2)保存kernel态虚拟地址空间的起始地址,以便后面使用:
proc->buffer = area->addr;
3) 计算并保存进程用户态虚拟地址空间起始地址与kernel态虚拟地址空间的起始地址的差值,以便后面使用。
proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer;
4)分配用于记录物理页的指针数组(struct page *数组,每个虚拟页对应一项)
proc->pages = kzalloc(sizeof(proc->pages[0]) * ((vma->vm_end - vma->vm_start) / PAGE_SIZE), GFP_KERNEL);
5)binder_update_page_range
它的工作为:
a)分配物理页
b)分别对vma用户空间建立页表、对vmalloc区域建立页表映射关系。
前面有了用户态和Kernel态的虚拟地址空间,但是还不能访问,因为还没有对应的物理内存。
补充知识:
a)struct page用于跟踪描述一个物理页面是否正在被使用。所有的page结构都被存入一个叫做mem_map的全局数组中。
b)在每个进程的task_struct中包含一个指向mm_struct结构的指针.进程的mm_struct中则包含了进程可执行影像的页目录指针pgd.还包含了指向vm_area_struct的几个指针,每个vm_area_struct包含一个进程的虚拟地址区域.
binder_update_page_range(proc, 1, proc->buffer, proc->buffer + PAGE_SIZE, vma)
proc->buffer指向内核的vmalloc区域的起始地址,前面已经有了vma(vm_area_struct)和area(vm_struct)。binder_update_page_range实现代码如下:
- static int binder_update_page_range(struct binder_proc *proc, int allocate,
-         void *start, void *end,
-         struct vm_area_struct *vma)
- {
-     void *page_addr;
-     unsigned long user_page_addr;
-     struct vm_struct tmp_area;
-     struct page **page;
-     struct mm_struct *mm;
-
-     binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-             "binder: %d: %s pages %p-%p\n", proc->pid,
-             allocate ? "allocate" : "free", start, end);
-
-     if (end <= start)
-         return 0;
-
-     if (vma)
-         mm = NULL;
-     else
-         mm = get_task_mm(proc->tsk);
-
-     if (mm) {
-         down_write(&mm->mmap_sem);
-         vma = proc->vma;
-     }
-
-     if (allocate == 0)
-         goto free_range;
-
-     if (vma == NULL) {
-         printk(KERN_ERR "binder: %d: binder_alloc_buf failed to "
-                 "map pages in userspace, no vma\n", proc->pid);
-         goto err_no_vma;
-     }
-
-     for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
-         int ret;
-         struct page **page_array_ptr;
-         page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
-
-         BUG_ON(*page);
-         *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-         if (*page == NULL) {
-             printk(KERN_ERR "binder: %d: binder_alloc_buf failed "
-                     "for page at %p\n", proc->pid, page_addr);
-             goto err_alloc_page_failed;
-         }
-         tmp_area.addr = page_addr;
-         tmp_area.size = PAGE_SIZE + PAGE_SIZE;
-         page_array_ptr = page;
-         ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr);
-         if (ret) {
-             printk(KERN_ERR "binder: %d: binder_alloc_buf failed "
-                     "to map page at %p in kernel\n",
-                     proc->pid, page_addr);
-             goto err_map_kernel_failed;
-         }
-         user_page_addr =
-             (uintptr_t)page_addr + proc->user_buffer_offset;
-         ret = vm_insert_page(vma, user_page_addr, page[0]);
-         if (ret) {
-             printk(KERN_ERR "binder: %d: binder_alloc_buf failed "
-                     "to map page at %lx in userspace\n",
-                     proc->pid, user_page_addr);
-             goto err_vm_insert_page_failed;
-         }
-     }
-     if (mm) {
-         up_write(&mm->mmap_sem);
-         mmput(mm);
-     }
-     return 0;
-
- free_range:
-     for (page_addr = end - PAGE_SIZE; page_addr >= start;
-             page_addr -= PAGE_SIZE) {
-         page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
-         if (vma)
-             zap_page_range(vma, (uintptr_t)page_addr +
-                     proc->user_buffer_offset, PAGE_SIZE, NULL);
- err_vm_insert_page_failed:
-         unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
- err_map_kernel_failed:
-         __free_page(*page);
-         *page = NULL;
- err_alloc_page_failed:
-         ;
-     }
- err_no_vma:
-     if (mm) {
-         up_write(&mm->mmap_sem);
-         mmput(mm);
-     }
-     return -ENOMEM;
- }
a)map_vm_area:映射Kernel虚拟地址到物理内存,为vmalloc区域的连续地址空间建立页表映射,需要vm_struct参数(提供虚拟地址)和page参数(用来生成pte),这就完成了内核区的映射
b) vm_insert_page:更新vma对应的页表,这样就是实现了mmap功能
c)binder_update_page_range(proc, 1, proc->buffer, proc->buffer + PAGE_SIZE, vma)调用的时候只分配了1页物理内存,这是为了节约空间,按需分配。而进程虚拟地址空间和vmalloc内核地址空间并不占用实际物理内存,所以一开始就按所需的全部大小分配好,实际的物理页则按需获取;
proc->vma为调用进程的一段用户空间;
proc->files为调用进程的files_struct结构;
proc->buffer_size为需要映射的长度(不超过4M)-sizeof(struct binder_buffer);
proc->pages为分配的物理页page的指针数组,开始只有一项,即1页,但是长度还是预留好了;
proc->buffer为内核连续映射区首地址;
proc->user_buffer_offset为用户空间映射区首地址-内核空间连续映射的首地址。
http://www.linuxidc.com/Linux/2012-03/55897p3.htm