今天看 Go 的 runtime 时,发现堆内存初始化是通过调用 mmap 来申请虚拟内存的,于是搜了下 Linux 0.11 的源码,发现那时还没有实现 mmap,就又找了 1.0 版本的源码(毕竟版本越早代码越少,留下的基本都是核心实现 - -)。
先看代码
/*
 * mmap system call entry point.
 *
 * `buffer` points at six consecutive 4-byte words (6*4 bytes are verified
 * as readable) that are consumed in this order:
 *   buffer[0] addr, buffer[1] len, buffer[2] prot,
 *   buffer[3] flags, buffer[4] fd,  buffer[5] off
 *
 * Returns the verify_area() error if the argument block is not readable,
 * -EBADF if a file-backed mapping names a descriptor that is out of range
 * or not open, and otherwise whatever do_mmap() returns.
 */
asmlinkage int sys_mmap(unsigned long *buffer)
{
	struct file *file = NULL;
	unsigned long addr, len, prot, flags, off;
	int error;

	error = verify_area(VERIFY_READ, buffer, 6*4);
	if (error)
		return error;

	/* flags is the fourth word; get_fs_long fetches a word from user space */
	flags = get_fs_long(buffer + 3);

	/*
	 * Anonymous mappings skip the descriptor lookup and leave file == NULL;
	 * every other mapping needs an already-open descriptor.
	 */
	if (!(flags & MAP_ANONYMOUS)) {
		unsigned long fd = get_fs_long(buffer + 4);

		if (fd >= NR_OPEN)
			return -EBADF;
		file = current->filp[fd];
		if (!file)
			return -EBADF;
	}

	addr = get_fs_long(buffer);
	len = get_fs_long(buffer + 1);
	prot = get_fs_long(buffer + 2);
	off = get_fs_long(buffer + 5);

	/* do_mmap does the real work */
	return do_mmap(file, addr, len, prot, flags, off);
}
do_mmap:
/*
 * Map `len` bytes at `addr` into the current task, backed by `file` or by
 * anonymous memory when `file` is NULL.
 *
 * file:  open file to map, or NULL for an anonymous mapping
 * addr:  requested address; only honoured when MAP_FIXED is set, otherwise
 *        a free range is searched for starting at SHM_RANGE_START
 * len:   length in bytes, rounded up with PAGE_ALIGN
 * prot:  PROT_* bits, translated into a page protection mask
 * flags: MAP_* bits (mapping type, MAP_FIXED)
 * off:   offset passed through to the mmap operation
 *
 * Returns the mapped address on success (or `addr` unchanged when the
 * aligned length is zero), a negative errno for failures detected here,
 * or -1 with current->errno set when the underlying mmap operation fails.
 */
int do_mmap(struct file * file, unsigned long addr, unsigned long len,
	unsigned long prot, unsigned long flags, unsigned long off)
{
	int mask = 0;
	int error;

	len = PAGE_ALIGN(len);
	if (len == 0)
		return addr;

	if (addr > TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len)
		return -EINVAL;

	/*
	 * File-backed mappings: check the requested access against the mode
	 * the file was opened with (presumably bit 2 = write, bit 1 = read).
	 */
	if (file != NULL) {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			if ((prot & PROT_WRITE) && !(file->f_mode & 2))
				return -EACCES;
			/* fall through */
		case MAP_PRIVATE:
			if (!(file->f_mode & 1))
				return -EACCES;
			break;
		default:
			return -EINVAL;
		}
	}

	if (flags & MAP_FIXED) {
		/* a fixed address must be page aligned */
		if (addr & ~PAGE_MASK)
			return -EINVAL;
		/* same range test as above, repeated for the fixed case */
		if (len > TASK_SIZE || addr > TASK_SIZE - len)
			return -EINVAL;
	} else {
		struct vm_area_struct *area;

		/*
		 * Look for a free range. Start at SHM_RANGE_START; whenever the
		 * candidate [addr, addr+len) intersects an existing area, move
		 * the candidate to the page-aligned end of that area and rescan
		 * the whole list, since the new candidate may intersect another
		 * area. Stop once a full scan finds no intersection.
		 */
		addr = SHM_RANGE_START;
		while (addr + len < SHM_RANGE_END) {
			area = current->mmap;
			while (area &&
			       (addr >= area->vm_end || addr + len <= area->vm_start))
				area = area->vm_next;
			if (!area)
				break;
			addr = PAGE_ALIGN(area->vm_end);
		}
		if (addr + len >= SHM_RANGE_END)
			return -ENOMEM;
	}

	/* a file can only be mapped if it provides an mmap operation */
	if (file && (!file->f_op || !file->f_op->mmap))
		return -ENODEV;

	/* translate PROT_* into the page protection mask */
	if (prot & (PROT_READ | PROT_EXEC))
		mask |= PAGE_READONLY;
	if (prot & PROT_WRITE) {
		if ((flags & MAP_TYPE) == MAP_PRIVATE)
			mask |= PAGE_COPY;
		else
			mask |= PAGE_SHARED;
	}
	if (!mask)
		return -EINVAL;

	do_munmap(addr, len);	/* Clear old maps */

	if (file) {
		/* dispatch to the file's own mmap operation (e.g. generic_mmap) */
		error = file->f_op->mmap(file->f_inode, file, addr, len, mask, off);
	} else {
		error = anon_map(NULL, NULL, addr, len, mask, off);
	}

	if (!error)
		return addr;

	/* report the failure through errno unless one is already recorded */
	if (!current->errno)
		current->errno = -error;
	return -1;
}
generic_mmap:
/*
 * Map a regular file into the current task's address space.
 *
 * inode: inode of the file being mapped
 * file:  open file (not used in this function)
 * addr:  start address of the mapping
 * len:   length of the mapping in bytes
 * prot:  page protection mask; PAGE_RW is rejected
 * off:   offset into the file, must be a multiple of the block size
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL  writable protection requested, or off is not block aligned
 *   -EACCES  inode has no superblock, is not a regular file, or its first
 *            block could not be read
 *   -ENOEXEC inode has no bmap operation
 *   -ENOMEM  the vm_area_struct could not be allocated
 */
int generic_mmap(struct inode * inode, struct file * file,
	unsigned long addr, size_t len, int prot, unsigned long off)
{
	struct vm_area_struct * mpnt;
	extern struct vm_operations_struct file_mmap;
	struct buffer_head * bh;

	if (prot & PAGE_RW)	/* only PAGE_COW or read-only supported right now */
		return -EINVAL;
	/*
	 * i_sb must be checked before it is dereferenced for the block size
	 * below; the alignment test used to run first and would fault on an
	 * inode without a superblock.
	 */
	if (!inode->i_sb)
		return -EACCES;
	if (off & (inode->i_sb->s_blocksize - 1))
		return -EINVAL;
	if (!S_ISREG(inode->i_mode))
		return -EACCES;
	if (!inode->i_op || !inode->i_op->bmap)
		return -ENOEXEC;
	/* read the file's first block to make sure the file is readable */
	if (!(bh = bread(inode->i_dev,bmap(inode,0),inode->i_sb->s_blocksize)))
		return -EACCES;
	if (!IS_RDONLY(inode)) {
		inode->i_atime = CURRENT_TIME;
		inode->i_dirt = 1;
	}
	brelse(bh);

	/*
	 * All checks passed: the range [addr, addr+len) can be mapped into
	 * the task, so allocate the descriptor for the new area.
	 */
	mpnt = (struct vm_area_struct * ) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
	if (!mpnt)
		return -ENOMEM;

	unmap_page_range(addr, len);
	mpnt->vm_task = current;
	mpnt->vm_start = addr;
	mpnt->vm_end = addr + len;
	mpnt->vm_page_prot = prot;
	mpnt->vm_share = NULL;
	/*
	 * The area holds a reference on the inode for as long as the file is
	 * mapped; off is the byte offset in the file where the mapping starts.
	 */
	mpnt->vm_inode = inode;
	inode->i_count++;
	mpnt->vm_offset = off;
	mpnt->vm_ops = &file_mmap;
	insert_vm_struct(current, mpnt);
	merge_segments(current->mmap, NULL, NULL);

	return 0;
}
/*
 * Link the area `vmp` into task `t`'s mmap list.
 *
 * The new area is placed immediately before the first existing area whose
 * vm_start is greater than vmp->vm_start, or at the tail if there is none.
 * While walking the list, a diagnostic is printed for every area passed
 * whose range contains vmp's start or end; the area is inserted regardless.
 */
void insert_vm_struct(struct task_struct *t, struct vm_area_struct *vmp)
{
	struct vm_area_struct **nxtpp, *mpnt;

	/* nxtpp tracks the link that will be pointed at vmp */
	nxtpp = &t->mmap;
	for (mpnt = t->mmap; mpnt != NULL; mpnt = mpnt->vm_next) {
		if (mpnt->vm_start > vmp->vm_start)
			break;
		nxtpp = &mpnt->vm_next;

		if ((vmp->vm_start >= mpnt->vm_start &&
		     vmp->vm_start < mpnt->vm_end) ||
		    (vmp->vm_end >= mpnt->vm_start &&
		     vmp->vm_end < mpnt->vm_end))
			/* second range is the existing area: mpnt start and end */
			printk("insert_vm_struct: ins area %lx-%lx in area %lx-%lx\n",
			       vmp->vm_start, vmp->vm_end,
			       mpnt->vm_start, mpnt->vm_end);
	}

	vmp->vm_next = mpnt;
	*nxtpp = vmp;
}
额,其实我也不知道还要分析啥,代码已经很清晰了。如果熟悉 C 语言 malloc 的实现,对这里进程的 mmap 链表一眼就能看懂:mmap 就是在进程的 mmap 链表中登记一段对应大小的虚拟地址区域(vm_area_struct),并且把它与文件的 inode 绑定。这样用户进程对该内存区域的访问就可以直接映射到文件(注意这个版本的 generic_mmap 会拒绝 PAGE_RW,只支持只读和私有的写时复制映射),相比 write、read,可以省去一部分在用户空间和内核空间之间搬运数据的工作。