linux1.0版本mmap研究

今天看 Go 的 runtime 时,发现堆内存初始化是通过调用 mmap 申请虚拟内存的,于是去翻了 Linux 0.11 的源码,发现当时还没有实现 mmap,就又找了 1.0 版本的源码(毕竟版本越早代码越少,基本都是核心实现 - -)。

先看代码

asmlinkage int sys_mmap(unsigned long *buffer)
{
    int error;
    unsigned long flags;
    struct file * file = NULL;

    error = verify_area(VERIFY_READ, buffer, 6*4);
    if (error)
        return error;
    //flas在第四个参数,get_fs_long是从用户空间读到内核空间来    
    flags = get_fs_long(buffer+3);
    if (!(flags & MAP_ANONYMOUS)) {
        unsigned long fd = get_fs_long(buffer+4);
        // 可以看到该版本只支持已打开的fd,还不支持匿名文件
        if (fd >= NR_OPEN || !(file = current->filp[fd]))
            return -EBADF;
    }
    // 最终调用do_mmap来实现
    return do_mmap(file, get_fs_long(buffer), get_fs_long(buffer+1),
        get_fs_long(buffer+2), flags, get_fs_long(buffer+5));
}

do_mmap

/*
 * do_mmap - validate a mapping request, choose its address and install it.
 *
 * file   backing file, or NULL for an anonymous mapping
 * addr   requested start address (only honoured with MAP_FIXED)
 * len    length in bytes; rounded up with PAGE_ALIGN()
 * prot   PROT_* bits requested by the caller
 * flags  MAP_* bits
 * off    offset passed through unchanged to the mapper
 *
 * Returns the mapped address on success.  A zero length (after rounding)
 * returns addr immediately without mapping anything.  Validation failures
 * return a negative errno value directly (-EINVAL, -EACCES, -ENOMEM,
 * -ENODEV).  If the mapper itself fails, -1 is returned and
 * current->errno is set to the positive error code unless it was already
 * non-zero.
 */
int do_mmap(struct file * file, unsigned long addr, unsigned long len,
    unsigned long prot, unsigned long flags, unsigned long off)
{
    int mask = 0;
    int error;

    len = PAGE_ALIGN(len);
    if (len == 0)
        return addr;

    if (addr > TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE-len)
        return -EINVAL;

    /*
     * Access checks against the open mode of the file.  Only MAP_SHARED
     * and MAP_PRIVATE are accepted for file mappings.  Both need bit 1 of
     * f_mode; a writable shared mapping additionally needs bit 2
     * (presumably the read and write access bits -- confirm against the
     * definition of f_mode).
     */
    if (file != NULL) {
        unsigned long type = flags & MAP_TYPE;

        if (type != MAP_SHARED && type != MAP_PRIVATE)
            return -EINVAL;
        if (type == MAP_SHARED && (prot & PROT_WRITE) && !(file->f_mode & 2))
            return -EACCES;
        if (!(file->f_mode & 1))
            return -EACCES;
    }

    if (flags & MAP_FIXED) {
        /* The caller picked the address: it must be page aligned and in range */
        if (addr & ~PAGE_MASK)
            return -EINVAL;
        if (len > TASK_SIZE || addr > TASK_SIZE - len)
            return -EINVAL;
    } else {
        struct vm_area_struct * area;

        /*
         * Pick an address ourselves, starting at SHM_RANGE_START.  Walk the
         * task's area list looking for the first area that overlaps
         * [addr, addr+len).  If one is found, retry from the page-aligned
         * end of that area (the candidate may overlap several areas, so the
         * whole list is rescanned each time).  If none overlaps, the
         * candidate range is free.
         */
        addr = SHM_RANGE_START;
        while (addr+len < SHM_RANGE_END) {
            area = current->mmap;
            while (area && (addr >= area->vm_end || addr + len <= area->vm_start))
                area = area->vm_next;
            if (!area)
                break;
            addr = PAGE_ALIGN(area->vm_end);
        }
        if (addr+len >= SHM_RANGE_END)
            return -ENOMEM;
    }

    /* A file can only be mapped if it provides an mmap operation */
    if (file && (!file->f_op || !file->f_op->mmap))
        return -ENODEV;

    /* Translate the PROT_* request into a page protection mask */
    if (prot & (PROT_READ | PROT_EXEC))
        mask |= PAGE_READONLY;
    if (prot & PROT_WRITE) {
        if ((flags & MAP_TYPE) == MAP_PRIVATE)
            mask |= PAGE_COPY;
        else
            mask |= PAGE_SHARED;
    }
    if (!mask)
        return -EINVAL;

    do_munmap(addr, len);   /* Clear old maps */

    /* Dispatch to the file's own mmap operation, or to the anonymous mapper */
    if (file)
        error = file->f_op->mmap(file->f_inode, file, addr, len, mask, off);
    else
        error = anon_map(NULL, NULL, addr, len, mask, off);

    if (error) {
        /* Do not overwrite an errno value that is already set */
        if (!current->errno)
            current->errno = -error;
        return -1;
    }

    return addr;
}

generic_mmap:

/*
 * generic_mmap - record a file-backed mapping of [addr, addr+len) in the
 * current task's area list.
 *
 * inode  inode of the file being mapped
 * file   open file (not used by this function)
 * addr   start address of the mapping
 * len    length of the mapping in bytes
 * prot   page protection mask; PAGE_RW is rejected
 * off    byte offset into the file; must be a multiple of the block size
 *
 * Returns 0 on success, or:
 *   -EINVAL   prot has PAGE_RW set, or off is not block aligned
 *   -EACCES   the inode has no superblock, is not a regular file, or its
 *             first block could not be read
 *   -ENOEXEC  the inode has no bmap operation
 *   -ENOMEM   the vm_area_struct could not be allocated
 */
int generic_mmap(struct inode * inode, struct file * file,
    unsigned long addr, size_t len, int prot, unsigned long off)
{
    struct vm_area_struct * mpnt;
    extern struct vm_operations_struct file_mmap;
    struct buffer_head * bh;

    if (prot & PAGE_RW) /* only PAGE_COW or read-only supported right now */
        return -EINVAL;
    /*
     * i_sb must be tested before s_blocksize is read through it; the
     * alignment test used to run first and dereferenced a possibly NULL
     * pointer.  For a non-NULL superblock the order of the remaining
     * checks (alignment, then regular-file) is unchanged.
     */
    if (!inode->i_sb)
        return -EACCES;
    if (off & (inode->i_sb->s_blocksize - 1))
        return -EINVAL;
    if (!S_ISREG(inode->i_mode))
        return -EACCES;
    if (!inode->i_op || !inode->i_op->bmap)
        return -ENOEXEC;
    /*
     * Read the first block of the file and release it again right away
     * (presumably a probe that the file is readable -- the buffer contents
     * are not used here).
     */
    if (!(bh = bread(inode->i_dev,bmap(inode,0),inode->i_sb->s_blocksize)))
        return -EACCES;
    if (!IS_RDONLY(inode)) {
        inode->i_atime = CURRENT_TIME;
        inode->i_dirt = 1;
    }
    brelse(bh);

    /*
     * All checks passed: the range starting at addr with length len may be
     * mapped into the task's address space.  Allocate the descriptor that
     * records it.
     */
    mpnt = (struct vm_area_struct * ) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
    if (!mpnt)
        return -ENOMEM;

    unmap_page_range(addr, len);
    mpnt->vm_task = current;
    mpnt->vm_start = addr;
    mpnt->vm_end = addr + len;
    mpnt->vm_page_prot = prot;
    mpnt->vm_share = NULL;
    /*
     * The area keeps a pointer to the inode, so the inode's reference
     * count is raised by one.  off is the byte offset in the file at which
     * the mapping starts.
     */
    mpnt->vm_inode = inode;
    inode->i_count++;
    mpnt->vm_offset = off;
    mpnt->vm_ops = &file_mmap;
    insert_vm_struct(current, mpnt);
    merge_segments(current->mmap, NULL, NULL);

    return 0;
}

/*
 * insert_vm_struct - link vmp into task t's list of mapped areas.
 *
 * The list headed by t->mmap is walked until the first area whose
 * vm_start is greater than vmp->vm_start; vmp is inserted in front of it
 * (or at the tail if there is none), so a list that is ordered by vm_start
 * stays ordered.
 *
 * While walking, a diagnostic is printed if the start or end of vmp falls
 * inside one of the areas that precede it.  The insertion is carried out
 * regardless.
 */
void insert_vm_struct(struct task_struct *t, struct vm_area_struct *vmp)
{
    struct vm_area_struct **nxtpp, *mpnt;

    /* nxtpp always points at the link that will have to point to vmp */
    nxtpp = &t->mmap;

    for(mpnt = t->mmap; mpnt != NULL; mpnt = mpnt->vm_next)
    {
        if (mpnt->vm_start > vmp->vm_start)
            break;
        nxtpp = &mpnt->vm_next;

        /*
         * Report the existing area's own bounds: the last argument used to
         * be vmp->vm_end, which printed the new area's end in place of the
         * existing area's end.
         */
        if ((vmp->vm_start >= mpnt->vm_start &&
             vmp->vm_start < mpnt->vm_end) ||
            (vmp->vm_end >= mpnt->vm_start &&
             vmp->vm_end < mpnt->vm_end))
            printk("insert_vm_struct: ins area %lx-%lx in area %lx-%lx\n",
                   vmp->vm_start, vmp->vm_end,
                   mpnt->vm_start, mpnt->vm_end);
    }

    /* mpnt is the first area after vmp, or NULL at the end of the list */
    vmp->vm_next = mpnt;

    *nxtpp = vmp;
}

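额,其实我也不知道还能分析啥,代码已经很清晰了。如果熟悉C语言的malloc实现,对这里进程的mmap链表一眼就能看懂:mmap并不是立刻分配物理内存,而是在进程的mmap链表中登记一段对应大小的虚拟地址区间(vm_area_struct),并把它与文件的inode绑定。这样用户进程对该区域的访问就可以直接映射到文件内容,相比read、write,可以省去一部分在用户空间和内核空间之间搬运数据的工作。需要注意的是,从generic_mmap开头的检查可以看出,1.0版本的文件映射只支持只读或写时复制(PAGE_COW),还不支持把写操作直接落到文件上的共享可写映射。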

你可能感兴趣的:(linux1.0版本mmap研究)