在文件读写之前,我们必须先打开文件。从应用程序的角度来看,这是通过标准库的open函数完成的,该函数返回一个文件描述符。在内核中,这一操作由系统调用sys_open()完成,其实现如下。
/*
 * open(2) system call entry point.
 *
 * ORs O_LARGEFILE into @flags when force_o_largefile() is true, then
 * forwards to do_sys_open() with AT_FDCWD as the directory fd and returns
 * its result unchanged.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
{
	long fd;

	flags = force_o_largefile() ? (flags | O_LARGEFILE) : flags;

	fd = do_sys_open(AT_FDCWD, filename, flags, mode);

	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(3, fd, filename, flags, mode);

	return fd;
}
# long do_sys_open(int dfd, const char __user *filename, int flags, int mode) # { # /*从进程地址空间读取该文件的路径名*/ # char *tmp = getname(filename); # int fd = PTR_ERR(tmp); # # if (!IS_ERR(tmp)) { # /*在内核中,每个打开的文件由一个文件描述符表示 # 该描述符在特定于进程的数组中充当位置索引(数组是 # task_struct->files->fd_arry),该数组的元素包含了file结构,其中 # 包括每个打开文件的所有必要信息。因此,调用下面 # 函数查找一个未使用的文件描述符,返回的是上面 # 说的数组的下标*/ # fd = get_unused_fd_flags(flags); # if (fd >= 0) { # /*fd获取成功则开始打开文件,此函数是主要完成打开功能的函数*/ # struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); # if (IS_ERR(f)) { # put_unused_fd(fd); # fd = PTR_ERR(f); # } else { # fsnotify_open(f->f_path.dentry); # fd_install(fd, f); # } # } # putname(tmp); # } # return fd; # }
# struct file *do_filp_open(int dfd, const char *pathname, # int open_flag, int mode, int acc_mode) # { # struct file *filp; # struct nameidata nd; # int error; # struct path path; # struct dentry *dir; # int count = 0; # int will_write; # /*改变参数flag的值,具体做法是flag+1*/ # int flag = open_to_namei_flags(open_flag); # /*设置访问权限*/ # if (!acc_mode) # acc_mode = MAY_OPEN | ACC_MODE(flag); # # /* O_TRUNC implies we need access checks for write permissions */ # # /*根据 O_TRUNC标志设置写权限 */ # if (flag & O_TRUNC) # acc_mode |= MAY_WRITE; # # /* Allow the LSM permission hook to distinguish append # access from general write access. */ # /* 设置O_APPEND 标志*/ # if (flag & O_APPEND) # acc_mode |= MAY_APPEND; # # /* # * The simplest case - just a plain lookup. # */ # /*如果不是创建文件*/ # if (!(flag & O_CREAT)) { # /*当内核要访问一个文件的时候,第一步要做的是找到这个文件, # 而查找文件的过程在vfs里面是由path_lookup或者path_lookup_open函数来完成的。 # 这两个函数将用户传进来的字符串表示的文件路径转换成一个dentry结构, # 并建立好相应的inode和file结构,将指向file的描述符返回用户。用户随后 # 通过文件描述符,来访问这些数据结构*/ # error = path_lookup_open(dfd, pathname, lookup_flags(flag), # &nd, flag); # if (error) # return ERR_PTR(error); # goto ok;/*跳过下面的创建部分*/ # } # # /* # * Create - we need to know the parent. # */ # /*到此则是要创建文件*/ # /* path-init为查找作准备工作,path_walk真正上路查找, # 这两个函数联合起来根据一段路径名找到对应的dentry */ # error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); # if (error) # return ERR_PTR(error); # error = path_walk(pathname, &nd); # if (error) { # if (nd.root.mnt) # path_put(&nd.root); # return ERR_PTR(error); # } # if (unlikely(!audit_dummy_context())) # /*保存inode节点信息*/ # audit_inode(pathname, nd.path.dentry); # # /* # * We have the parent and last component. First of all, check # * that we are not asked to creat(2) an obvious directory - that # * will not do. 
# */ # error = -EISDIR; # /*父节点信息*/ # if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) # goto exit_parent; # # error = -ENFILE; # /*获取文件指针*/ # filp = get_empty_filp(); # if (filp == NULL) # goto exit_parent; # /*填充nameidata 结构*/ # nd.intent.open.file = filp; # nd.intent.open.flags = flag; # nd.intent.open.create_mode = mode; # dir = nd.path.dentry; # nd.flags &= ~LOOKUP_PARENT; # nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN; # if (flag & O_EXCL) # nd.flags |= LOOKUP_EXCL; # mutex_lock(&dir->d_inode->i_mutex); # /*从哈希表中查找目的文件对应的dentry,上面路径搜索的是父节点 # 也就是目的文件的上一层目录,为了得到目的文件的 # path结构,我们用nd中的last结构和上一层目录的dentry结构 # 可以找到*/ # path.dentry = lookup_hash(&nd); # path.mnt = nd.path.mnt; # /*到此目标节点的path结构已经找到*/ # do_last: # error = PTR_ERR(path.dentry); # if (IS_ERR(path.dentry)) { # mutex_unlock(&dir->d_inode->i_mutex); # goto exit; # } # # if (IS_ERR(nd.intent.open.file)) { # error = PTR_ERR(nd.intent.open.file); # goto exit_mutex_unlock; # } # # /* Negative dentry, just create the file */ # /*如果此dentry结构没有对应的inode节点,说明是无效的,应该创建文件节点 */ # if (!path.dentry->d_inode) { # /* # * This write is needed to ensure that a # * ro->rw transition does not occur between # * the time when the file is created and when # * a permanent write count is taken through # * the 'struct file' in nameidata_to_filp(). # */ # /*write权限是必需的*/ # error = mnt_want_write(nd.path.mnt); # if (error) # goto exit_mutex_unlock; # /*按照namei格式的flag open*,主要是创建inode*/ # error = __open_namei_create(&nd, &path, flag, mode); # if (error) { # mnt_drop_write(nd.path.mnt); # goto exit; # } # /*根据nameidata 得到相应的file结构*/ # filp = nameidata_to_filp(&nd, open_flag); # if (IS_ERR(filp)) # ima_counts_put(&nd.path, # acc_mode & (MAY_READ | MAY_WRITE | # MAY_EXEC)); # /*放弃写权限*/ # mnt_drop_write(nd.path.mnt); # if (nd.root.mnt) # path_put(&nd.root); # return filp; # } # # /* # * It already exists. 
# */ # /*要打开的文件已经存在*/ # mutex_unlock(&dir->d_inode->i_mutex); # /*保存inode节点*/ # audit_inode(pathname, path.dentry); # # error = -EEXIST; # if (flag & O_EXCL) # goto exit_dput; # /*如果path上安装了文件系统,则依次往下找,直到找到 # 的文件系统没有安装别的文件系统,更新path结构为 # 此文件系统的根目录信息*/ # if (__follow_mount(&path)) { # error = -ELOOP; # if (flag & O_NOFOLLOW) # goto exit_dput; # } # # error = -ENOENT; # if (!path.dentry->d_inode) # goto exit_dput; # if (path.dentry->d_inode->i_op->follow_link) # goto do_link;/*顺次遍历符号链接*/ # /*路径转化为相应的nameidata 结构*/ # path_to_nameidata(&path, &nd); # error = -EISDIR; # /*如果是文件夹*/ # if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) # goto exit; # /*到这里,nd结构中存放的信息已经是最后的目的文件信息*/ # ok: # /* # * Consider: # * 1. may_open() truncates a file # * 2. a rw->ro mount transition occurs # * 3. nameidata_to_filp() fails due to # * the ro mount. # * That would be inconsistent, and should # * be avoided. Taking this mnt write here # * ensures that (2) can not occur. # */ # will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode); # if (will_write) { # error = mnt_want_write(nd.path.mnt); # if (error) # goto exit; # } # /*may_open执行权限检测、文件打开和truncate的操作*/ # error = may_open(&nd.path, acc_mode, flag); # if (error) { # if (will_write) # mnt_drop_write(nd.path.mnt); # goto exit; # } # /*将nameidata转化为file*/ # filp = nameidata_to_filp(&nd, open_flag); # if (IS_ERR(filp)) # ima_counts_put(&nd.path, # acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); # /* # * It is now safe to drop the mnt write # * because the filp has had a write taken # * on its behalf. 
# */ # if (will_write) # /*释放写权限*/ # mnt_drop_write(nd.path.mnt); # if (nd.root.mnt) # /*释放引用计数*/ # path_put(&nd.root); # return filp; # # exit_mutex_unlock: # mutex_unlock(&dir->d_inode->i_mutex); # exit_dput: # path_put_conditional(&path, &nd); # exit: # if (!IS_ERR(nd.intent.open.file)) # release_open_intent(&nd); # exit_parent: # if (nd.root.mnt) # path_put(&nd.root); # path_put(&nd.path); # return ERR_PTR(error); # /*允许遍历连接文件,则手工找到连接文件对应的文件*/ # do_link: # error = -ELOOP; # if (flag & O_NOFOLLOW) # goto exit_dput;/*不允许遍历连接文件,返回错误*/ # /* # * This is subtle. Instead of calling do_follow_link() we do the # * thing by hands. The reason is that this way we have zero link_count # * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. # * After that we have the parent and last component, i.e. # * we are in the same situation as after the first path_walk(). # * Well, almost - if the last component is normal we get its copy # * stored in nd->last.name and we will have to putname() it when we # * are done. Procfs-like symlinks just set LAST_BIND. # */ # /*以下是手工找到链接文件对应的文件dentry结构代码 # */ # /*设置查找LOOKUP_PARENT标志*/ # nd.flags |= LOOKUP_PARENT; # /*判断操作是否安全*/ # error = security_inode_follow_link(path.dentry, &nd); # if (error) # goto exit_dput; # /*处理符号链接,即路径搜索,结果放入nd中*/ # error = __do_follow_link(&path, &nd); # if (error) { # /* Does someone understand code flow here? Or it is only # * me so stupid? Anathema to whoever designed this non-sense # * with "intent.open". 
# */ # release_open_intent(&nd); # if (nd.root.mnt) # path_put(&nd.root); # return ERR_PTR(error); # } # nd.flags &= ~LOOKUP_PARENT; # /*检查最后一段文件或目录名的属性情况*/ # if (nd.last_type == LAST_BIND) # goto ok; # error = -EISDIR; # if (nd.last_type != LAST_NORM) # goto exit; # if (nd.last.name[nd.last.len]) { # __putname(nd.last.name); # goto exit; # } # error = -ELOOP; # /*出现回环标志: 循环超过32次*/ # if (count++==32) { # __putname(nd.last.name); # goto exit; # } # dir = nd.path.dentry; # mutex_lock(&dir->d_inode->i_mutex); # /*更新路径的挂接点和dentry*/ # path.dentry = lookup_hash(&nd); # path.mnt = nd.path.mnt; # __putname(nd.last.name); # goto do_last; # }在内核中要打开一个文件,首先应该找到这个文件,而查找文件的过程在vfs里面是由do_path_lookup或者path_lookup_open函数来完成的,关于文件路径查找在前面已经分析过相关的代码了。这两个函数将用户传进来的字符串表示的文件路径转换成一个dentry结构,并建立好相应的inode和file结构,将指向file的描述符返回用户。用户随后通过文件描述符,来访问这些数据结构。