文件系统--open系统调用详解

arm64平台关于32位系统调用的定义:

/* 32-bit (compat) syscall list entry: number 5 is bound to compat_sys_open. */
#define __NR_open 5
__SYSCALL(__NR_open, compat_sys_open)


#undef __SYSCALL
#define __SYSCALL(nr, sym)  [nr] = sym,

/*
 * The sys_call_table array must be 4K aligned to be accessible from
 * kernel/entry.S.
 */
void * const compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = {
    [0 ... __NR_compat_syscalls - 1] = sys_ni_syscall,
#include 
};

arm64平台上64位系统调用的定义:

/*
 * Syscall list entry binding number 8 to sys_open.
 * NOTE(review): this entry passes three arguments to __SYSCALL, while the
 * __SYSCALL macro defined for the table below takes two - confirm which
 * header this snippet was taken from.
 */
#define __NR_open                 8
__SYSCALL(  8, sys_open, 3)



#undef __SYSCALL
/* Expand each __SYSCALL(nr, sym) list entry into a designated initializer. */
#define __SYSCALL(nr, sym)  [nr] = sym,

/*
 * The sys_call_table array must be 4K aligned to be accessible from
 * kernel/entry.S.
 *
 * Every slot first defaults to sys_ni_syscall; the __SYSCALL() entries
 * pulled in by the include below then override the implemented slots.
 */
void * const sys_call_table[__NR_syscalls] __aligned(4096) = {
    [0 ... __NR_syscalls - 1] = sys_ni_syscall,
#include <asm/unistd.h>
};

下面看一下sys_open的实现:

fs/open.c:

/*
 * open(2) entry point: add O_LARGEFILE when force_o_largefile() says so,
 * then hand over to do_sys_open() with AT_FDCWD as the directory fd.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
    const int open_flags = force_o_largefile() ? (flags | O_LARGEFILE) : flags;

    return do_sys_open(AT_FDCWD, filename, open_flags, mode);
}

通过宏定义展开,实际上上述定义实现了一个sys_open函数,可以看到核心是调用了do_sys_open函数去处理系统调用。下面来看一下它的实现:

/*
 * do_sys_open - common implementation behind the open system call
 * @dfd:      directory fd the path is resolved against (AT_FDCWD for open)
 * @filename: user-space pointer to the path
 * @flags:    open flags supplied by the caller
 * @mode:     creation mode
 *
 * Returns the newly installed file descriptor on success, or a negative
 * error code from build_open_flags(), getname(), get_unused_fd_flags()
 * or do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
    struct open_flags op;
    struct filename *name;
    struct file *filp;
    int ret;

    /* Translate the user flags/mode into the internal open_flags form. */
    ret = build_open_flags(flags, mode, &op);
    if (ret)
        return ret;

    /* Bring the path string into kernel space. */
    name = getname(filename);
    if (IS_ERR(name))
        return PTR_ERR(name);

    /* Reserve a descriptor number before doing the actual open. */
    ret = get_unused_fd_flags(flags);
    if (ret < 0)
        goto out_putname;

    /*
     * Perform the open and obtain the struct file for it. Per the
     * original annotation, every open creates its own struct file,
     * even when the same file is opened again.
     */
    filp = do_filp_open(dfd, name, &op);
    if (IS_ERR(filp)) {
        /* Give the reserved descriptor back and report the error. */
        put_unused_fd(ret);
        ret = PTR_ERR(filp);
        goto out_putname;
    }

    fsnotify_open(filp);
    /* Bind the descriptor number to the struct file. */
    fd_install(ret, filp);

out_putname:
    putname(name);
    return ret;
}
  • getname

下面分步骤来看,第一步就是getname,它最终调用的是getname_flags,这个函数的目的就是获取并初始化一个filename结构体:

/*
 * Kernel-side representation of a pathname copied in from user space;
 * built by getname_flags().
 */
struct filename {
    const char      *name;  /* pointer to actual string */
    const __user char   *uptr;  /* original userland pointer */
    struct audit_names  *aname; /* set to NULL by getname_flags() before audit_getname() runs */
    int         refcnt; /* initialised to 1 by getname_flags() */
    bool            separate; /* should "name" be freed? */
};

filename的结构体如上所示,结构体本身的size是很小的,但是__getname()从names_cache这个slab中申请的内存大小为PATH_MAX(4096 bytes),所以除了这个结构体占用的长度之外,后面的空间(EMBEDDED_NAME_MAX)可以用于存储实际的路径字符串。

/*
 * getname_flags - copy a user-space pathname into a kernel struct filename
 * @filename: user-space pointer to the path string
 * @flags:    lookup flags; only LOOKUP_EMPTY is examined here
 * @empty:    optional out-parameter, set to 1 when the copied path is empty
 *
 * Returns the struct filename (freshly built, or the one handed back by
 * audit_reusename()), or an ERR_PTR(): -ENOMEM on allocation failure, the
 * negative value returned by strncpy_from_user(), -ENOENT for an empty path
 * when LOOKUP_EMPTY is not set, or -ENAMETOOLONG when the copied length
 * reaches PATH_MAX.
 */
struct filename *
getname_flags(const char __user *filename, int flags, int *empty)
{
	struct filename *result, *err;
	int len;
	long max;
	char *kname;

	// Per the original annotation: look for a matching filename already on the audit_names list.
	result = audit_reusename(filename);
	if (result)
		return result;

	// Allocate the name buffer. The buffer is larger than struct filename itself.
	// NOTE(review): the original annotation gave PATH_MAX as 1024; upstream
	// linux/limits.h defines PATH_MAX as 4096 - confirm.
	result = __getname();
	if (unlikely(!result))
		return ERR_PTR(-ENOMEM);
	result->refcnt = 1;

	/*
	 * First, try to embed the struct filename inside the names_cache
	 * allocation
	 */
	/*
	 * The path string is stored right behind the struct, in the same
	 * allocation, so "separate" is false.
	 */
	kname = (char *)result + sizeof(*result);
	result->name = kname;
	result->separate = false;
	max = EMBEDDED_NAME_MAX;

recopy:
	len = strncpy_from_user(kname, filename, max); // copy the path string from user space into kname
	if (unlikely(len < 0)) {
		err = ERR_PTR(len);
		goto error;
	}

	/*
	 * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
	 * separate struct filename so we can dedicate the entire
	 * names_cache allocation for the pathname, and re-do the copy from
	 * userland.
	 */
	// The copy filled the whole embedded area, so the name may not fit behind
	// the struct. Allocate a standalone struct filename, reuse the entire
	// original buffer for the string, and mark it separate.
	if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
		kname = (char *)result;

		result = kzalloc(sizeof(*result), GFP_KERNEL);
		if (!result) {
			err = ERR_PTR(-ENOMEM);
			/* Point result back at the original buffer so the error path releases it. */
			result = (struct filename *)kname;
			goto error;
		}
		result->name = kname;
		result->separate = true;
		result->refcnt = 1;
		max = PATH_MAX;
		goto recopy; // redo the copy, this time with the whole buffer available
	}

	/* The empty path is special. */
	if (unlikely(!len)) {
		if (empty)
			*empty = 1;
		err = ERR_PTR(-ENOENT);
		if (!(flags & LOOKUP_EMPTY))
			goto error;
	}

	err = ERR_PTR(-ENAMETOOLONG);
	if (unlikely(len >= PATH_MAX))
		goto error;

	result->uptr = filename;
	result->aname = NULL;
	audit_getname(result);  // per the original annotation: add the new filename to the audit bookkeeping list
	return result;

error:
	putname(result);
	return err;
}

这里需要注意一点就是audit_names,每个进程都有对应的审计上下文,其中会保存一个audit_names链表,每个打开的filename都会对应一个audit_names结构,它的作用是用于审计,比如selinux权限检查。

  • get_unused_fd_flags

下面是第二步,根据传入的flags获取未用的fd结构:

/*
 * Reserve an unused descriptor number in the current task's file table,
 * searching from 0 with RLIMIT_NOFILE as the upper bound.
 * Returns whatever __alloc_fd() returns.
 */
int get_unused_fd_flags(unsigned flags)
{
    const unsigned long nofile_limit = rlimit(RLIMIT_NOFILE);

    return __alloc_fd(current->files, 0, nofile_limit, flags);
}
EXPORT_SYMBOL(get_unused_fd_flags);

/*
 * allocate a file descriptor, mark it busy.
 *
 * @files: descriptor table owner to allocate from
 * @start: lowest descriptor number to consider
 * @end:   exclusive upper bound; -EMFILE is returned once fd >= end
 * @flags: only O_CLOEXEC is examined, to set or clear close-on-exec
 *
 * Returns the allocated descriptor number, or a negative error code.
 * Runs with files->file_lock held from entry to the "out" label.
 */
int __alloc_fd(struct files_struct *files,
	       unsigned start, unsigned end, unsigned flags)
{
	unsigned int fd;
	int error;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
repeat:
	fdt = files_fdtable(files); // fetch the fdtable of this files_struct; it holds the array of struct file pointers
	fd = start;
	if (fd < files->next_fd) // never start below the next_fd hint recorded in files_struct
		fd = files->next_fd;

	if (fd < fdt->max_fds) // search for the next unused fd number starting at fd
		// Bitmap search over open_fds, max_fds bits wide.
		// Per the original annotation the bitmap initially is a single long (32 bits on arm32) - confirm.
		fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); 

	/*
	 * N.B. For clone tasks sharing a files structure, this test
	 * will limit the total number of files that can be opened.
	 */
	error = -EMFILE;
	if (fd >= end)
		goto out;
	/*
	 * expand_files() is called for every candidate fd. A negative result
	 * is an error, and a non-zero result restarts the search. Per the
	 * original annotation, when fd does not fit in the current table a
	 * larger fdtable is created and installed in files_struct.
	 */
	error = expand_files(files, fd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fs array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	/* Advance the search hint past the descriptor just taken. */
	if (start <= files->next_fd)
		files->next_fd = fd + 1;

	__set_open_fd(fd, fdt);
	if (flags & O_CLOEXEC)
		__set_close_on_exec(fd, fdt); // per the original annotation: a bitmap like open_fds, initially one long - confirm
	else
		__clear_close_on_exec(fd, fdt);
	error = fd;
#if 1
	/* Sanity check */
	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
		rcu_assign_pointer(fdt->fd[fd], NULL);
	}
#endif

out:
	spin_unlock(&files->file_lock);
	return error;
}
  • do_filp_open

看下他的代码实现:

/*
 * do_filp_open - open @pathname relative to @dfd and return its struct file
 *
 * Makes up to three path_openat() attempts:
 *   1. with LOOKUP_RCU added (per the original annotation, the fastest mode);
 *   2. without LOOKUP_RCU if the previous result was -ECHILD (per the
 *      original annotation, this ref-walk mode may sleep);
 *   3. with LOOKUP_REVAL added if the result so far is -ESTALE.
 *
 * Returns the struct file or the ERR_PTR() of the last attempt.
 */
struct file *do_filp_open(int dfd, struct filename *pathname,
        const struct open_flags *op)
{
    const int lookup = op->lookup_flags;
    struct nameidata nd;
    struct file *res;

    res = path_openat(dfd, pathname, &nd, op, lookup | LOOKUP_RCU);

    /* The RCU attempt asked for a retry: redo the walk without LOOKUP_RCU. */
    if (unlikely(res == ERR_PTR(-ECHILD)))
        res = path_openat(dfd, pathname, &nd, op, lookup);

    /* Stale result: last attempt, with LOOKUP_REVAL set. */
    if (unlikely(res == ERR_PTR(-ESTALE)))
        res = path_openat(dfd, pathname, &nd, op, lookup | LOOKUP_REVAL);

    return res;
}

这个函数最终都会调用path_openat进行下一步操作:

/*
 * path_openat - one complete open attempt under the given lookup flags
 * @dfd:      directory fd the path is resolved against
 * @pathname: kernel copy of the path
 * @nd:       caller-provided walk state
 * @op:       internal open flags
 * @flags:    LOOKUP_* flags for this attempt
 *
 * Returns the opened struct file, or an ERR_PTR(). -EOPENSTALE is converted
 * to -ECHILD when LOOKUP_RCU is set in @flags and to -ESTALE otherwise.
 */
static struct file *path_openat(int dfd, struct filename *pathname,
		struct nameidata *nd, const struct open_flags *op, int flags)
{
	struct file *file;
	struct path path;
	int opened = 0;
	int error;

	file = get_empty_filp(); // 1. obtain an empty struct file (an ERR_PTR on failure)
	if (IS_ERR(file))
		return file;

	file->f_flags = op->open_flag;

	if (unlikely(file->f_flags & __O_TMPFILE)) {
		error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened);
		goto out;
	}

	error = path_init(dfd, pathname->name, flags, nd); // 2. initialise nd and walk the path up to, but not including, its last component
	if (unlikely(error))
		goto out;

	error = do_last(nd, &path, file, op, &opened, pathname); // 3. handle the last component; a positive return means it is a symlink that must be followed
	while (unlikely(error > 0)) { /* trailing symlink */  // 4. the last component is a symlink: keep following it to the real target
		struct path link = path;
		void *cookie;
		if (!(nd->flags & LOOKUP_FOLLOW)) {
			path_put_conditional(&path, nd);
			path_put(&nd->path);
			error = -ELOOP;
			break;
		}
		error = may_follow_link(&link, nd);
		if (unlikely(error))
			break;
		nd->flags |= LOOKUP_PARENT;
		nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
		error = follow_link(&link, nd, &cookie);
		if (unlikely(error))
			break;
		error = do_last(nd, &path, file, op, &opened, pathname);// 5. after following the link, run do_last() again on the new last component
		put_link(nd, &link, cookie);
	}
out:
	path_cleanup(nd);
	/* The file was never opened: there must be an error, so drop the empty file. */
	if (!(opened & FILE_OPENED)) {
		BUG_ON(!error);
		put_filp(file);
	}
	if (unlikely(error)) {
		/* Map -EOPENSTALE onto the retry codes that do_filp_open() checks for. */
		if (error == -EOPENSTALE) {
			if (flags & LOOKUP_RCU)
				error = -ECHILD;
			else
				error = -ESTALE;
		}
		file = ERR_PTR(error);
	}
	return file;
}

开始介绍path_init之前,先要介绍一下struct nameidata结构体,这个结构体就是用于目录检索时保存信息的,每次检索一级目录都会更新该结构体中的内容。

/*
 * State carried through a path walk; initialised by path_init() and updated
 * as each component is processed.
 */
struct nameidata {
    struct path path;   /* current position of the walk; the starting point is set by path_init() */
    struct qstr last;   /* name and hash_len of the most recently parsed component (filled in by link_path_walk()) */
    struct path root;   /* root of the lookup; root.mnt is reset to NULL by path_init() unless LOOKUP_ROOT is set */
    struct inode    *inode; /* path.dentry.d_inode */
    unsigned int    flags;  /* LOOKUP_* flags for this walk */
    unsigned    seq, m_seq; /* seq: sampled from a dentry d_seq; m_seq: sampled from mount_lock */
    int     last_type;  /* one of the LAST_* values */
    unsigned    depth;  /* reset to 0 by path_init(); presumably the symlink nesting depth - confirm */
    struct file *base;  /* file from fdget_raw() kept during an RCU walk when FDPUT_FPUT is set */
    char *saved_names[MAX_NESTED_LINKS + 1]; /* not touched by the code shown here; presumably saved link bodies - confirm */
};

其中path表示当前查找的这一级目录路径,last表示当前子路径及其散列值,root表示当前目录对应的根目录,seq是目录项顺序锁,m_seq是文件系统mount顺序锁,last_type表示当前目录的类型:

/*
 * Type of the last component on LOOKUP_PARENT
 *
 * LAST_NORM:   ordinary name (the default chosen by link_path_walk())
 * LAST_ROOT:   initial value set by path_init(), "if there are only slashes"
 * LAST_DOT:    the component is "."
 * LAST_DOTDOT: the component is ".."
 * LAST_BIND:   not assigned in the code shown here; presumably used when
 *              following a link - confirm
 */
enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};

其中LAST_NORM表示普通文件类型,LAST_ROOT表示root类型,LAST_DOT表示“.”,LAST_DOTDOT表示“..”,LAST_BIND表示文件链接类型。

/*
 * path_init - choose the starting point of a path walk and start walking
 * @dfd:   directory fd for relative paths (AT_FDCWD means the current
 *         working directory)
 * @name:  kernel copy of the path string
 * @flags: LOOKUP_* flags
 * @nd:    walk state to initialise
 *
 * Sets nd->path and nd->inode to the starting directory, then returns the
 * result of link_path_walk(). Returns -EBADF / -ENOTDIR / a permission error
 * for an unusable starting point, and -ECHILD when the RCU sequence check on
 * the starting dentry fails.
 */
static int path_init(int dfd, const char *name, unsigned int flags,
		     struct nameidata *nd)
{
	int retval = 0;

	nd->last_type = LAST_ROOT; /* if there are only slashes... */  // the default type is LAST_ROOT
	nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
	nd->depth = 0;
	nd->base = NULL;
	if (flags & LOOKUP_ROOT) { // the caller supplied the root in nd->root
		struct dentry *root = nd->root.dentry;
		struct inode *inode = root->d_inode;
		if (*name) {
			if (!d_can_lookup(root))
				return -ENOTDIR;
			retval = inode_permission(inode, MAY_EXEC);
			if (retval)
				return retval;
		}
		nd->path = nd->root;
		nd->inode = inode;
		if (flags & LOOKUP_RCU) {
			rcu_read_lock();
			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
			nd->m_seq = read_seqbegin(&mount_lock);
		} else {
			path_get(&nd->path);
		}
		goto done;
	}

	nd->root.mnt = NULL;

	nd->m_seq = read_seqbegin(&mount_lock);
	if (*name=='/') {   // case 1: the path starts with '/', so it is an absolute path
		if (flags & LOOKUP_RCU) {
			rcu_read_lock();
			nd->seq = set_root_rcu(nd);
		} else {
			set_root(nd);
			path_get(&nd->root);
		}
		nd->path = nd->root; // an absolute path starts walking from nd->root
	} else if (dfd == AT_FDCWD) {// case 2: relative path, resolved against the current task's working directory
		if (flags & LOOKUP_RCU) {
			struct fs_struct *fs = current->fs;
			unsigned seq;

			rcu_read_lock();

			/* Retry until fs->pwd was read without fs->seq changing underneath. */
			do {
				seq = read_seqcount_begin(&fs->seq);
				nd->path = fs->pwd; // start from the working directory
				nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
			} while (read_seqcount_retry(&fs->seq, seq));
		} else {
			get_fs_pwd(current->fs, &nd->path); // start from the working directory
		}
	} else { // case 3: relative path, resolved against the file referred to by dfd
		/* Caller must check execute permissions on the starting path component */
		struct fd f = fdget_raw(dfd);
		struct dentry *dentry;

		if (!f.file)
			return -EBADF;

		dentry = f.file->f_path.dentry;

		if (*name) {
			if (!d_can_lookup(dentry)) {
				fdput(f);
				return -ENOTDIR;
			}
		}

		nd->path = f.file->f_path; // start from the path of the file behind dfd
		if (flags & LOOKUP_RCU) {
			/* Remember the file in nd->base when FDPUT_FPUT is set; no fdput() on this branch. */
			if (f.flags & FDPUT_FPUT)
				nd->base = f.file;
			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
			rcu_read_lock();
		} else {
			path_get(&nd->path);
			fdput(f);
		}
	}

	nd->inode = nd->path.dentry->d_inode; // nd->inode follows nd->path.dentry
	if (!(flags & LOOKUP_RCU))
		goto done;
	/* RCU mode: the starting dentry must not have changed since nd->seq was sampled. */
	if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
		goto done;
	if (!(nd->flags & LOOKUP_ROOT))
		nd->root.mnt = NULL;
	rcu_read_unlock();
	return -ECHILD;
done:
	current->total_link_count = 0;
	return link_path_walk(name, nd); // nd->path and nd->inode are set up; start the actual walk
}

path_init的目标是搜索目标路径的父目录,并把父目录的信息更新到nameidata结构体中,通过上面的注释可以看到,他会先设置nd->path为起始路径,从起始路径开始进行检索。比如,对于AT_FDCWD类型的系统调用来说,会以进程当前路径作为起始检索路径开始搜索。设置nd->path为起始路径后开始执行link_path_walk开始进行循环检索,每次检索一级目录,并且更新nameidata结构体,直到最终的父目录。

/*
 * link_path_walk - walk every component of @name except the last one
 * @name: path string to parse
 * @nd:   walk state; nd->last and nd->last_type describe the component
 *        parsed most recently
 *
 * Returns 0 once the final component has been parsed into nd->last (without
 * being looked up), or a negative error code.
 */
static int link_path_walk(const char *name, struct nameidata *nd)
{
	struct path next;
	int err;
	
	while (*name=='/')
		name++; // skip the leading '/' characters
	if (!*name)
		return 0;// nothing follows the slashes: nothing to walk, return at once

	/* At this point we know we have a real path component. */
	for(;;) {
		u64 hash_len;
		int type;

		err = may_lookup(nd);
 		if (err)
			break;

		hash_len = hash_name(name);  // hash the current component; hash_len also carries its length

		type = LAST_NORM;
		if (name[0] == '.') switch (hashlen_len(hash_len)) { // detect the "." and ".." components
			case 2:
				if (name[1] == '.') {
					type = LAST_DOTDOT;
					nd->flags |= LOOKUP_JUMPED;
				}
				break;
			case 1:
				type = LAST_DOT;
		}
		if (likely(type == LAST_NORM)) { // ordinary component name
			struct dentry *parent = nd->path.dentry;
			nd->flags &= ~LOOKUP_JUMPED;
			if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {  // the parent dentry supplies its own d_hash: hash again with it
				struct qstr this = { { .hash_len = hash_len }, .name = name };
				err = parent->d_op->d_hash(parent, &this); 
				if (err < 0)
					break;
				hash_len = this.hash_len;
				name = this.name;
			}
		}

		nd->last.hash_len = hash_len;
		nd->last.name = name;
		nd->last_type = type;

		name += hashlen_len(hash_len);  // advance past the current component
		if (!*name)
			return 0;
		/*
		 * If it wasn't NUL, we know it was '/'. Skip that
		 * slash, and continue until no more slashes.
		 */
		do {
			name++;
		} while (unlikely(*name == '/'));  // This is why the function stops at the parent of the final component:
		if (!*name)                        // when nothing follows the current component it is the last one, so return
			return 0;                  // without calling walk_component(); the last component is left to do_last().

		err = walk_component(nd, &next, LOOKUP_FOLLOW); // Core step of the walk. Per the original annotation: a non-symlink component updates nd;
		if (err < 0)                                    // for a symlink nd is left alone and the link's path is returned in next.
			return err;

		if (err) {
			err = nested_symlink(&next, nd); 	// per the original annotation: resolve next to the real directory (following nested links) and store it in nd
			if (err)
				return err;
		}
		/* Every intermediate component must be something that can be looked up in. */
		if (!d_can_lookup(nd->path.dentry)) {
			err = -ENOTDIR; 
			break;
		}
	}
	terminate_walk(nd);
	return err;
}

上述函数会在walk_component中尝试多种方式定位dentry,首先从lookup_fast快速walk目录项,如果内存缓存中没有,那么就进入slow_path从实际文件系统中读取,最终该函数会返回父目录的信息(包括path、dentry、inode等)到nameidata结构体,并把该结构传递给do_last进行最后一级目录的处理。

     /*
      * Excerpt repeated from path_openat(): do_last() handles the last
      * component, and while it returns a positive value the trailing symlink
      * is followed and do_last() is run again.
      */
     error = do_last(nd, &path, file, op, &opened, pathname);
     while (unlikely(error > 0)) { /* trailing symlink */
         struct path link = path;
         void *cookie;
         /* Following is not allowed for this lookup: give up with -ELOOP. */
         if (!(nd->flags & LOOKUP_FOLLOW)) {
             path_put_conditional(&path, nd);
             path_put(&nd->path);
             error = -ELOOP;
             break;
         }
         error = may_follow_link(&link, nd);
         if (unlikely(error))
             break;
         nd->flags |= LOOKUP_PARENT;
         nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
         error = follow_link(&link, nd, &cookie);
         if (unlikely(error))
             break;
         error = do_last(nd, &path, file, op, &opened, pathname);
         put_link(nd, &link, cookie);
     }

如果最后一级目录是一个symlink的话,那么do_last会返回1,接着就会去follow对应的symlink,去找到真正的目录项,这里由于symlink是可能嵌套的,比如a->b->c,所以才会使用一个while循环进行follow,最终do_last进行处理。

/*
 * do_last - handle the final path component of an open
 * @nd:     walk state positioned at the parent of the final component
 * @path:   receives the looked-up location of the final component
 * @file:   struct file being opened
 * @op:     internal open flags
 * @opened: FILE_OPENED / FILE_CREATED bits are read and set here
 * @name:   kernel copy of the path, passed to audit_inode()
 *
 * Returns 0 on success, 1 when the final component is a symlink that the
 * caller must follow, or a negative error code.
 */
static int do_last(struct nameidata *nd, struct path *path,
		   struct file *file, const struct open_flags *op,
		   int *opened, struct filename *name)
{
	struct dentry *dir = nd->path.dentry;
	int open_flag = op->open_flag;
	bool will_truncate = (open_flag & O_TRUNC) != 0;
	bool got_write = false;
	int acc_mode = op->acc_mode;
	struct inode *inode;
	bool symlink_ok = false;
	struct path save_parent = { .dentry = NULL, .mnt = NULL };
	bool retried = false;
	int error;

	nd->flags &= ~LOOKUP_PARENT;
	nd->flags |= op->intent;

	if (nd->last_type != LAST_NORM) {
		error = handle_dots(nd, nd->last_type);
		if (error)
			return error;
		goto finish_open;  // the last component is not an ordinary name (last_type != LAST_NORM): no lookup needed, go straight to finish_open
	}

	if (!(open_flag & O_CREAT)) { // not creating a file
		/* A character after the last component means trailing slashes: a directory is required. */
		if (nd->last.name[nd->last.len])
			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
		if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
			symlink_ok = true;  // O_PATH without LOOKUP_FOLLOW: a symlink found here is accepted as is and not followed
		/* we _can_ be in RCU mode here */
		error = lookup_fast(nd, path, &inode); // fast lookup path (per the original annotation: served from the in-memory cache)
		if (likely(!error))
			goto finish_lookup; // the fast path found it: skip the slow lookup

		if (error < 0)
			goto out;

		BUG_ON(nd->inode != dir->d_inode);
	} else {              // O_CREAT is set
		/* create side of things */
		/*
		 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
		 * has been cleared when we got to the last component we are
		 * about to look up
		 */
		error = complete_walk(nd);// leaves RCU-walk mode (see the comment above)
		if (error)
			return error;

		audit_inode(name, dir, LOOKUP_PARENT);
		error = -EISDIR;
		/* trailing slashes? */
		if (nd->last.name[nd->last.len])
			goto out;
	}

retry_lookup:	// Reached when lookup_fast() returned a positive value, on the O_CREAT path, or on a retry from stale_open; lookup_open() does the lookup.
	if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { // the open may write
		error = mnt_want_write(nd->path.mnt);  // request write access to the mount (the original annotation calls this the freeze-protection write lock - confirm)
		if (!error)
			got_write = true;
		/*
		 * do _not_ fail yet - we might not need that or fail with
		 * a different error; let lookup_open() decide; we'll be
		 * dropping this one anyway.
		 */
	}
	mutex_lock(&dir->d_inode->i_mutex);
	error = lookup_open(nd, path, file, op, got_write, opened); // afterwards the file may already be open, or only the dentry was found (per the original annotation: dcache first, then the real filesystem)
	mutex_unlock(&dir->d_inode->i_mutex);

	if (error <= 0) { // negative: error; 0: the file has already been opened
		if (error)
			goto out;

		if ((*opened & FILE_CREATED) ||
		    !S_ISREG(file_inode(file)->i_mode))
			will_truncate = false;

		audit_inode(name, file->f_path.dentry, 0);
		goto opened;// the open itself is done
	}

	if (*opened & FILE_CREATED) { // error is positive here: the open is not finished yet, but the file was just created
		/* Don't check for write permission, don't truncate */
		open_flag &= ~O_TRUNC;
		will_truncate = false;
		acc_mode = MAY_OPEN;
		path_to_nameidata(path, nd);
		goto finish_open_created; // newly created file: skip the checks between here and finish_open_created
	}

	/*
	 * create/update audit record if it already exists.
	 */
	if (d_is_positive(path->dentry))
		audit_inode(name, path->dentry, 0);

	/*
	 * If atomic_open() acquired write access it is dropped now due to
	 * possible mount and symlink following (this might be optimized away if
	 * necessary...)
	 */
	if (got_write) {
		mnt_drop_write(nd->path.mnt);
		got_write = false;
	}

	/* The file was not created by this call, so O_EXCL|O_CREAT fails with -EEXIST. */
	error = -EEXIST;
	if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
		goto exit_dput;

	error = follow_managed(path, nd->flags);
	if (error < 0)
		goto exit_dput;

	if (error)
		nd->flags |= LOOKUP_JUMPED;

	BUG_ON(nd->flags & LOOKUP_RCU);
	inode = path->dentry->d_inode;
finish_lookup: // wrap up the lookup
	/* we _can_ be in RCU mode here */
	error = -ENOENT;
	if (!inode || d_is_negative(path->dentry)) {
		path_to_nameidata(path, nd);
		goto out;
	}

	/* A symlink that has to be followed: return 1 so the caller follows it. */
	if (should_follow_link(path->dentry, !symlink_ok)) {
		if (nd->flags & LOOKUP_RCU) {
			if (unlikely(unlazy_walk(nd, path->dentry))) {
				error = -ECHILD;
				goto out;
			}
		}
		BUG_ON(inode != path->dentry->d_inode);
		return 1;
	}

	if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
		path_to_nameidata(path, nd);
	} else {
		/* Keep the parent in save_parent; stale_open needs it to retry the lookup. */
		save_parent.dentry = nd->path.dentry;
		save_parent.mnt = mntget(path->mnt);
		nd->path.dentry = path->dentry;

	}
	nd->inode = inode;
	/* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
finish_open:// wrap up before the actual open
	error = complete_walk(nd);
	if (error) {
		path_put(&save_parent);
		return error;
	}
	audit_inode(name, nd->path.dentry, 0);
	error = -EISDIR;
	if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
		goto out;
	error = -ENOTDIR;
	if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
		goto out;
	if (!S_ISREG(nd->inode->i_mode))
		will_truncate = false;

	if (will_truncate) {
		error = mnt_want_write(nd->path.mnt);
		if (error)
			goto out;
		got_write = true;
	}
finish_open_created:// entry point for a file that was just created; the checks above are skipped for it
	error = may_open(&nd->path, acc_mode, open_flag);
	if (error)
		goto out;

	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
	error = vfs_open(&nd->path, file, current_cred());
	if (!error) {
		*opened |= FILE_OPENED;
	} else {
		if (error == -EOPENSTALE)
			goto stale_open;
		goto out;
	}
opened: // the file is open; only the final checks remain
	error = open_check_o_direct(file);
	if (error)
		goto exit_fput;
	error = ima_file_check(file, op->acc_mode, *opened);
	if (error)
		goto exit_fput;

	if (will_truncate) {
		error = handle_truncate(file);
		if (error)
			goto exit_fput;
	}
out:
	/* Common exit: drop mount write access if held, release save_parent, end the walk. */
	if (got_write)
		mnt_drop_write(nd->path.mnt);
	path_put(&save_parent);
	terminate_walk(nd);
	return error;

exit_dput:
	path_put_conditional(path, nd);
	goto out;
exit_fput:
	fput(file);
	goto out;

stale_open:
	/* If no saved parent or already retried then can't retry */
	if (!save_parent.dentry || retried)
		goto out;

	/* Go back to the saved parent directory and redo the lookup once. */
	BUG_ON(save_parent.dentry != dir);
	path_put(&nd->path);
	nd->path = save_parent;
	nd->inode = dir->d_inode;
	save_parent.mnt = NULL;
	save_parent.dentry = NULL;
	if (got_write) {
		mnt_drop_write(nd->path.mnt);
		got_write = false;
	}
	retried = true;
	goto retry_lookup;
}
  • fd_install
    上面的所有都是为了返回一个打开的文件结构体struct file,获取后还需要和特定fd进行绑定操作,就是通过下面的__fd_install来做的。
/* Descriptor table of a files_struct, as used by __alloc_fd() and __fd_install(). */
struct fdtable {
    unsigned int max_fds;   /* number of bits searched in open_fds by __alloc_fd() */
    struct file __rcu **fd;      /* current fd array */
    unsigned long *close_on_exec; /* presumably the bitmap behind __set_close_on_exec() - confirm */
    unsigned long *open_fds; /* bitmap searched with find_next_zero_bit() for a free descriptor */
    struct rcu_head rcu;
};  

/*
 * __fd_install - store @file in slot @fd of the descriptor table of @files
 *
 * The slot must still be empty (BUG otherwise). The pointer is stored with
 * rcu_assign_pointer() while files->file_lock is held.
 */
void __fd_install(struct files_struct *files, unsigned int fd,
        struct file *file)
{
    struct fdtable *table;

    spin_lock(&files->file_lock);

    table = files_fdtable(files);
    /* Installing over an occupied slot is a bug. */
    BUG_ON(table->fd[fd] != NULL);
    /* Bind the descriptor number to its struct file. */
    rcu_assign_pointer(table->fd[fd], file);

    spin_unlock(&files->file_lock);
}

这里使用的rcu的方式更新对应的file结构体指针。

你可能感兴趣的:(内核笔记)