arm64平台关于32位系统调用的定义:
#define __NR_open 5
__SYSCALL(__NR_open, compat_sys_open)
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = sym,
/*
* The sys_call_table array must be 4K aligned to be accessible from
* kernel/entry.S.
*/
void * const compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = {
[0 ... __NR_compat_syscalls - 1] = sys_ni_syscall,
#include
};
arm64平台上64位系统调用的定义:
/*
 * Excerpt: the native syscall number of open and its table entry.
 * NOTE(review): this entry passes three arguments while the __SYSCALL macro
 * defined below takes two; the two snippets may come from different
 * architectures or kernel versions - confirm.
 */
#define __NR_open 8
__SYSCALL( 8, sys_open, 3)
/*
 * Table construction: __SYSCALL is redefined so that every
 * __SYSCALL(nr, sym) line pulled in by the #include inside the array
 * initializer expands to the designated initializer "[nr] = sym,".
 */
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = sym,
/*
 * The sys_call_table array must be 4K aligned to be accessible from
 * kernel/entry.S.
 */
void * const sys_call_table[__NR_syscalls] __aligned(4096) = {
	/* Every slot defaults to sys_ni_syscall; included entries override it. */
	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
	/*
	 * NOTE(review): the header name was missing from this excerpt; it is
	 * restored here from the upstream arm64 syscall table - confirm against
	 * the kernel version being discussed.
	 */
#include <asm/unistd.h>
};
下面看一下sys_open的实现:
fs/open.c:
/*
 * open(2) entry point.
 *
 * ORs O_LARGEFILE into the caller's flags when force_o_largefile() says so,
 * then forwards the request to do_sys_open() with AT_FDCWD as the directory
 * file descriptor. The value returned by do_sys_open() is returned as is.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
	const int largefile_bit = force_o_largefile() ? O_LARGEFILE : 0;

	return do_sys_open(AT_FDCWD, filename, flags | largefile_bit, mode);
}
通过宏定义展开,实际上上述定义实现了一个sys_open函数,可以看到核心是调用了do_sys_open函数去处理系统调用。下面来看一下它的实现:
/*
 * do_sys_open - common implementation behind the open family of syscalls.
 * @dfd:      directory file descriptor the path is resolved against
 * @filename: user-space pointer to the path string
 * @flags:    open flags supplied by the caller
 * @mode:     creation mode supplied by the caller
 *
 * Returns the newly installed file descriptor on success, or a negative
 * error code taken from build_open_flags(), getname(),
 * get_unused_fd_flags() or do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_flags op;
	struct filename *kname;
	struct file *filp;
	int fd;

	/* Translate the user flags/mode into the internal open_flags form. */
	fd = build_open_flags(flags, mode, &op);
	if (fd != 0)
		return fd;

	/* Obtain the struct filename for the user-supplied path. */
	kname = getname(filename);
	if (IS_ERR(kname))
		return PTR_ERR(kname);

	/* Reserve a free descriptor number before opening anything. */
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		goto release_name;

	/* Look the path up and build the struct file for this open. */
	filp = do_filp_open(dfd, kname, &op);
	if (IS_ERR(filp)) {
		/* Give the reserved descriptor back and report the error. */
		put_unused_fd(fd);
		fd = PTR_ERR(filp);
		goto release_name;
	}

	fsnotify_open(filp);
	/* Bind the descriptor number to the struct file. */
	fd_install(fd, filp);

release_name:
	putname(kname);
	return fd;
}
下面分步骤来看,第一步就是getname,它最终调用的是getname_flags,这个函数的目的就是获取并初始化一个filename结构体:
/*
 * Kernel-side description of a path name passed in from user space.
 * getname_flags() allocates and fills one of these.
 */
struct filename {
const char *name; /* pointer to actual string */
const __user char *uptr; /* original userland pointer */
struct audit_names *aname; /* audit record; getname_flags() sets it to NULL before calling audit_getname() */
int refcnt; /* reference count; getname_flags() initialises it to 1 */
bool separate; /* should "name" be freed? true when the string lives in its own allocation */
};
filename的结构体如上所示,它本身的size很小,但是__getname()从names_cache slab申请的一块内存大小为PATH_MAX(4096 bytes),所以除了这个结构体占用的长度之外,后面的空间可以用于存储实际的路径字符串。
/*
 * getname_flags - copy a path name from user space into a struct filename.
 * @filename: user-space pointer to the NUL-terminated path
 * @flags:    only LOOKUP_EMPTY is examined here (it permits an empty path)
 * @empty:    if non-NULL, set to 1 when the path turns out to be empty
 *
 * Returns the struct filename on success, or an ERR_PTR() carrying
 * -ENOMEM, -ENOENT, -ENAMETOOLONG or the error from strncpy_from_user().
 */
struct filename *
getname_flags(const char __user *filename, int flags, int *empty)
{
struct filename *result, *err;
int len;
long max;
char *kname;
result = audit_reusename(filename); // ask audit whether a struct filename for this user pointer can be reused
if (result)
return result;
result = __getname(); // take one names_cache buffer; it is big enough for a whole path (see max = PATH_MAX below), not merely sizeof(struct filename)
if (unlikely(!result))
return ERR_PTR(-ENOMEM);
result->refcnt = 1;
/*
* First, try to embed the struct filename inside the names_cache
* allocation
*/
/* The path string is stored directly behind the struct in the same
* buffer; because the two are contiguous, separate is set to false.
*/
kname = (char *)result + sizeof(*result);
result->name = kname;
result->separate = false;
max = EMBEDDED_NAME_MAX;
recopy:
len = strncpy_from_user(kname, filename, max); // copy the path string from user space to kname
if (unlikely(len < 0)) {
err = ERR_PTR(len);
goto error;
}
/*
* Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
* separate struct filename so we can dedicate the entire
* names_cache allocation for the pathname, and re-do the copy from
* userland.
*/
// Reaching here with len == EMBEDDED_NAME_MAX means the embedded area was filled completely.
// A separate struct filename is allocated, the whole original buffer becomes the name
// storage, separate is set to true, and the copy is redone with max = PATH_MAX.
if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
kname = (char *)result;
result = kzalloc(sizeof(*result), GFP_KERNEL);
if (!result) {
err = ERR_PTR(-ENOMEM);
/* point result back at the original buffer so the error path releases it */
result = (struct filename *)kname;
goto error;
}
result->name = kname;
result->separate = true;
result->refcnt = 1;
max = PATH_MAX;
goto recopy; // memory obtained: jump back and redo the copy
}
/* The empty path is special. */
if (unlikely(!len)) {
if (empty)
*empty = 1;
err = ERR_PTR(-ENOENT);
if (!(flags & LOOKUP_EMPTY))
goto error;
}
err = ERR_PTR(-ENAMETOOLONG);
if (unlikely(len >= PATH_MAX))
goto error;
result->uptr = filename;
result->aname = NULL;
audit_getname(result); // hand the newly created filename to the audit subsystem
return result;
error:
putname(result);
return err;
}
这里需要注意一点就是audit_names,每个进程都有对应的审计上下文,其中会保存一个audit_names链表,每个打开的filename都会对应一个audit_names结构,它的作用是用于审计,比如selinux权限检查。
下面是第二步,根据传入的flags获取未用的fd结构:
/*
 * get_unused_fd_flags - reserve a free descriptor number for the current task.
 * @flags: open flags, passed through to __alloc_fd()
 *
 * Searches current->files starting at descriptor 0, with the RLIMIT_NOFILE
 * resource limit as the upper bound. Returns whatever __alloc_fd() returns.
 */
int get_unused_fd_flags(unsigned flags)
{
	struct files_struct *task_files = current->files;
	const unsigned long nofile_limit = rlimit(RLIMIT_NOFILE);

	return __alloc_fd(task_files, 0, nofile_limit, flags);
}
EXPORT_SYMBOL(get_unused_fd_flags);
/*
* allocate a file descriptor, mark it busy.
*
* @files: descriptor table owner to allocate from
* @start: lowest descriptor number that may be returned
* @end:   exclusive upper bound; -EMFILE is returned once fd >= end
* @flags: only O_CLOEXEC is examined here
*
* Returns the allocated descriptor number, or a negative error code.
* files->file_lock is held for the duration of the search.
*/
int __alloc_fd(struct files_struct *files,
unsigned start, unsigned end, unsigned flags)
{
unsigned int fd;
int error;
struct fdtable *fdt;
spin_lock(&files->file_lock);
repeat:
fdt = files_fdtable(files); // fetch the fdtable of this files_struct; the fdtable holds the array of struct file pointers
fd = start;
if (fd < files->next_fd) // do not start below the next_fd hint recorded in files_struct
fd = files->next_fd;
if (fd < fdt->max_fds) // search from that number for the next unused descriptor
// this is a bitmap search over open_fds, bounded by max_fds bits
// (NOTE(review): the original note says the bitmap starts as a single long, i.e. 32 bits on arm32 - confirm)
fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
/*
* N.B. For clone tasks sharing a files structure, this test
* will limit the total number of files that can be opened.
*/
error = -EMFILE;
if (fd >= end)
goto out;
/* expand_files() is asked to make room for fd; per the original note,
* when fd is beyond max_fds it creates a larger fdtable and installs it
* in files_struct (presumably - its body is not shown here).
*/
error = expand_files(files, fd);
if (error < 0)
goto out;
/*
* If we needed to expand the fs array we
* might have blocked - try again.
*/
if (error)
goto repeat;
if (start <= files->next_fd)
files->next_fd = fd + 1;
__set_open_fd(fd, fdt);
if (flags & O_CLOEXEC)
__set_close_on_exec(fd, fdt); // presumably a bitmap operation like open_fds - confirm
else
__clear_close_on_exec(fd, fdt);
error = fd;
#if 1
/* Sanity check */
if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
rcu_assign_pointer(fdt->fd[fd], NULL);
}
#endif
out:
spin_unlock(&files->file_lock);
return error;
}
第三步是do_filp_open,看下它的代码实现:
struct file *do_filp_open(int dfd, struct filename *pathname,
const struct open_flags *op)
{
struct nameidata nd;
int flags = op->lookup_flags;
struct file *filp;
filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); //默认第一次打开是通过rcu walk形式进行打开操作,效率最高
if (unlikely(filp == ERR_PTR(-ECHILD)))
filp = path_openat(dfd, pathname, &nd, op, flags); //RCU打开失败后,ref-walk形式打开,可能会睡眠
if (unlikely(filp == ERR_PTR(-ESTALE)))
filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);//这种是在前面打开都已经失败的情况下才执行的,比如文件过期(STALE)
return filp;
}
这个函数最终都会调用path_openat进行下一步操作:
/*
 * path_openat - perform one complete open attempt for a path.
 * @dfd:      directory file descriptor the path is resolved against
 * @pathname: kernel copy of the path
 * @nd:       lookup state, initialised by path_init()
 * @op:       pre-computed open flags
 * @flags:    lookup flags for this attempt
 *
 * Returns the opened struct file, or an ERR_PTR(). -EOPENSTALE is turned
 * into -ECHILD when LOOKUP_RCU was set and into -ESTALE otherwise.
 */
static struct file *path_openat(int dfd, struct filename *pathname,
struct nameidata *nd, const struct open_flags *op, int flags)
{
struct file *file;
struct path path;
int opened = 0;
int error;
file = get_empty_filp(); //1. obtain an empty struct file
if (IS_ERR(file))
return file;
file->f_flags = op->open_flag;
if (unlikely(file->f_flags & __O_TMPFILE)) {
error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened);
goto out;
}
error = path_init(dfd, pathname->name, flags, nd); //2. initialise nameidata with the starting point; path_init() ends in link_path_walk(), which walks up to the parent of the final component
if (unlikely(error))
goto out;
error = do_last(nd, &path, file, op, &opened, pathname); //3. handle the final component; a positive return means it is a trailing symlink that still has to be followed
while (unlikely(error > 0)) { /* trailing symlink */ //4. the final component is a symlink: keep following it to the real target
struct path link = path;
void *cookie;
if (!(nd->flags & LOOKUP_FOLLOW)) {
path_put_conditional(&path, nd);
path_put(&nd->path);
error = -ELOOP;
break;
}
error = may_follow_link(&link, nd);
if (unlikely(error))
break;
nd->flags |= LOOKUP_PARENT;
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
error = follow_link(&link, nd, &cookie);
if (unlikely(error))
break;
error = do_last(nd, &path, file, op, &opened, pathname);//5. after following the link, run do_last() again on the new final component
put_link(nd, &link, cookie);
}
out:
path_cleanup(nd);
/* the file was never opened: an error must be set, and the empty file is released */
if (!(opened & FILE_OPENED)) {
BUG_ON(!error);
put_filp(file);
}
if (unlikely(error)) {
if (error == -EOPENSTALE) {
if (flags & LOOKUP_RCU)
error = -ECHILD;
else
error = -ESTALE;
}
file = ERR_PTR(error);
}
return file;
}
开始介绍path_init之前,先要介绍一下struct nameidata结构体,这个结构体就是用于目录检索时保存信息的,每次检索一级目录都会更新该结构体中的内容。
/*
 * Lookup state carried through a path walk; path_init() initialises it and
 * link_path_walk() updates it for every component it processes.
 */
struct nameidata {
struct path path; /* directory currently reached by the walk */
struct qstr last; /* name and hash of the most recently parsed component */
struct path root; /* root the walk is relative to */
struct inode *inode; /* path.dentry.d_inode */
unsigned int flags; /* LOOKUP_* flags */
unsigned seq, m_seq; /* sequence counts sampled from the dentry d_seq and from mount_lock */
int last_type; /* one of the LAST_* values */
unsigned depth; /* path_init() resets it to 0 */
struct file *base; /* set by path_init() in RCU mode when the dfd's file carries FDPUT_FPUT */
char *saved_names[MAX_NESTED_LINKS + 1];
};
其中path表示当前查找的这一级目录路径,last表示当前子路径名及其散列值,root表示当前目录对应的根目录,seq是目录项顺序锁,m_seq是文件系统mount顺序锁,last_type表示当前目录的类型:
/*
* Type of the last component on LOOKUP_PARENT
*
* LAST_NORM   - an ordinary name (the default chosen by link_path_walk())
* LAST_ROOT   - the path consists only of slashes (path_init() default)
* LAST_DOT    - the component is "."
* LAST_DOTDOT - the component is ".."
* LAST_BIND   - presumably a followed link; not set by the code shown here
*/
enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
其中LAST_NORM表示普通文件类型,LAST_ROOT表示root类型,LAST_DOT表示“.”,LAST_DOTDOT表示“..”,LAST_BIND表示文件链接类型。
/*
 * path_init - choose the starting point of a path walk and start walking.
 * @dfd:   directory file descriptor, or AT_FDCWD
 * @name:  kernel copy of the path string
 * @flags: LOOKUP_* flags for this walk
 * @nd:    lookup state to initialise
 *
 * Sets nd->path and nd->inode to the starting directory (the supplied root,
 * the task's root, the current working directory, or the directory behind
 * @dfd) and then returns the result of link_path_walk(). Returns -EBADF,
 * -ENOTDIR, -ECHILD or a permission error when the start cannot be set up.
 */
static int path_init(int dfd, const char *name, unsigned int flags,
struct nameidata *nd)
{
int retval = 0;
nd->last_type = LAST_ROOT; /* if there are only slashes... */ // the default type is LAST_ROOT
nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
nd->depth = 0;
nd->base = NULL;
if (flags & LOOKUP_ROOT) { // the caller passed LOOKUP_ROOT: start from the root already stored in nd->root
struct dentry *root = nd->root.dentry;
struct inode *inode = root->d_inode;
if (*name) {
if (!d_can_lookup(root))
return -ENOTDIR;
retval = inode_permission(inode, MAY_EXEC);
if (retval)
return retval;
}
nd->path = nd->root;
nd->inode = inode;
if (flags & LOOKUP_RCU) {
rcu_read_lock();
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
nd->m_seq = read_seqbegin(&mount_lock);
} else {
path_get(&nd->path);
}
goto done;
}
nd->root.mnt = NULL;
nd->m_seq = read_seqbegin(&mount_lock);
if (*name=='/') { //1. case one: the path starts with '/', so it is an absolute path
if (flags & LOOKUP_RCU) {
rcu_read_lock();
nd->seq = set_root_rcu(nd);
} else {
set_root(nd);
path_get(&nd->root);
}
nd->path = nd->root; // an absolute path starts at nd->root
} else if (dfd == AT_FDCWD) {//2. case two: relative path resolved against the current task's working directory
if (flags & LOOKUP_RCU) {
struct fs_struct *fs = current->fs;
unsigned seq;
rcu_read_lock();
/* retry until fs->pwd was read without fs->seq changing */
do {
seq = read_seqcount_begin(&fs->seq);
nd->path = fs->pwd; // set nd->path
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
} while (read_seqcount_retry(&fs->seq, seq));
} else {
get_fs_pwd(current->fs, &nd->path); // set nd->path
}
} else { //3. case three: relative path resolved against the directory the caller named via dfd
/* Caller must check execute permissions on the starting path component */
struct fd f = fdget_raw(dfd);
struct dentry *dentry;
if (!f.file)
return -EBADF;
dentry = f.file->f_path.dentry;
if (*name) {
if (!d_can_lookup(dentry)) {
fdput(f);
return -ENOTDIR;
}
}
nd->path = f.file->f_path; // set nd->path
if (flags & LOOKUP_RCU) {
if (f.flags & FDPUT_FPUT)
nd->base = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
rcu_read_lock();
} else {
path_get(&nd->path);
fdput(f);
}
}
nd->inode = nd->path.dentry->d_inode; // nd->inode is the inode of the starting dentry
if (!(flags & LOOKUP_RCU))
goto done;
if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
goto done;
/* the starting dentry changed under us in RCU mode: leave RCU and report -ECHILD */
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
rcu_read_unlock();
return -ECHILD;
done:
current->total_link_count = 0;
return link_path_walk(name, nd); // nd->path and nd->inode are set; start the actual link_path_walk()
}
path_init的目标是搜索目标路径的父目录,并把父目录的信息更新到nameidata结构体中,通过上面的注释可以看到,它会先设置nd->path为起始路径,从起始路径开始进行检索。比如,对于AT_FDCWD类型的系统调用来说,会以进程当前路径作为起始检索路径开始搜索。设置nd->path为起始路径后开始执行link_path_walk进行循环检索,每次检索一级目录,并且更新nameidata结构体,直到最终的父目录。
/*
 * link_path_walk - walk a path one component at a time.
 * @name: path string to resolve
 * @nd:   lookup state; nd->path must already hold the starting directory
 *
 * Every component except the last one is resolved with walk_component().
 * The last component is only parsed into nd->last / nd->last_type and left
 * for the caller. Returns 0 on success or a negative error code.
 */
static int link_path_walk(const char *name, struct nameidata *nd)
{
struct path next;
int err;
while (*name=='/')
name++; // skip leading '/' characters
if (!*name)
return 0;// nothing follows the slashes: there is no component to walk, return
/* At this point we know we have a real path component. */
for(;;) {
u64 hash_len;
int type;
err = may_lookup(nd);
if (err)
break;
hash_len = hash_name(name); // hash the current component; the result also encodes its length
type = LAST_NORM;
if (name[0] == '.') switch (hashlen_len(hash_len)) { // detect the "." and ".." components
case 2:
if (name[1] == '.') {
type = LAST_DOTDOT;
nd->flags |= LOOKUP_JUMPED;
}
break;
case 1:
type = LAST_DOT;
}
if (likely(type == LAST_NORM)) { // ordinary component name
struct dentry *parent = nd->path.dentry;
nd->flags &= ~LOOKUP_JUMPED;
if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { // the parent dentry supplies its own d_hash: recompute the hash with it
struct qstr this = { { .hash_len = hash_len }, .name = name };
err = parent->d_op->d_hash(parent, &this);
if (err < 0)
break;
hash_len = this.hash_len;
name = this.name;
}
}
nd->last.hash_len = hash_len;
nd->last.name = name;
nd->last_type = type;
name += hashlen_len(hash_len); // advance name past the current component
/* the string ends here: this was the final component, leave it in nd->last for the caller */
if (!*name)
return 0;
/*
* If it wasn't NUL, we know it was '/'. Skip that
* slash, and continue until no more slashes.
*/
do {
name++;
} while (unlikely(*name == '/'));
if (!*name) // only slashes followed: the component just parsed is still the final one,
return 0; // so return without walk_component() and leave it to the caller (do_last)
err = walk_component(nd, &next, LOOKUP_FOLLOW); // core step of the walk; per the original note, an ordinary entry updates nd
if (err < 0) // while a symlink leaves nd unchanged and is reported through next
return err;
if (err) {
err = nested_symlink(&next, nd); // follow the symlink in next (possibly nested) and update nd with the real directory
if (err)
return err;
}
/* what was reached must be something that can be looked up in, otherwise -ENOTDIR */
if (!d_can_lookup(nd->path.dentry)) {
err = -ENOTDIR;
break;
}
}
terminate_walk(nd);
return err;
}
上述函数会在walk_component中尝试多种方式定位dentry,首先从lookup_fast快速walk目录项,如果内存缓存中没有,那么就进入slow_path从实际文件系统中读取,最终该函数会返回父目录的信息(包括path、dentry、inode等)到nameidata结构体,并把该结构传递给do_last进行最后一级目录的处理。
/*
 * Excerpt of the trailing-symlink loop: do_last() is called once, and for as
 * long as it returns a positive value the link is followed and do_last() is
 * called again on the new final component.
 */
error = do_last(nd, &path, file, op, &opened, pathname);
while (unlikely(error > 0)) { /* trailing symlink */
struct path link = path;
void *cookie;
/* following is not permitted for this lookup: give up with -ELOOP */
if (!(nd->flags & LOOKUP_FOLLOW)) {
path_put_conditional(&path, nd);
path_put(&nd->path);
error = -ELOOP;
break;
}
error = may_follow_link(&link, nd);
if (unlikely(error))
break;
/* switch back to parent-lookup mode and drop the open/create intent while following */
nd->flags |= LOOKUP_PARENT;
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
error = follow_link(&link, nd, &cookie);
if (unlikely(error))
break;
error = do_last(nd, &path, file, op, &opened, pathname);
put_link(nd, &link, cookie);
}
如果最后一级目录是一个symlink的话,那么do_last会返回1,接着就会去follow对应的symlink,去找到真正的目录项,这里由于symlink是可能嵌套的,比如a->b->c,所以才会使用一个while循环进行follow,最终do_last进行处理。
/*
 * do_last - handle the final component of the path being opened.
 * @nd:     lookup state; nd->path is the parent directory, nd->last the name
 * @path:   receives the looked-up final component
 * @file:   the struct file being set up
 * @op:     pre-computed open flags
 * @opened: FILE_OPENED / FILE_CREATED status bits shared with the caller
 * @name:   the struct filename, used for audit records
 *
 * Returns 0 on success, a negative error code on failure, or 1 when the
 * final component is a symlink the caller has to follow.
 */
static int do_last(struct nameidata *nd, struct path *path,
struct file *file, const struct open_flags *op,
int *opened, struct filename *name)
{
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
bool will_truncate = (open_flag & O_TRUNC) != 0;
bool got_write = false;
int acc_mode = op->acc_mode;
struct inode *inode;
bool symlink_ok = false;
struct path save_parent = { .dentry = NULL, .mnt = NULL };
bool retried = false;
int error;
nd->flags &= ~LOOKUP_PARENT;
nd->flags |= op->intent;
if (nd->last_type != LAST_NORM) {
error = handle_dots(nd, nd->last_type);
if (error)
return error;
goto finish_open; // the last component is not an ordinary name (LAST_DOT, LAST_DOTDOT, ...): no lookup needed, go to finish_open
}
if (!(open_flag & O_CREAT)) { // not creating a file
/* the name is followed by more characters (trailing slashes): it must be a directory */
if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
symlink_ok = true; // O_PATH without LOOKUP_FOLLOW: a symlink found here is accepted as is and not followed
/* we _can_ be in RCU mode here */
error = lookup_fast(nd, path, &inode); // fast lookup path (per the original note: served from in-memory cache)
if (likely(!error))
goto finish_lookup; // found by the fast path: go straight to finish_lookup
if (error < 0)
goto out;
BUG_ON(nd->inode != dir->d_inode);
} else { // O_CREAT is set
/* create side of things */
/*
* This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
* has been cleared when we got to the last component we are
* about to look up
*/
error = complete_walk(nd);// leaves RCU-walk mode; what follows runs in ref-walk
if (error)
return error;
audit_inode(name, dir, LOOKUP_PARENT);
error = -EISDIR;
/* trailing slashes? */
if (nd->last.name[nd->last.len])
goto out;
}
retry_lookup: // reached when the fast lookup did not settle things, on the O_CREAT path, or on a stale-open retry; lookup_open() below does the real lookup
if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { // the open flags may imply writing
error = mnt_want_write(nd->path.mnt); // request write access to the mount (per the original note: guards against a frozen filesystem)
if (!error)
got_write = true;
/*
* do _not_ fail yet - we might not need that or fail with
* a different error; let lookup_open() decide; we'll be
* dropping this one anyway.
*/
}
mutex_lock(&dir->d_inode->i_mutex);
error = lookup_open(nd, path, file, op, got_write, opened); // afterwards the file may already be open, or only the dentry may have been found
mutex_unlock(&dir->d_inode->i_mutex);
if (error <= 0) { // negative: failure; zero: lookup_open() completed the open itself
if (error)
goto out;
if ((*opened & FILE_CREATED) ||
!S_ISREG(file_inode(file)->i_mode))
will_truncate = false;
audit_inode(name, file->f_path.dentry, 0);
goto opened;// jump to opened
}
if (*opened & FILE_CREATED) { // error > 0 here: the open itself has not been done yet
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
will_truncate = false;
acc_mode = MAY_OPEN;
path_to_nameidata(path, nd);
goto finish_open_created; // the file was just created: jump to finish_open_created
}
/*
* create/update audit record if it already exists.
*/
if (d_is_positive(path->dentry))
audit_inode(name, path->dentry, 0);
/*
* If atomic_open() acquired write access it is dropped now due to
* possible mount and symlink following (this might be optimized away if
* necessary...)
*/
if (got_write) {
mnt_drop_write(nd->path.mnt);
got_write = false;
}
/* O_CREAT|O_EXCL and the entry was not created by us: fail with -EEXIST */
error = -EEXIST;
if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
goto exit_dput;
error = follow_managed(path, nd->flags);
if (error < 0)
goto exit_dput;
if (error)
nd->flags |= LOOKUP_JUMPED;
BUG_ON(nd->flags & LOOKUP_RCU);
inode = path->dentry->d_inode;
finish_lookup: // wrap-up of the lookup phase
/* we _can_ be in RCU mode here */
error = -ENOENT;
if (!inode || d_is_negative(path->dentry)) {
path_to_nameidata(path, nd);
goto out;
}
/* the final component is a symlink that must be followed: leave RCU mode if needed and return 1 */
if (should_follow_link(path->dentry, !symlink_ok)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
error = -ECHILD;
goto out;
}
}
BUG_ON(inode != path->dentry->d_inode);
return 1;
}
if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
path_to_nameidata(path, nd);
} else {
/* remember the parent so that stale_open can retry from it */
save_parent.dentry = nd->path.dentry;
save_parent.mnt = mntget(path->mnt);
nd->path.dentry = path->dentry;
}
nd->inode = inode;
/* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
finish_open:// wrap-up before the actual open
error = complete_walk(nd);
if (error) {
path_put(&save_parent);
return error;
}
audit_inode(name, nd->path.dentry, 0);
error = -EISDIR;
if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
goto out;
error = -ENOTDIR;
if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
goto out;
if (!S_ISREG(nd->inode->i_mode))
will_truncate = false;
if (will_truncate) {
error = mnt_want_write(nd->path.mnt);
if (error)
goto out;
got_write = true;
}
finish_open_created:// entry point after a create: the checks above do not apply to a freshly created file, so it jumps straight here
error = may_open(&nd->path, acc_mode, open_flag);
if (error)
goto out;
BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
error = vfs_open(&nd->path, file, current_cred());
if (!error) {
*opened |= FILE_OPENED;
} else {
if (error == -EOPENSTALE)
goto stale_open;
goto out;
}
opened: // the file is open at this point; run the remaining checks and return
error = open_check_o_direct(file);
if (error)
goto exit_fput;
error = ima_file_check(file, op->acc_mode, *opened);
if (error)
goto exit_fput;
if (will_truncate) {
error = handle_truncate(file);
if (error)
goto exit_fput;
}
out:
/* common exit: drop write access if held, drop the saved parent, end the walk */
if (got_write)
mnt_drop_write(nd->path.mnt);
path_put(&save_parent);
terminate_walk(nd);
return error;
exit_dput:
path_put_conditional(path, nd);
goto out;
exit_fput:
fput(file);
goto out;
stale_open:
/* If no saved parent or already retried then can't retry */
if (!save_parent.dentry || retried)
goto out;
BUG_ON(save_parent.dentry != dir);
/* restore nd to the saved parent directory and redo the lookup once */
path_put(&nd->path);
nd->path = save_parent;
nd->inode = dir->d_inode;
save_parent.mnt = NULL;
save_parent.dentry = NULL;
if (got_write) {
mnt_drop_write(nd->path.mnt);
got_write = false;
}
retried = true;
goto retry_lookup;
}
/*
 * Per-files_struct descriptor table: maps descriptor numbers to struct file
 * pointers and tracks which numbers are in use.
 */
struct fdtable {
unsigned int max_fds; /* size bound used when searching open_fds in __alloc_fd() */
struct file __rcu **fd; /* current fd array */
unsigned long *close_on_exec; /* presumably a per-descriptor close-on-exec bitmap - confirm */
unsigned long *open_fds; /* bitmap of allocated descriptors (searched with find_next_zero_bit()) */
struct rcu_head rcu; /* presumably used for RCU-deferred freeing - confirm */
};
/*
 * __fd_install - publish a struct file under a descriptor number.
 * @files: descriptor table owner
 * @fd:    descriptor number; its slot must currently be empty
 * @file:  struct file to store in the slot
 *
 * Runs under files->file_lock. Hits BUG_ON() if the slot is already
 * occupied. The pointer is stored with rcu_assign_pointer().
 */
void __fd_install(struct files_struct *files, unsigned int fd,
		struct file *file)
{
	struct fdtable *table;

	spin_lock(&files->file_lock);

	table = files_fdtable(files);
	BUG_ON(table->fd[fd] != NULL);
	/* Store the struct file in this descriptor's slot. */
	rcu_assign_pointer(table->fd[fd], file);

	spin_unlock(&files->file_lock);
}
这里使用的是RCU的方式来更新fd对应的file结构体指针。