上篇文章《Linux内核源代码情景分析-特殊文件系统/proc》分析了对/proc/loadavg的访问,这篇文章接着分析对/proc/self/cwd的访问。
/*
 * Resolve a path name supplied from user space into *nd.
 *
 * Returns the error encoded in getname()'s result if the copy-in failed;
 * otherwise 0, or path_walk()'s result when path_init() returns non-zero.
 */
int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *tmp;
	int err;

	/* Allocate a page in kernel space and copy the file name into it from user space. */
	tmp = getname(name);
	err = PTR_ERR(tmp);
	if (!IS_ERR(tmp)) {
		err = 0;
		if (path_init(tmp, flags, nd))
			err = path_walk(tmp, nd);
		/* Release the kernel copy of the name obtained from getname(). */
		putname(tmp);
	}
	return err;
}
name就为 /proc/self/cwd,重要分析下path_walk函数,请参考Linux内核源代码情景分析-从路径名到目标节点。
第一次循环中,path_walk发现/proc是个安装节点,于是通过__follow_down找到了proc文件系统根节点的dentry结构,并让nameidata结构中的指针dentry指向这个数据结构。
第二次循环搜索路径名中的下一个节点self,由于这个节点并不是路径名的最后一个节点,所以执行的代码如下:
/* Excerpt from path_walk(): lookup of a component that is not the last one. */

/* Look in memory for a dentry that has already been built for this component. */
dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
if (!dentry) {
	/* Not found, so the dentry for this component has to be built. */
	dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		break;
}
参考 Linux内核源代码情景分析-特殊文件系统/proc,最终也要通过proc_root_lookup()调用proc_lookup(),试图为节点建立起其dentry结构和inode结构。可是由于/proc/self并没有一个固定的proc_dir_entry结构,所以对proc_lookup()的调用必然会失败,因而会进一步调用proc_pid_lookup(),代码如下:
/*
 * lookup routine for the /proc root directory.
 *
 * Tries proc_lookup() first; if that does not return NULL (its success
 * value), falls back to proc_pid_lookup() and returns its result.
 */
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry)
{
	if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */
		/* Refresh the root's link count: proc_root.nlink plus nr_threads. */
		int nlink = proc_root.nlink;

		nlink += nr_threads;
		dir->i_nlink = nlink;
	}

	/*
	 * /proc/self has no fixed proc_dir_entry, so for "self" this call to
	 * proc_lookup() necessarily fails.
	 */
	if (!proc_lookup(dir, dentry))
		return NULL;

	/* This is the function that ends up being called. */
	return proc_pid_lookup(dir, dentry);
}
/*
 * Look up dentry's name among the proc_dir_entry children of dir.
 *
 * On a match an inode is obtained with proc_get_inode() and attached to the
 * dentry with d_add(); NULL is returned in that case.  Otherwise returns
 * ERR_PTR(-ENOENT) when no entry matched, or ERR_PTR(-EINVAL) when an entry
 * matched but proc_get_inode() returned NULL.
 */
struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry)
{
	struct inode *inode;
	struct proc_dir_entry * de;
	int error;

	error = -ENOENT;
	inode = NULL;
	de = (struct proc_dir_entry *) dir->u.generic_ip;
	if (de) {
		/* The /proc/self node is not found in this list. */
		for (de = de->subdir; de ; de = de->next) {
			/* Skip entries with no low_ino. */
			if (!de || !de->low_ino)
				continue;
			/* Compare lengths first, then the name bytes. */
			if (de->namelen != dentry->d_name.len)
				continue;
			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
				int ino = de->low_ino;
				/* A match exists; failure from here on is reported as -EINVAL. */
				error = -EINVAL;
				inode = proc_get_inode(dir->i_sb, ino, de);
				break;
			}
		}
	}

	if (inode) {
		dentry->d_op = &proc_dentry_operations;
		d_add(dentry, inode);
		return NULL;
	}
	/* Return the error code. */
	return ERR_PTR(error);
}
/*
 * Look up "self" or a decimal pid name in the /proc root.
 *
 * "self" gets a freshly created symlink inode using
 * proc_self_inode_operations.  A numeric name is parsed as a pid, the task
 * is looked up, and a directory inode using proc_base_inode_operations is
 * created for it.  Returns NULL on success, ERR_PTR(-ENOMEM) if the "self"
 * inode cannot be allocated, and ERR_PTR(-ENOENT) for every other failure.
 */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
{
	unsigned int pid, c;
	struct task_struct *task;
	const char *name;
	struct inode *inode;
	int len;

	pid = 0;
	name = dentry->d_name.name;
	len = dentry->d_name.len;
	if (len == 4 && !memcmp(name, "self", 4)) {
		/* This branch is taken in this walk: name equals "self". */
		inode = new_inode(dir->i_sb);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
		inode->i_ino = fake_ino(0, PROC_PID_INO);
		inode->u.proc_i.file = NULL;
		inode->u.proc_i.task = NULL;
		/* Mark the inode as a symbolic link. */
		inode->i_mode = S_IFLNK|S_IRWXUGO;
		inode->i_uid = inode->i_gid = 0;
		inode->i_size = 64;
		inode->i_op = &proc_self_inode_operations;
		d_add(dentry, inode);
		/* The function returns here. */
		return NULL;
	}
	/* Parse the name as a decimal number, one character at a time. */
	while (len-- > 0) {
		c = *name - '0';
		name++;
		/* c is unsigned, so any character outside '0'..'9' yields c > 9. */
		if (c > 9)
			goto out;
		if (pid >= MAX_MULBY10)
			goto out;
		pid *= 10;
		pid += c;
		/* pid is still 0 only after a leading '0' digit; reject that. */
		if (!pid)
			goto out;
	}

	read_lock(&tasklist_lock);
	task = find_task_by_pid(pid);
	if (task)
		get_task_struct(task);
	read_unlock(&tasklist_lock);
	if (!task)
		goto out;

	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);

	/* NOTE(review): presumably balances the get_task_struct() above — confirm. */
	free_task_struct(task);

	if (!inode)
		goto out;
	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
	inode->i_op = &proc_base_inode_operations;
	inode->i_fop = &proc_base_operations;
	inode->i_nlink = 3;
	inode->i_flags|=S_IMMUTABLE;

	dentry->d_op = &pid_base_dentry_operations;
	d_add(dentry, inode);
	return NULL;
out:
	return ERR_PTR(-ENOENT);
}
其中proc_self_inode_operations结构定义如下:
/* Inode operations assigned to the /proc/self symlink inode: readlink and follow_link only. */
static struct inode_operations proc_self_inode_operations = {
	readlink:	proc_self_readlink,
	follow_link:	proc_self_follow_link,
};
还是第二轮循环,从proc_root_lookup返回到path_walk中以后,接着要检查和处理两件事,第一件是新找到的节点是否为安装点;第二件就是它是否是一个连接节点。这正是我们在这里所关心的,因为/proc/self就是个连接节点。继续看path_walk,代码如下:
/*
 * Excerpt from path_walk(): check whether the follow_link pointer is NULL.
 * The original note says this pointer is set in ext2_read_inode; for the
 * /proc/self inode in this walk, i_op was set to &proc_self_inode_operations
 * in proc_pid_lookup(), so follow_link is proc_self_follow_link.
 */
if (inode->i_op->follow_link) {
	err = do_follow_link(dentry, nd);
/*
 * Follow the link represented by dentry by calling its inode's follow_link
 * operation.  In this walk, dentry is the dentry of the /proc/self node.
 *
 * current->link_count is incremented around the call; if it is already 8 or
 * more on entry, nd is released and -ELOOP is returned instead.
 */
static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	int err;
	if (current->link_count >= 8)
		goto loop;
	current->link_count++;
	UPDATE_ATIME(dentry->d_inode);
	/* For /proc/self this calls proc_self_follow_link. */
	err = dentry->d_inode->i_op->follow_link(dentry, nd);
	current->link_count--;
	return err;
loop:
	path_release(nd);
	return -ELOOP;
}
entry->d_inode->i_op->follow_link指向proc_self_follow_link,代码如下:
/*
 * follow_link for /proc/self: the link text is the current process's pid
 * printed in decimal, which is then resolved by vfs_follow_link().
 */
static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	char tmp[30];
	sprintf(tmp, "%d", current->pid);
	return vfs_follow_link(nd,tmp);
}
/* Non-inline entry point that forwards directly to __vfs_follow_link(). */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	return __vfs_follow_link(nd, link);
}
/*
 * Resolve the link text `link` relative to nd by calling path_walk().
 *
 * If `link` is an error pointer, nd is released and that error is returned.
 * A link starting with '/' first releases nd and restarts from the root via
 * walk_init_root().  Returns path_walk()'s result, or 0 after copying the
 * last component when the final condition below applies.
 */
static inline int __vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = 0;
	char *name;
	if (IS_ERR(link))
		goto fail;

	if (*link == '/') {
		path_release(nd);
		if (!walk_init_root(link, nd))
			/* weird __emul_prefix() stuff did it */
			goto out;
	}
	res = path_walk(link, nd);
out:
	/*
	 * Done unless this is the outermost link (link_count == 0), the walk
	 * succeeded, and the last component type is LAST_NORM.
	 */
	if (current->link_count || res || nd->last_type!=LAST_NORM)
		return res;
	/*
	 * If it is an iterative symlinks resolution in open_namei() we
	 * have to copy the last component. And all that crap because of
	 * bloody create() on broken symlinks. Furrfu...
	 */
	name = __getname();
	/*
	 * NOTE(review): __getname() is not shown here; if it reports failure
	 * with NULL rather than an error pointer, this check would not catch
	 * it — confirm against its definition.
	 */
	if (IS_ERR(name))
		goto fail_name;
	strcpy(name, nd->last.name);
	nd->last.name = name;
	return 0;
fail_name:
	/* Route the error value through `link` so the shared exit returns it. */
	link = name;
fail:
	path_release(nd);
	return PTR_ERR(link);
}
在__vfs_follow_link()中会调用path_walk()来寻找连接的目标节点,所以又会调用其父节点/proc的lookup函数,即proc_root_lookup(),不同的只是这次寻找的不是"self",而是当前进程的pid字符串。
/*
 * Look up "self" or a decimal pid name in the /proc root (same function as
 * quoted earlier; annotated here for the pass where the name is the current
 * process's pid string).
 *
 * Returns NULL on success, ERR_PTR(-ENOMEM) if the "self" inode cannot be
 * allocated, and ERR_PTR(-ENOENT) for every other failure.
 */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
{
	unsigned int pid, c;
	struct task_struct *task;
	const char *name;
	struct inode *inode;
	int len;

	pid = 0;
	name = dentry->d_name.name;
	len = dentry->d_name.len;
	if (len == 4 && !memcmp(name, "self", 4)) {
		/* Not taken this time: name is not "self". */
		inode = new_inode(dir->i_sb);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
		inode->i_ino = fake_ino(0, PROC_PID_INO);
		inode->u.proc_i.file = NULL;
		inode->u.proc_i.task = NULL;
		inode->i_mode = S_IFLNK|S_IRWXUGO;
		inode->i_uid = inode->i_gid = 0;
		inode->i_size = 64;
		inode->i_op = &proc_self_inode_operations;
		d_add(dentry, inode);
		/* The "self" case returns here. */
		return NULL;
	}
	/* Executed this time: convert the decimal name into pid. */
	while (len-- > 0) {
		c = *name - '0';
		name++;
		/* c is unsigned, so any character outside '0'..'9' yields c > 9. */
		if (c > 9)
			goto out;
		if (pid >= MAX_MULBY10)
			goto out;
		pid *= 10;
		pid += c;
		/* pid is still 0 only after a leading '0' digit; reject that. */
		if (!pid)
			goto out;
	}

	read_lock(&tasklist_lock);
	task = find_task_by_pid(pid);
	if (task)
		get_task_struct(task);
	read_unlock(&tasklist_lock);
	if (!task)
		goto out;

	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);

	/* NOTE(review): presumably balances the get_task_struct() above — confirm. */
	free_task_struct(task);

	if (!inode)
		goto out;
	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
	/* Note this pointer: it is used shortly, when "cwd" is looked up. */
	inode->i_op = &proc_base_inode_operations;
	inode->i_fop = &proc_base_operations;
	inode->i_nlink = 3;
	inode->i_flags|=S_IMMUTABLE;

	dentry->d_op = &pid_base_dentry_operations;
	d_add(dentry, inode);
	return NULL;
out:
	return ERR_PTR(-ENOENT);
}
/* Inode operations assigned to a per-process directory inode in proc_pid_lookup(): lookup only. */
static struct inode_operations proc_base_inode_operations = {
	lookup:		proc_base_lookup,
};
proc_pid_make_inode为进程创建一个inode结构,代码如下:
/*
 * Create an inode on sb for a per-process proc entry of kind `ino`.
 *
 * The inode records `task` and takes a reference on it.  Ownership is root
 * unless ino is PROC_PID_INO or the task is dumpable, in which case the
 * task's euid/egid are used.  Returns the inode, or NULL if new_inode()
 * fails or the task has no p_pptr.
 */
static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
{
	struct inode * inode;

	/* We need a new inode */
	inode = new_inode(sb);
	if (!inode)
		goto out;	/* inode is NULL here, so NULL is returned */

	/* Common stuff */
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	inode->i_ino = fake_ino(task->pid, ino);

	inode->u.proc_i.file = NULL;
	/*
	 * grab the reference to task.
	 */
	/* The task passed in (the current process's, in this walk) is stored here. */
	inode->u.proc_i.task = task;
	get_task_struct(task);
	if (!task->p_pptr)
		goto out_unlock;

	inode->i_uid = 0;
	inode->i_gid = 0;
	if (ino == PROC_PID_INO || task->dumpable) {
		inode->i_uid = task->euid;
		inode->i_gid = task->egid;
	}

out:
	return inode;

out_unlock:
	/* Drop the half-initialised inode and report failure. */
	iput(inode);
	return NULL;
}
从path_walk返回后,nd->dentry已指向代表着当前进程的目录节点的dentry结构,之后层层返回到proc_self_follow_link,最后返回到主path_walk的第二次循环中,开始执行第三次循环。
第三次循环,最后一个节点是"cwd",这一次所搜索的节点已经是路径名中的最后一个节点,所以转到last_component的地方,同样也是在real_lookup()中通过父节点的inode_operations结构中的lookup函数指针执行实际的操作,也就是proc_base_lookup,代码如下:
/*
 * lookup routine for a per-process directory in /proc.
 *
 * Finds dentry's name in the base_stuff table, creates an inode for it with
 * proc_pid_make_inode(), and installs the operations that match the entry
 * type.  Returns NULL on success, ERR_PTR(-ENOENT) if the name is not in the
 * table, and ERR_PTR(-EINVAL) if the inode cannot be created or the type is
 * not handled by the switch.
 */
static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode;
	int error;
	/* Take the task recorded in the directory inode (the current process's here). */
	struct task_struct *task = dir->u.proc_i.task;
	struct pid_entry *p;

	error = -ENOENT;
	inode = NULL;

	/* Search base_stuff for the name; for this walk it finds "cwd". */
	for (p = base_stuff; p->name; p++) {
		if (p->len != dentry->d_name.len)
			continue;
		if (!memcmp(dentry->d_name.name, p->name, p->len))
			break;
	}
	/* Reaching the terminating entry (NULL name) means no match. */
	if (!p->name)
		goto out;

	error = -EINVAL;
	/* For "cwd", p->type is PROC_PID_CWD, i.e. 5. */
	inode = proc_pid_make_inode(dir->i_sb, task, p->type);
	if (!inode)
		goto out;

	inode->i_mode = p->mode;
	/*
	 * Yes, it does not scale. And it should not. Don't add
	 * new entries into /proc/<pid>/ without very good reasons.
	 */
	switch(p->type) {
		case PROC_PID_FD:
			inode->i_nlink = 2;
			inode->i_op = &proc_fd_inode_operations;
			inode->i_fop = &proc_fd_operations;
			break;
		case PROC_PID_EXE:
			inode->i_op = &proc_pid_link_inode_operations;
			inode->u.proc_i.op.proc_get_link = proc_exe_link;
			break;
		case PROC_PID_CWD:
			/*
			 * Both assignments matter for the rest of the walk:
			 * i_op supplies follow_link, and proc_get_link
			 * (a member of struct proc_inode_info, reached via
			 * inode->u.proc_i) selects proc_cwd_link.
			 */
			inode->i_op = &proc_pid_link_inode_operations;
			inode->u.proc_i.op.proc_get_link = proc_cwd_link;
			break;
		case PROC_PID_ROOT:
			inode->i_op = &proc_pid_link_inode_operations;
			inode->u.proc_i.op.proc_get_link = proc_root_link;
			break;
		case PROC_PID_ENVIRON:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_environ;
			break;
		case PROC_PID_STATUS:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_status;
			break;
		case PROC_PID_STAT:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_stat;
			break;
		case PROC_PID_CMDLINE:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_cmdline;
			break;
		case PROC_PID_STATM:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_statm;
			break;
		case PROC_PID_MAPS:
			inode->i_fop = &proc_maps_operations;
			break;
#ifdef CONFIG_SMP
		case PROC_PID_CPU:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_cpu;
			break;
#endif
		case PROC_PID_MEM:
			inode->i_op = &proc_mem_inode_operations;
			inode->i_fop = &proc_mem_operations;
			break;
		default:
			/* A table entry with a type this switch does not handle. */
			printk("procfs: impossible type (%d)",p->type);
			iput(inode);
			return ERR_PTR(-EINVAL);
	}
	dentry->d_op = &pid_dentry_operations;
	d_add(dentry, inode);
	return NULL;

out:
	return ERR_PTR(error);
}
/* One row of the per-process directory table: type tag, name length, name, inode mode. */
struct pid_entry {
	int type;
	int len;
	char *name;
	mode_t mode;
};

/*
 * Type tags for per-process proc entries.  Numbering starts at 2, so
 * PROC_PID_STATUS is 3, PROC_PID_MEM is 4 and PROC_PID_CWD is 5.
 */
enum pid_directory_inos {
	PROC_PID_INO = 2,
	PROC_PID_STATUS,
	PROC_PID_MEM,
	PROC_PID_CWD,
	PROC_PID_ROOT,
	PROC_PID_EXE,
	PROC_PID_FD,
	PROC_PID_ENVIRON,
	PROC_PID_CMDLINE,
	PROC_PID_STAT,
	PROC_PID_STATM,
	PROC_PID_MAPS,
	PROC_PID_CPU,
	PROC_PID_FD_DIR = 0x8000,	/* 0x8000-0xffff */
};

/* Build one pid_entry; sizeof(name)-1 is the literal's length without its NUL. */
#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
/* Table searched by proc_base_lookup(); terminated by an entry with a NULL name. */
static struct pid_entry base_stuff[] = {
  E(PROC_PID_FD,	"fd",		S_IFDIR|S_IRUSR|S_IXUSR),
  E(PROC_PID_ENVIRON,	"environ",	S_IFREG|S_IRUSR),
  E(PROC_PID_STATUS,	"status",	S_IFREG|S_IRUGO),
  E(PROC_PID_CMDLINE,	"cmdline",	S_IFREG|S_IRUGO),
  E(PROC_PID_STAT,	"stat",		S_IFREG|S_IRUGO),
  E(PROC_PID_STATM,	"statm",	S_IFREG|S_IRUGO),
#ifdef CONFIG_SMP
  E(PROC_PID_CPU,	"cpu",		S_IFREG|S_IRUGO),
#endif
  E(PROC_PID_MAPS,	"maps",		S_IFREG|S_IRUGO),
  E(PROC_PID_MEM,	"mem",		S_IFREG|S_IRUSR|S_IWUSR),
  E(PROC_PID_CWD,	"cwd",		S_IFLNK|S_IRWXUGO),
  E(PROC_PID_ROOT,	"root",		S_IFLNK|S_IRWXUGO),
  E(PROC_PID_EXE,	"exe",		S_IFLNK|S_IRWXUGO),
  {0,0,NULL,0}
};
#undef E
/*
 * Per-inode proc data, reached as inode->u.proc_i in the code quoted in
 * this article.
 */
struct proc_inode_info {
	struct task_struct *task;	/* task the inode belongs to; NULL for /proc/self */
	int type;
	/* Exactly one callback is used, depending on the entry type. */
	union {
		/* Link entries (cwd, root, exe): produce the target dentry and vfsmount. */
		int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
		/* Info-file entries (status, stat, ...): assigned in proc_base_lookup(). */
		int (*proc_read)(struct task_struct *task, char *page);
	} op;
	struct file *file;
};
从proc_base_lookup()经由real_lookup()返回到path_walk时,返回值dentry已经指向了这个特定"cwd"节点dentry结构。但是接着同样要受到对其Inode结构中的i_op指针以及相应inode_operations结构的指针follow_link的检验,看path_walk的代码:
/* Excerpt from path_walk(): link check for the last path component. */
inode = dentry->d_inode;
/*
 * Unlike the first and second iterations, the LOOKUP_FOLLOW flag must be
 * set for the link to be followed here.
 */
if ((lookup_flags & LOOKUP_FOLLOW)
    && inode && inode->i_op && inode->i_op->follow_link) {
	err = do_follow_link(dentry, nd);
/*
 * Follow the link represented by dentry by calling its inode's follow_link
 * operation.
 *
 * current->link_count is incremented around the call; if it is already 8 or
 * more on entry, nd is released and -ELOOP is returned instead.
 */
static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	int err;
	/* Refuse to nest further once the limit is reached. */
	if (current->link_count >= 8)
		goto loop;
	current->link_count++;
	UPDATE_ATIME(dentry->d_inode);
	err = dentry->d_inode->i_op->follow_link(dentry, nd);
	current->link_count--;
	return err;
loop:
	path_release(nd);
	return -ELOOP;
}
dentry->d_inode->i_op指向了proc_pid_link_inode_operations结构,是在这里设置的:
/* Excerpt from the switch in proc_base_lookup(). */
case PROC_PID_CWD:
	/*
	 * Both assignments matter: i_op supplies follow_link, and
	 * proc_get_link (a member of struct proc_inode_info, reached via
	 * inode->u.proc_i) selects proc_cwd_link.
	 */
	inode->i_op = &proc_pid_link_inode_operations;
	inode->u.proc_i.op.proc_get_link = proc_cwd_link;
/* Inode operations assigned to the cwd, root and exe link entries in proc_base_lookup(). */
static struct inode_operations proc_pid_link_inode_operations = {
	readlink:	proc_pid_readlink,
	follow_link:	proc_pid_follow_link
};
dentry->d_inode->i_op->follow_link(dentry, nd),proc_pid_follow_link (dentry, nd),也就是代码如下:
/*
 * follow_link for per-process link entries; in this walk, dentry is the
 * dentry of the "cwd" node.
 *
 * Releases nd, then fills nd->dentry and nd->mnt through the inode's
 * proc_get_link callback.  Returns -EACCES when the caller's fsuid differs
 * from the inode's uid and CAP_DAC_OVERRIDE is not held, a non-zero result
 * from proc_check_root(), or the callback's result.
 */
static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	struct inode *inode = dentry->d_inode;
	int error = -EACCES;

	/* We don't need a base pointer in the /proc filesystem */
	path_release(nd);

	if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
		goto out;
	error = proc_check_root(inode);
	if (error)
		goto out;

	/* For the "cwd" entry this callback is proc_cwd_link. */
	error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
	nd->last_type = LAST_BIND;
out:
	return error;
}
inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt),也就是proc_cwd_link( inode, &nd->dentry, &nd->mnt),代码如下:
/*
 * proc_get_link callback for /proc/<pid>/cwd.
 *
 * Stores the task's current working directory in *dentry and the vfsmount
 * it lives on in *mnt, each obtained through dget()/mntget().  Returns 0 on
 * success, or -ENOENT if the task has no fs_struct.
 */
static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
{
	struct fs_struct *fs;
	int result = -ENOENT;
	task_lock(inode->u.proc_i.task);
	/*
	 * task points to the process's task_struct, from which its fs_struct
	 * is obtained.
	 */
	fs = inode->u.proc_i.task->fs;
	/* Bump fs->count while the task lock is held, before using fs below. */
	if(fs)
		atomic_inc(&fs->count);
	task_unlock(inode->u.proc_i.task);
	if (fs) {
		read_lock(&fs->lock);
		/* nd->mnt now refers to the vfsmount of the device holding that directory. */
		*mnt = mntget(fs->pwdmnt);
		/* nd->dentry now refers to the process's current working directory. */
		*dentry = dget(fs->pwd);
		read_unlock(&fs->lock);
		result = 0;
		put_fs_struct(fs);
	}
	return result;
}
当从proc_cwd_link()经由do_follow_link()返回到path_walk()中时,nameidata结构中指针已经指向最终的目标,即当前进程的当前工作目录。
也就是:
/*
 * Resolve a path name supplied from user space into *nd.
 *
 * Returns the error encoded in getname()'s result if the copy-in failed;
 * otherwise 0, or path_walk()'s result when path_init() returns non-zero.
 */
int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *tmp;
	int err;

	/* Allocate a page in kernel space and copy the file name into it from user space. */
	tmp = getname(name);
	err = PTR_ERR(tmp);
	if (!IS_ERR(tmp)) {
		err = 0;
		if (path_init(tmp, flags, nd))
			err = path_walk(tmp, nd);
		/* Release the kernel copy of the name obtained from getname(). */
		putname(tmp);
	}
	return err;
}
此时nd->mnt指向了该目录所在设备安装时的vfsmount结构,nd->dentry指向了该进程的"当前工作目录"的dentry结构。