devtmpfs分析

转自:devtmpfs分析_zdy0_2004的博客-CSDN博客

1 初始化

  1.1 文件系统注册

2 运行

  2.1 devtmpfsd进程

  2.2 创建设备文件

    2.2.1 遍历路径

    2.2.2 构建目录

    2.2.3 构建设备文件

  2.3 二次mount

3 与统一设备模型的接口

devtmpfs主要完成了对设备文件创建的管理工作。它是统一设备模型的基础之一。该fs在初始化过程中分为两个阶段。在内核启动阶段,完成了fs的注册以及后台进程的创建;在mdev或udev或某些启动命令下,完成了fs的二次mount。 参考内核版本为3.17内核。

1 初始化

与devtmpfs相关的代码位于drivers/base/devtmpfs.c文件。devtmpfs文件系统的初始化与驱动初始化同时进行,调用路径如下:

do_basic_setup()-->driver_init()-->devtmpfs_init()。

devtmpfs_init()函数一方面完成了文件系统的注册;另一方面启动了一个进程kdevtmpfs。

/*
 * devtmpfs_init() - boot-time setup of devtmpfs.
 *
 * Registers dev_fs_type, then starts the "kdevtmpfs" thread running
 * devtmpfsd() and waits on setup_done until that thread has reported the
 * outcome of its own setup through the shared 'err' variable.  If either
 * the thread creation or the thread's setup fails, the filesystem type is
 * unregistered again.
 *
 * Returns 0 on success, otherwise the error from register_filesystem(),
 * kthread_run() or devtmpfsd().
 */
int __init devtmpfs_init(void)

{

       int err = register_filesystem(&dev_fs_type);

       if (err) {

              printk(KERN_ERR "devtmpfs: unable to register devtmpfs "

                     "type %i\n", err);

              return err;

       }

       /* &err is handed to the thread; devtmpfsd() writes its result into it. */

       thread = kthread_run(devtmpfsd, &err, "kdevtmpfs");

       if (!IS_ERR(thread)) {

              /* Wait for devtmpfsd() to complete setup_done (success or failure). */

              wait_for_completion(&setup_done);

       } else {

              err = PTR_ERR(thread);

              /* A NULL thread makes devtmpfs_create_node() return early. */

              thread = NULL;

       }

       if (err) {

              printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err);

              unregister_filesystem(&dev_fs_type);

              return err;

       }

       printk(KERN_INFO "devtmpfs: initialized\n");

       return 0;

}

1.1 文件系统注册

devtmpfs文件系统的注册由register_filesystem(&dev_fs_type)完成。

文件系统类型定义:

/*
 * Filesystem type descriptor for "devtmpfs".  It is registered by
 * devtmpfs_init() via register_filesystem(); mounts are routed to
 * dev_mount() (not shown in this excerpt).
 */
static struct file_system_type dev_fs_type = {

      .name = "devtmpfs",

      .mount = dev_mount,

      .kill_sb = kill_litter_super,

};

2 运行

在内核的初始化阶段完成后,devtmpfs就已经可以被统一设备模型子系统使用,但是还不能被用户使用。

2.1 devtmpfsd进程

/*
 * devtmpfsd() - body of the "kdevtmpfs" kernel thread.
 *
 * @p: pointer to the int in devtmpfs_init() that receives the setup result.
 *
 * Setup: unshare the mount namespace, mount devtmpfs on "/", then chdir and
 * chroot into it, so relative node names resolve inside the devtmpfs
 * instance.  setup_done is completed on both the success and failure paths
 * so that devtmpfs_init() never blocks forever.
 *
 * Service loop: detach the whole pending 'requests' list under req_lock,
 * process each entry with handle() outside the lock, complete the
 * requester's completion, and sleep when the list is empty.
 *
 * Only returns on setup failure, with the error stored in *p.
 */
static int devtmpfsd(void *p)

{

       char options[] = "mode=0755";

       int *err = p;

       *err = sys_unshare(CLONE_NEWNS);

       if (*err)

              goto out;

       *err = sys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, options);  // first mount

       if (*err)

              goto out;

       sys_chdir("/.."); /* will traverse into overmounted root */

       sys_chroot(".");

       complete(&setup_done);

       while (1) {

              spin_lock(&req_lock);

              while (requests) {

                     /* Take ownership of the whole list, then drop the lock. */

                     struct req *req = requests;

                     requests = NULL;

                     spin_unlock(&req_lock);

                     while (req) {

                            /* Read ->next first: req lives on the requester's
                             * stack and may vanish once ->done is completed. */

                            struct req *next = req->next;

                            req->err = handle(req->name, req->mode,

                                            req->uid, req->gid, req->dev);

                            complete(&req->done);

                            req = next;

                     }

                     spin_lock(&req_lock);

              }

              /* Set the sleep state while still holding req_lock, before
               * calling schedule(). */

              __set_current_state(TASK_INTERRUPTIBLE);

              spin_unlock(&req_lock);

              schedule();                                             // sleep until a create or delete request wakes this thread

       }

       return 0;

out:

       complete(&setup_done);

       return *err;

}

该进程在fs初始化时创建。主要完成了fs的第一次mount工作,然后进入while循环,在循环体内部,设置进程状态为TASK_INTERRUPTIBLE,换出进程,等待被唤醒。

kdevtmpfs进程被唤醒离不开数据结构req:

/*
 * One pending request for the kdevtmpfs thread.  'requests' is the head of
 * a singly linked list of these, chained through ->next.  The requester
 * waits on ->done and then reads the result from ->err.
 */
static struct req {

      struct req *next;

      struct completion done;

      int err;

      const char *name;

      umode_t mode;  /* 0 => delete */

      kuid_t uid;

      kgid_t gid;

      struct device *dev;

} *requests;

定义了struct req类型,以及指向该类型的链表头指针requests;客户进程通过构建req,并插入requests链表来请求建立设备文件的服务。

req结构体的name成员即为设备文件的路径名,然而路径名是不带/dev前缀的。比如“/dev/input/eventX”文件建立时,传递给devtmpfs的路径名却是“input/eventX”。理解这点涉及到vfs和进程的概念。

2.2 创建设备文件

当有客户进程需要创建设备文件,就会唤醒devtmpfsd进程。该进程会执行handle(req->name, req->mode,req->uid, req->gid, req->dev)操作。最终调用static int handle_create()函数。

/*
 * handle_create() - create the device node @nodename.
 *
 * @nodename: node path passed to kern_path_create() with AT_FDCWD.
 * @mode:     file type and permission bits for the new node.
 * @uid/@gid: ownership applied to the node after creation.
 * @dev:      device whose devt becomes the node's device number.
 *
 * If the first lookup fails with -ENOENT, the missing parent directories
 * are created with create_path() and the lookup is retried once.  The
 * return value of create_path() is not checked; a failure there shows up
 * as an error from the second kern_path_create().
 *
 * Returns 0 on success or a negative errno from the lookup or vfs_mknod().
 */
static int handle_create(const char *nodename, umode_t mode, kuid_t uid,

                      kgid_t gid, struct device *dev)

{

       struct dentry *dentry;

       struct path path;

       int err;

       dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);-----------------------(1)

       if (dentry == ERR_PTR(-ENOENT)) {                                               

              create_path(nodename);-------------------------------------------------------------------(2)

              dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);------------------(3)

       }

       if (IS_ERR(dentry))

              return PTR_ERR(dentry);

       /* path.dentry is the parent directory; dentry is the new node. */

       err = vfs_mknod(path.dentry->d_inode, dentry, mode, dev->devt);-------------------(4)

       if (!err) {

              struct iattr newattrs;

              newattrs.ia_mode = mode;

              newattrs.ia_uid = uid;

              newattrs.ia_gid = gid;

              newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;

              /* Apply mode/uid/gid under the new inode's i_mutex; the
               * result of notify_change() is not checked. */

              mutex_lock(&dentry->d_inode->i_mutex);

              notify_change(dentry, &newattrs, NULL);

              mutex_unlock(&dentry->d_inode->i_mutex);

              /* mark as kernel-created inode */

              dentry->d_inode->i_private = &thread;

       }

       /* Pairs with a successful kern_path_create(). */

       done_path_create(&path, dentry);

       return err;

}

(1)负责查找父路径的dentry;(2)负责构建目录;(3)在目录构建完成后重新执行(1)的查找;(4)负责构建目标设备文件。

2.2.1 遍历路径

先看(1):

/*
 * kern_path_create() - look up the parent of @pathname and prepare a dentry
 * for creating its last component.
 *
 * @dfd:          directory fd the lookup starts from (callers here pass
 *                AT_FDCWD).
 * @pathname:     path of the object to be created.
 * @path:         on success, receives the parent's path (nd.path).
 * @lookup_flags: only LOOKUP_REVAL and LOOKUP_DIRECTORY are honoured.
 *
 * On success the dentry for the last component is returned and the parent's
 * i_mutex is still locked (there is no unlock on the success path); the
 * callers shown in this document release it with done_path_create().
 *
 * On failure an ERR_PTR is returned:
 *   - the error from do_path_lookup() if the parent walk fails,
 *   - -EEXIST if the last component is not a normal name or already exists,
 *   - -ENOENT if the name has trailing characters (e.g. "foo/bar/") and
 *     LOOKUP_DIRECTORY was not requested,
 *   - the deferred error from mnt_want_write().
 */
struct dentry *kern_path_create(int dfd, const char *pathname,

                            struct path *path, unsigned int lookup_flags)

{

       /* Default result for the "goto out" path below. */

       struct dentry *dentry = ERR_PTR(-EEXIST);

       struct nameidata nd;

       int err2;

       int error;

       /* Remember LOOKUP_DIRECTORY before the flags are masked below. */

       bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);

       /*

        * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any

        * other flags passed in are ignored!

        */

       lookup_flags &= LOOKUP_REVAL;

       error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd);--------------(1.1)

       if (error)

              return ERR_PTR(error);

       /*

        * Yucky last component or no last component at all?

        * (foo/., foo/.., /)

        */

       if (nd.last_type != LAST_NORM)

              goto out;

       nd.flags &= ~LOOKUP_PARENT;

       nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;

       /* don't fail immediately if it's r/o, at least try to report other errors */

       err2 = mnt_want_write(nd.path.mnt);

       /*

        * Do the final lookup.

        */

       mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);

       dentry = lookup_hash(&nd);-----------------------------------------------------------------------------(1.2)

       if (IS_ERR(dentry))

              goto unlock;

       error = -EEXIST;

       if (d_is_positive(dentry))         //return dentry->d_flags & DCACHE_ENTRY_TYPE;

              goto fail;

       /*

        * Special case - lookup gave negative, but... we had foo/bar/

        * From the vfs_mknod() POV we just have a negative dentry -

        * all is fine. Let's be bastards - you had / on the end, you've

        * been asking for (non-existent) directory. -ENOENT for you.

        */

       if (unlikely(!is_dir && nd.last.name[nd.last.len])) {

              error = -ENOENT;

              goto fail;

       }

       /* Report the deferred mnt_want_write() failure only now. */

       if (unlikely(err2)) {

              error = err2;

              goto fail;

       }

       *path = nd.path;

       return dentry;

fail:

       dput(dentry);

       dentry = ERR_PTR(error);

unlock:

       mutex_unlock(&nd.path.dentry->d_inode->i_mutex);

       /* Drop write access only if mnt_want_write() succeeded. */

       if (!err2)

              mnt_drop_write(nd.path.mnt);

out:

       path_put(&nd.path);

       return dentry;

}

(1.1)完成了对路径dentry的查找工作。do_path_lookup()调用filename_lookup(),然后调用path_lookupat()函数。(1.1)的主要功能即由path_lookupat()来完成:

/*
 * path_lookupat() - walk @name and leave the result in @nd.
 *
 * @dfd:   directory fd the walk starts from.
 * @name:  path to resolve.
 * @flags: LOOKUP_* flags; with LOOKUP_PARENT the walk stops before the last
 *         component and the final-component handling below is skipped.
 * @nd:    walk state, initialised by path_init() and updated during the walk.
 *
 * Steps: initialise @nd, walk the components with link_path_walk(), and,
 * unless LOOKUP_PARENT was given, resolve the last component, following
 * links for as long as lookup_last() returns a positive value.  The walk is
 * then finished with complete_walk(), and with LOOKUP_DIRECTORY set the
 * result must satisfy d_can_lookup().
 *
 * Returns 0 on success or a negative errno (-ENOTDIR when LOOKUP_DIRECTORY
 * is set and the result cannot be looked up in).
 */
static int path_lookupat(int dfd, const char *name,

                            unsigned int flags, struct nameidata *nd)

{

       struct file *base = NULL;

       struct path path;

       int err;

       /*

        * Path walking is largely split up into 2 different synchronisation

        * schemes, rcu-walk and ref-walk (explained in

        * Documentation/filesystems/path-lookup.txt). These share much of the

        * path walk code, but some things particularly setup, cleanup, and

        * following mounts are sufficiently divergent that functions are

        * duplicated. Typically there is a function foo(), and its RCU

        * analogue, foo_rcu().

        *

        * -ECHILD is the error number of choice (just to avoid clashes) that

        * is returned if some aspect of an rcu-walk fails. Such an error must

        * be handled by restarting a traditional ref-walk (which will always

        * be able to complete).

        */

       err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);----------------------------(1.1.1)

       if (unlikely(err))

              return err;

       current->total_link_count = 0;

       err = link_path_walk(name, nd);----------------------------------------------------------------------(1.1.2)

       if (!err && !(flags & LOOKUP_PARENT)) {------------------------------------------------------------(1.1.3)

              err = lookup_last(nd, &path);

              /* err > 0: the last component is a link that must be followed. */

              while (err > 0) {

                     void *cookie;

                     struct path link = path;

                     err = may_follow_link(&link, nd);

                     if (unlikely(err))

                            break;

                     nd->flags |= LOOKUP_PARENT;

                     err = follow_link(&link, nd, &cookie);

                     if (err)

                            break;

                     err = lookup_last(nd, &path);

                     put_link(nd, &link, cookie);

              }

       }

       if (!err)

              err = complete_walk(nd);

       if (!err && nd->flags & LOOKUP_DIRECTORY) {

              if (!d_can_lookup(nd->path.dentry)) {

                     path_put(&nd->path);

                     err = -ENOTDIR;

              }

       }

       /* 'base' is only non-NULL if path_init() handed back a file. */

       if (base)

              fput(base);

       /* Release the root reference unless LOOKUP_ROOT is set in nd->flags. */

       if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {

              path_put(&nd->root);

              nd->root.mnt = NULL;

       }

       return err;

}

(1.1.1)完成路径遍历前的nd初始化。nd数据结构如下:

/*
 * State carried through a path walk.  In the code quoted in this document,
 * kern_path_create() reads ->path (the parent after a LOOKUP_PARENT walk),
 * ->last (name of the final component), ->last_type and ->flags, and
 * path_lookupat() releases ->root when the walk ends.
 */
struct nameidata {

      struct path    path;

      struct qstr     last;

      struct path    root;

      struct inode  *inode; /* path.dentry.d_inode */

      unsigned int flags;

      unsigned       seq, m_seq;

      int          last_type;

      unsigned       depth;

      char *saved_names[MAX_NESTED_LINKS + 1];

};

(1.1.2)是个庞大的函数;涉及到vfs里的很多数据结构。(1.1.2)的示例代码:

link_path_walk(const char *name, struct nameidata *nd)

       while (*name=='/')                                              //滤掉路径开头的'/'

              name++;

       if (!*name)

              return 0;

       for(;;) {

              may_lookup(nd)                                            //检查权限

              len = hash_name(name, &this.hash)            //计算hash值

              type = LAST_NORM;                                     //正常分量

       or   

              type = LAST_DOTDOT;                                  //".."分量

       or   

              type = LAST_DOT;                                         //"."分量

              }            

             

              nd->last = this;

              nd->last_type = type;

             

              if (!name[len])                                       //处理"/x/y"类型的路径;已经是最后分量,需要结束查找

                     return 0;                                          //name[3] = '/'

                                                                          

                                                                             //处理 "/systemp.txt"类型路径

              do {                                                         //抛弃连续的'/'

                     len++;                                              //len=4  "usr/mydir/tmp.txt"

              } while (unlikely(name[len] == '/'));       //name[4] = 'm'

              if (!name[len])                                       //检测是否到最后分量

                     return 0;                                          //对于"/usr/"路径, 此时已到路径尾

              name += len;                                          //name = "mydir/tmp.txt"

              //查找(构建)某分量的dentry      

★          err = walk_component(nd, &next, LOOKUP_FOLLOW);   //构建本路径分量的dentry和inode

                                                        //实际上,先有dentry,然后构建inode;遇到mount_point,自动解析

              if (err) {                                                  //返回1,即LOOKUP_FOLLOW。表示要解析链接文件

                     err = nested_symlink(&next, nd);  

              }

然后是walk_component()函数:

walk_component()

       err = lookup_fast(nd, path, &inode)             //快速查找: 命中cache就成功,否则返回1,重新构建;

                                                                             //期间遇到mount_point, 会自动解析

              __d_lookup_rcu(parent, &nd->last, &seq)

       lookup_slow(nd, path)                                  //cache未命中,则执行此调用

              __lookup_hash(&nd->last, parent, nd->flags)

                     lookup_dcache(name, base, flags, &need_lookup)

                            d_alloc(dir, name)                                 //构建dentry

                     lookup_real(base->d_inode, dentry, flags)         

                            dir->i_op->lookup(dir, dentry, flags)      //构建inode

       path_to_nameidata(path, nd)                                    //更新nd

2.2.2 构建目录

再看(2):

/*
 * create_path() - create every missing parent directory of @nodepath.
 *
 * Works on a private copy of the path: each '/' is temporarily replaced by
 * a NUL so that the prefix up to that point can be passed to dev_mkdir(),
 * then restored before moving on.  The final component (after the last
 * '/') is never created here.
 *
 * Returns -ENOMEM if the copy cannot be allocated, otherwise the result of
 * the last dev_mkdir() call.  -EEXIST does not stop the loop, but if the
 * last dev_mkdir() call returns it, -EEXIST is what is returned.
 */
static int create_path(const char *nodepath)

{

       char *path;

       char *s;

       int err = 0;

       /* parent directories do not exist, create them */

       path = kstrdup(nodepath, GFP_KERNEL);

       if (!path)

              return -ENOMEM;

       s = path;

       for (;;) {

              s = strchr(s, '/');

              if (!s)

                     break;

              /* Terminate the string here so 'path' names one directory prefix. */

              s[0] = '\0';

              err = dev_mkdir(path, 0755);

              /* An already existing directory is not an error. */

              if (err && err != -EEXIST)

                     break;

              /* Restore the separator and continue after it. */

              s[0] = '/';

              s++;

       }

       kfree(path);

       return err;

}

dev_mkdir(path, 0755)函数是核心:

/*
 * dev_mkdir() - create the directory @name with permissions @mode.
 *
 * Uses kern_path_create() with LOOKUP_DIRECTORY to obtain the parent path
 * and a dentry for the new directory, creates it with vfs_mkdir() and tags
 * the new inode as kernel-created.
 *
 * Returns 0 on success or a negative errno from the lookup or vfs_mkdir().
 */
static int dev_mkdir(const char *name, umode_t mode)

{

       struct dentry *dentry;

       struct path path;

       int err;

       dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);

       if (IS_ERR(dentry))

              return PTR_ERR(dentry);

       /* path.dentry is the parent directory; dentry is the new one. */

       err = vfs_mkdir(path.dentry->d_inode, dentry, mode);

       if (!err)

              /* mark as kernel-created inode */

              dentry->d_inode->i_private = &thread;

       /* Pairs with the successful kern_path_create() above. */

       done_path_create(&path, dentry);

       return err;

}

该函数同样是先调用了kern_path_create()函数,这个函数在前文已经分析过,主要完成路径的查找工作;然后调用vfs_mkdir()完成目录的创建。

vfs_mkdir()将调用shmem_mkdir()来完成目录的创建工作:

/*
 * shmem_mkdir() - create a directory @dentry inside @dir.
 *
 * Delegates to shmem_mknod() with S_IFDIR added to @mode and a device
 * number of 0, then increments the parent's link count.
 *
 * Returns 0 on success or the error from shmem_mknod().
 */
static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)

{

       int error;

       if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))

              return error;

       /* The parent gains one link for the new subdirectory. */

       inc_nlink(dir);

       return 0;

}

进一步调用shmem_mknod()函数,传递的是mode | S_IFDIR,即新建目录。

/*
 * shmem_mknod() - create a new inode for @dentry inside directory @dir.
 *
 * @dir:    parent directory inode.
 * @dentry: negative dentry that will be bound to the new inode.
 * @mode:   file type and permission bits (shmem_mkdir() adds S_IFDIR).
 * @dev:    device number for device nodes, 0 otherwise.
 *
 * Returns 0 on success, -ENOSPC if no inode could be obtained, or the error
 * from simple_acl_create() / security_inode_init_security().  -EOPNOTSUPP
 * from the security hook is tolerated and treated as success.
 */
static int
shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)

{

       struct inode *inode;

       /* Result if shmem_get_inode() returns NULL. */

       int error = -ENOSPC;

       inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);

       if (inode) {

              error = simple_acl_create(dir, inode);

              if (error)

                     goto out_iput;

              error = security_inode_init_security(inode, dir,

                                               &dentry->d_name,

                                               shmem_initxattrs, NULL);

              if (error && error != -EOPNOTSUPP)

                     goto out_iput;

              error = 0;

              /* Account for the new entry in the parent directory. */

              dir->i_size += BOGO_DIRENT_SIZE;

              dir->i_ctime = dir->i_mtime = CURRENT_TIME;

              d_instantiate(dentry, inode);

              dget(dentry); /* Extra count - pin the dentry in core */

       }

       return error;

out_iput:

       /* Drop the inode obtained above on failure. */

       iput(inode);

       return error;

}

再深入shmem_get_inode()会发现,对于字符设备、块设备等特殊文件,关键地方在init_special_inode()(目录走的是S_IFDIR分支,不经过该函数)。

/*
 * init_special_inode() - set up @inode as a special file.
 *
 * @inode: inode to initialise.
 * @mode:  file type and permission bits, stored in i_mode.
 * @rdev:  device number, stored in i_rdev for character and block devices.
 *
 * Selects the default file operations by file type: character device,
 * block device, FIFO or socket.  Any other type only produces a debug
 * message and leaves i_fop untouched.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)

{

       inode->i_mode = mode;

       if (S_ISCHR(mode)) {

              inode->i_fop = &def_chr_fops;

              inode->i_rdev = rdev;

       } else if (S_ISBLK(mode)) {

              inode->i_fop = &def_blk_fops;

              inode->i_rdev = rdev;

       } else if (S_ISFIFO(mode))

              inode->i_fop = &pipefifo_fops;

       else if (S_ISSOCK(mode))

              inode->i_fop = &bad_sock_fops;

       else

              printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"

                              " inode %s:%lu\n", mode, inode->i_sb->s_id,

                              inode->i_ino);

}

字符设备、块设备等等一目了然。在APP打开设备文件时,调用的open()方法将来自于这里。

至此,就完成了目录文件的创建。下面将分析设备文件的创建。

2.2.3 构建设备文件

最后看(4):

第(4)步调用的vfs_mknod()其实在第(2)步创建目录时已经分析,不同的是(2)传递的是建立目录,而(4)是要建立设备文件,不再赘述。

2.3 二次mount

kdevtmpfs进程的运行,虽然完成了设备文件的管理工作,但是却没有向用户公开。而devtmpfs的二次mount解决了该问题。

二次mount的设备即为“devtmpfs”,挂接点是“/dev”。

/*
 * mount(2) system call entry.
 *
 * Copies the filesystem type, mount point, device name and option data in
 * from user space, calls do_mount() with the kernel copies, and releases
 * them again in reverse order of acquisition.  Each "out_*" label is the
 * jump target for a failure of the step it is named after, so it frees only
 * what was obtained before that step.
 *
 * Returns the result of do_mount(), or the error from whichever copy-in
 * step failed.
 */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,

              char __user *, type, unsigned long, flags, void __user *, data)

{

       int ret;

       char *kernel_type;

       struct filename *kernel_dir;

       char *kernel_dev;

       unsigned long data_page;

       ret = copy_mount_string(type, &kernel_type);

       if (ret < 0)

              goto out_type;

       kernel_dir = getname(dir_name);

       if (IS_ERR(kernel_dir)) {

              ret = PTR_ERR(kernel_dir);

              goto out_dir;

       }

       ret = copy_mount_string(dev_name, &kernel_dev);

       if (ret < 0)

              goto out_dev;

       ret = copy_mount_options(data, &data_page);

       if (ret < 0)

              goto out_data;

       ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags,

              (void *) data_page);

       /* Success and failure of do_mount() share the cleanup below. */

       free_page(data_page);

out_data:

       kfree(kernel_dev);

out_dev:

       putname(kernel_dir);

out_dir:

       kfree(kernel_type);

out_type:

       return ret;

}

显然主要工作集中于do_mount(),该函数又经过层层调用,最后分别调用了两个函数:

vfs_kern_mount(type, flags, name, data)---------------------------------------(A)

do_add_mount(real_mount(mnt), path, mnt_flags)--------------------------(B)

(A)主要完成文件系统的初始化,但因为前面初始化时,已经对fs进行了初始化,因此此次调用只需要新建vfsmount结构即可。

(B)主要完成vfsmount结构的注册,注册完成后更新mount_hashtable。

至此,用户才可以在/dev/目录下使用各个设备。

3 与统一设备模型的接口

统一设备模型里,几乎任何总线的任何设备,只要发生注册操作,即device_add()操作,就有可能调用devtmpfs留给统一设备模型的接口devtmpfs_create_node()。

devtmpfs_create_node()只与kdevtmpfs进程进行交互,交互的方法即是通过struct req结构:

/*
 * devtmpfs_create_node() - ask the kdevtmpfs thread to create the node for
 * @dev and wait for the result.
 *
 * The request lives on this function's stack; that works because the
 * function does not return until the thread has completed req.done.
 *
 * Returns 0 if the kdevtmpfs thread is not running (nothing is done),
 * -ENOMEM if device_get_devnode() yields no name, otherwise the result
 * stored in req.err by the thread.
 */
int devtmpfs_create_node(struct device *dev)

{

       const char *tmp = NULL;

       struct req req;

       if (!thread)

              return 0;

       /* Defaults; device_get_devnode() may override mode, uid and gid. */

       req.mode = 0;

       req.uid = GLOBAL_ROOT_UID;

       req.gid = GLOBAL_ROOT_GID;

       req.name = device_get_devnode(dev, &req.mode, &req.uid, &req.gid, &tmp);

       if (!req.name)

              return -ENOMEM;

       /* No mode supplied: fall back to 0600. */

       if (req.mode == 0)

              req.mode = 0600;

       if (is_blockdev(dev))

              req.mode |= S_IFBLK;

       else

              req.mode |= S_IFCHR;

       req.dev = dev;

       init_completion(&req.done);

       /* Push the request onto the head of the shared list. */

       spin_lock(&req_lock);

       req.next = requests;

       requests = &req;

       spin_unlock(&req_lock);

       wake_up_process(thread);                    // wake the devtmpfsd daemon thread

       wait_for_completion(&req.done);

       /* tmp is only freed here, after the thread has finished with the request. */

       kfree(tmp);

       return req.err;

}

进程被唤醒后,即着手建立设备文件。建立设备文件的过程,前文已经分析。

你可能感兴趣的:(linux,driver,linux)