转自:devtmpfs分析_zdy0_2004的博客-CSDN博客
1 初始化
1.1 文件系统注册
2 运行
2.1 devtmpfsd进程
2.2 创建设备文件
2.2.1 遍历路径
2.2.2 构建目录
2.2.3 构建设备文件
2.3 二次mount
3 与统一设备模型的接口
devtmpfs主要完成了对设备文件创建的管理工作。它是统一设备模型的基础之一。该fs在初始化过程中分为两个阶段。在内核启动阶段,完成了fs的注册以及后台进程的创建;在mdev或udev或某些启动命令下,完成了fs的二次mount。参考内核版本为3.17内核。
1 初始化
与devtmpfs相关的代码位于drivers\base\devtmpfs.c文件。devtmpfs文件系统的初始化同驱动初始化同时进行,调用路径如下:
do_basic_setup()-->driver_init()-->devtmpfs_init()。
devtmpfs_init()函数一方面完成了文件系统的注册;另一方面启动了一个进程kdevtmpfs。
/*
 * Register the devtmpfs filesystem type and start the "kdevtmpfs" thread.
 *
 * The thread reports the outcome of its setup phase through the local
 * err (its address is handed to devtmpfsd()); this function blocks on
 * setup_done until that result is available.
 *
 * Returns 0 on success.  On failure the error is logged and returned,
 * and the filesystem type is unregistered again if the thread could not
 * be created or its setup failed.
 */
int __init devtmpfs_init(void)
{
	int err;

	err = register_filesystem(&dev_fs_type);
	if (err != 0) {
		printk(KERN_ERR "devtmpfs: unable to register devtmpfs type %i\n", err);
		return err;
	}

	thread = kthread_run(devtmpfsd, &err, "kdevtmpfs");
	if (IS_ERR(thread)) {
		/* thread creation itself failed */
		err = PTR_ERR(thread);
		thread = NULL;
	} else {
		/* devtmpfsd() writes its setup result into err before completing */
		wait_for_completion(&setup_done);
	}

	if (err == 0) {
		printk(KERN_INFO "devtmpfs: initialized\n");
		return 0;
	}

	printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err);
	unregister_filesystem(&dev_fs_type);
	return err;
}
1.1 文件系统注册
devtmpfs文件系统的注册由register_filesystem(&dev_fs_type)完成。
文件系统类型定义:
/* Filesystem type descriptor for devtmpfs; registered by devtmpfs_init(). */
static struct file_system_type dev_fs_type = {
.name = "devtmpfs", /* type name, the same string devtmpfsd() passes to sys_mount() */
.mount = dev_mount, /* mount callback (defined outside this excerpt) */
.kill_sb = kill_litter_super,
};
2 运行
在内核的初始化阶段完成后,devtmpfs就已经可以被统一设备模型子系统使用,但是还不能被用户使用。
2.1 devtmpfsd进程
/*
 * Main function of the "kdevtmpfs" kernel thread.
 *
 * @p: pointer to an int that receives the result of the setup phase
 *     (devtmpfs_init() passes the address of its local err).
 *
 * Setup phase: unshare the mount namespace, mount devtmpfs on "/", then
 * chdir/chroot into it and signal setup_done.
 * Service phase: loop forever, handling every queued struct req and
 * sleeping whenever the queue is empty.
 *
 * Returns *err if the setup phase fails; setup_done is still completed
 * in that case so the waiter is released.
 */
static int devtmpfsd(void *p)
{
char options[] = "mode=0755";
int *err = p;
*err = sys_unshare(CLONE_NEWNS);
if (*err)
goto out;
*err = sys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, options); /* first mount */
if (*err)
goto out;
sys_chdir("/.."); /* will traverse into overmounted root */
sys_chroot(".");
complete(&setup_done);
while (1) {
spin_lock(&req_lock);
while (requests) {
/* detach the whole pending list so it is processed without holding req_lock */
struct req *req = requests;
requests = NULL;
spin_unlock(&req_lock);
while (req) {
/*
 * Read ->next before completing: the requester stops waiting once
 * ->done is completed, and its req is a local variable of
 * devtmpfs_create_node().
 */
struct req *next = req->next;
req->err = handle(req->name, req->mode,
req->uid, req->gid, req->dev);
complete(&req->done);
req = next;
}
/* re-take the lock and look for requests queued in the meantime */
spin_lock(&req_lock);
}
/* queue is empty: change state while still holding req_lock, then sleep */
__set_current_state(TASK_INTERRUPTIBLE);
spin_unlock(&req_lock);
schedule(); /* sleep until a create or delete request wakes this thread */
}
/* not reached: the loop above has no exit */
return 0;
out:
/* setup failed: still release whoever waits on setup_done */
complete(&setup_done);
return *err;
}
该进程在fs初始化时创建。主要完成了fs的第一次mount工作,然后进入while循环,在循环体内部,设置进程状态为TASK_INTERRUPTIBLE,换出进程,等待被唤醒。
kdevtmpfs进程被唤醒离不开数据结构req:
/*
 * One pending request for the kdevtmpfs thread.  Requests are chained
 * through ->next into the singly linked list headed by `requests`.
 */
static struct req {
struct req *next; /* next pending request; NULL ends the list */
struct completion done; /* completed by devtmpfsd() once the request has been handled */
int err; /* result of handle() for this request */
const char *name; /* node path handed to handle() */
umode_t mode; /* 0 => delete */
kuid_t uid; /* owner passed to handle() */
kgid_t gid; /* group passed to handle() */
struct device *dev; /* device the node is created for */
} *requests;
定义了struct req类型的requests变量;客户进程通过构建req,并插入requests链表来请求建立设备文件的服务。
req结构体的name成员即为设备文件的路径名,然而路径名是不带/dev前缀。比如”/dev/input/eventX”文件建立时,传递给devtmpfs的路径名却是”input/eventX”。理解这点涉及到vfs和进程的概念。
2.2 创建设备文件
当有客户进程需要创建设备文件,就会唤醒devtmpfsd进程。该进程会执行handle(req->name, req->mode,req->uid, req->gid, req->dev)操作。最终调用static int handle_create()函数。
/*
 * Create the device node @nodename for @dev and give it the requested
 * mode and ownership.
 *
 * @nodename: path of the node to create
 * @mode:     file mode, including the node type bits
 * @uid/@gid: owner and group applied after creation
 * @dev:      device whose ->devt becomes the node's device number
 *
 * Returns 0 on success, the error from kern_path_create() if the parent
 * lookup fails, or the error from vfs_mknod().
 *
 * The trailing "----(n)" markers are the article's step annotations and
 * are not part of the C code.
 */
static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
kgid_t gid, struct device *dev)
{
struct dentry *dentry;
struct path path;
int err;
/* look up the parent and obtain a dentry for the new entry */
dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);-----------------------(1)
if (dentry == ERR_PTR(-ENOENT)) {
/*
 * Parent directories are missing: build them and retry.  The return
 * value of create_path() is ignored; the retry decides the outcome.
 */
create_path(nodename);-------------------------------------------------------------------(2)
dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);------------------(3)
}
if (IS_ERR(dentry))
return PTR_ERR(dentry);
err = vfs_mknod(path.dentry->d_inode, dentry, mode, dev->devt);-------------------(4)
if (!err) {
/* apply mode, owner and group to the freshly created inode */
struct iattr newattrs;
newattrs.ia_mode = mode;
newattrs.ia_uid = uid;
newattrs.ia_gid = gid;
newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
mutex_lock(&dentry->d_inode->i_mutex);
/* the result of notify_change() is not checked */
notify_change(dentry, &newattrs, NULL);
mutex_unlock(&dentry->d_inode->i_mutex);
/* mark as kernel-created inode */
dentry->d_inode->i_private = &thread;
}
/* pairs with the successful kern_path_create() above */
done_path_create(&path, dentry);
return err;
}
(1)负责查找父路径的dentry;(2)负责构建目录;(3)在目录构建完成后重新执行(1)的查找;(4)负责构建目标设备文件。
2.2.1 遍历路径
先看(1):
/*
 * Look up the parent directory of @pathname and return a dentry for the
 * final component, ready for the caller to create.
 *
 * @dfd:          passed through to do_path_lookup() as the lookup base
 * @pathname:     path of the object to be created
 * @path:         on success, receives the parent's path (nd.path)
 * @lookup_flags: only LOOKUP_REVAL and LOOKUP_DIRECTORY are honoured
 *
 * Returns the dentry on success, otherwise an ERR_PTR():
 *   - the error from do_path_lookup() or lookup_hash();
 *   - -EEXIST if the last component is not a normal name or already exists;
 *   - -ENOENT if the name has a trailing slash and LOOKUP_DIRECTORY was not set;
 *   - the mnt_want_write() error, reported only after the checks above.
 *
 * On success the function returns with the parent's i_mutex still locked
 * and without dropping the write access taken by mnt_want_write(); the
 * callers in this article pair it with done_path_create().
 *
 * The trailing "----(n)" markers are the article's step annotations and
 * are not part of the C code.
 */
struct dentry *kern_path_create(int dfd, const char *pathname,
struct path *path, unsigned int lookup_flags)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
struct nameidata nd;
int err2;
int error;
bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
/*
* Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
* other flags passed in are ignored!
*/
lookup_flags &= LOOKUP_REVAL;
/* LOOKUP_PARENT: the walk stops at the parent of the last component */
error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd);--------------(1.1)
if (error)
return ERR_PTR(error);
/*
* Yucky last component or no last component at all?
* (foo/., foo/.., /)
*/
if (nd.last_type != LAST_NORM)
goto out;
nd.flags &= ~LOOKUP_PARENT;
nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
/* don't fail immediately if it's r/o, at least try to report other errors */
err2 = mnt_want_write(nd.path.mnt);
/*
* Do the final lookup.
*/
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);-----------------------------------------------------------------------------(1.2)
if (IS_ERR(dentry))
goto unlock;
error = -EEXIST;
if (d_is_positive(dentry)) /* article note: d_is_positive() is dentry->d_flags & DCACHE_ENTRY_TYPE */
goto fail;
/*
* Special case - lookup gave negative, but... we had foo/bar/
* From the vfs_mknod() POV we just have a negative dentry -
* all is fine. Let's be bastards - you had / on the end, you've
* been asking for (non-existent) directory. -ENOENT for you.
*/
if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
error = -ENOENT;
goto fail;
}
/* now report the deferred mnt_want_write() failure, if any */
if (unlikely(err2)) {
error = err2;
goto fail;
}
/* success: hand the parent path to the caller; the lock stays held */
*path = nd.path;
return dentry;
fail:
dput(dentry);
dentry = ERR_PTR(error);
unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
/* drop write access only if mnt_want_write() had succeeded */
if (!err2)
mnt_drop_write(nd.path.mnt);
out:
path_put(&nd.path);
return dentry;
}
(1.1)完成了对路径dentry的查找工作。do_path_lookup()调用filename_lookup(),然后调用path_lookupat()函数。(1.1)的主要功能即由path_lookupat()来完成:
/*
 * Resolve @name and leave the result in @nd.
 *
 * @dfd:   passed to path_init() as the lookup base
 * @name:  path to walk
 * @flags: lookup flags; with LOOKUP_PARENT the final component is not
 *         looked up (the lookup_last() branch below is skipped)
 * @nd:    walk state, initialised by path_init()
 *
 * Returns 0 on success or a negative error.  -ENOTDIR is returned when
 * LOOKUP_DIRECTORY is set in nd->flags and the result fails
 * d_can_lookup().
 *
 * The trailing "----(n)" markers are the article's step annotations and
 * are not part of the C code.
 */
static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
struct file *base = NULL;
struct path path;
int err;
/*
* Path walking is largely split up into 2 different synchronisation
* schemes, rcu-walk and ref-walk (explained in
* Documentation/filesystems/path-lookup.txt). These share much of the
* path walk code, but some things particularly setup, cleanup, and
* following mounts are sufficiently divergent that functions are
* duplicated. Typically there is a function foo(), and its RCU
* analogue, foo_rcu().
*
* -ECHILD is the error number of choice (just to avoid clashes) that
* is returned if some aspect of an rcu-walk fails. Such an error must
* be handled by restarting a traditional ref-walk (which will always
* be able to complete).
*/
err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);----------------------------(1.1.1)
if (unlikely(err))
return err;
/* reset the per-task link counter before walking (presumably the symlink-follow budget) */
current->total_link_count = 0;
err = link_path_walk(name, nd);----------------------------------------------------------------------(1.1.2)
/* resolve the final component only if the caller did not ask for the parent */
if (!err && !(flags & LOOKUP_PARENT)) {------------------------------------------------------------(1.1.3)
err = lookup_last(nd, &path);
/* a positive result means the last component is a link that must be followed */
while (err > 0) {
void *cookie;
struct path link = path;
err = may_follow_link(&link, nd);
if (unlikely(err))
break;
nd->flags |= LOOKUP_PARENT;
err = follow_link(&link, nd, &cookie);
if (err)
break;
err = lookup_last(nd, &path);
put_link(nd, &link, cookie);
}
}
if (!err)
err = complete_walk(nd);
/* LOOKUP_DIRECTORY: the result must be something that can be looked up in */
if (!err && nd->flags & LOOKUP_DIRECTORY) {
if (!d_can_lookup(nd->path.dentry)) {
path_put(&nd->path);
err = -ENOTDIR;
}
}
/* drop the file reference path_init() may have handed back through base */
if (base)
fput(base);
/* release the root reference unless the caller supplied it (LOOKUP_ROOT) */
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
path_put(&nd->root);
nd->root.mnt = NULL;
}
return err;
}
(1.1.1)完成路径遍历前的nd初始化。nd数据结构如下:
/* State carried through a path walk (filled in by path_init() and the walk helpers). */
struct nameidata {
struct path path; /* current position; the parent directory after a LOOKUP_PARENT walk */
struct qstr last; /* last path component (name and length) */
struct path root; /* root used for the walk; released by path_lookupat() unless LOOKUP_ROOT */
struct inode *inode; /* path.dentry.d_inode */
unsigned int flags; /* LOOKUP_* flags */
unsigned seq, m_seq; /* NOTE(review): presumably sequence counts for rcu-walk - confirm */
int last_type; /* kind of the last component: LAST_NORM, LAST_DOT, LAST_DOTDOT, ... */
unsigned depth; /* NOTE(review): presumably the current symlink nesting depth - confirm */
char *saved_names[MAX_NESTED_LINKS + 1]; /* NOTE(review): presumably one slot per nested link - confirm */
};
(1.1.2)是个庞大的函数;涉及到vfs里的很多数据结构。(1.1.2)的示例代码:
link_path_walk(const char *name, struct nameidata *nd)
while (*name=='/') //滤掉路径开头的'/'
name++;
if (!*name)
return 0;
for(;;) {
may_lookup(nd) //检查权限
len = hash_name(name, &this.hash) //计算hash值
type = LAST_NORM; //正常分量
or
type = LAST_DOTDOT; //".."分量
or
type = LAST_DOT; //"."分量
}
nd->last = this;
nd->last_type = type;
if (!name[len]) //处理"/x/y"类型的路径;已经是最后分量,需要结束查找
return 0; //name[3] = '/'
//处理 "/systemp.txt"类型路径
do { //抛弃连续的'/'
len++; //len=4 "usr/mydir/tmp.txt"
} while (unlikely(name[len] == '/')); //name[4] = 'm'
if (!name[len]) //检测是否到最后分量
return 0; //对于"/usr/"路径, 此时已到路径尾
name += len; //name = "mydir/tmp.txt"
//查找(构建)某分量的dentry
★ err = walk_component(nd, &next, LOOKUP_FOLLOW); //构建本路径分量的dentry和inode
//实际上,先有dentry,然后构建inode;遇到mount_point,自动解析
if (err) { //返回1,即LOOKUP_FOLLOW。表示要解析链接文件
err = nested_symlink(&next, nd);
}
然后是walk_component()函数:
walk_component()
err = lookup_fast(nd, path, &inode) //快速查找: 命中cache就成功,否则返回1,重新构建;
//期间遇到mount_point, 会自动解析
__d_lookup_rcu(parent, &nd->last, &seq)
lookup_slow(nd, path) //cache未命中,则执行此调用
__lookup_hash(&nd->last, parent, nd->flags)
lookup_dcache(name, base, flags, &need_lookup)
d_alloc(dir, name) //构建dentry
lookup_real(base->d_inode, dentry, flags)
dir->i_op->lookup(dir, dentry, flags) //构建inode
path_to_nameidata(path, nd) //更新nd
2.2.2 构建目录
再看(2):
/*
 * Create every missing parent directory of @nodepath.
 *
 * Works on a private copy of the path: each '/' is temporarily replaced
 * by a terminator so the prefix in front of it can be handed to
 * dev_mkdir(), then restored before moving on to the next separator.
 *
 * Returns -ENOMEM if the copy cannot be allocated; otherwise the result
 * of the last dev_mkdir() call (0 if the path contains no '/').  An
 * -EEXIST result does not stop the walk.
 */
static int create_path(const char *nodepath)
{
	char *path;
	char *sep;
	int err = 0;

	/* parent directories do not exist, create them */
	path = kstrdup(nodepath, GFP_KERNEL);
	if (path == NULL)
		return -ENOMEM;

	for (sep = strchr(path, '/'); sep != NULL; sep = strchr(sep + 1, '/')) {
		*sep = '\0';
		err = dev_mkdir(path, 0755);
		if (err != 0 && err != -EEXIST)
			break;
		*sep = '/';
	}

	kfree(path);
	return err;
}
dev_mkdir(path, 0755)函数是核心:
/*
 * Create the directory @name with permissions @mode.
 *
 * The parent is resolved with kern_path_create() (LOOKUP_DIRECTORY), the
 * directory itself is made with vfs_mkdir(), and on success the new
 * inode is tagged through i_private as created by the kernel.
 *
 * Returns 0 on success, the kern_path_create() error if the lookup
 * fails, or the vfs_mkdir() error.
 */
static int dev_mkdir(const char *name, umode_t mode)
{
	struct path parent;
	struct dentry *child;
	int ret;

	child = kern_path_create(AT_FDCWD, name, &parent, LOOKUP_DIRECTORY);
	if (IS_ERR(child))
		return PTR_ERR(child);

	ret = vfs_mkdir(parent.dentry->d_inode, child, mode);
	if (ret == 0) {
		/* mark as kernel-created inode */
		child->d_inode->i_private = &thread;
	}

	done_path_create(&parent, child);
	return ret;
}
该函数同样是先调用了kern_path_create()函数,这个函数在前文已经分析过,主要完成路径的查找工作;然后调用vfs_mkdir()完成目录的创建。
vfs_mkdir()将调用shmem_mkdir()来完成目录的创建工作:
/*
 * Create a directory: delegate to shmem_mknod() with S_IFDIR added to
 * @mode and, if that succeeds, bump the link count of the parent @dir.
 *
 * Returns 0 on success or the error from shmem_mknod().
 */
static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int error;

	error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0);
	if (error != 0)
		return error;

	inc_nlink(dir);
	return 0;
}
进一步调用shmem_mknod()函数,传递的是mode | S_IFDIR,即新建目录。
/*
 * Create a new inode of type @mode in directory @dir and attach it to
 * @dentry.  @dev is handed to shmem_get_inode() as the device number.
 *
 * Returns 0 on success, -ENOSPC if shmem_get_inode() yields no inode, or
 * the error from simple_acl_create() / security_inode_init_security().
 * -EOPNOTSUPP from the security hook is ignored.  On those error paths
 * the new inode is released again with iput().
 *
 * The return-type line ("static int") had been dropped from the quoted
 * definition and is restored here.
 */
static int
shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
	struct inode *inode;
	int error = -ENOSPC;

	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
	if (inode) {
		error = simple_acl_create(dir, inode);
		if (error)
			goto out_iput;
		error = security_inode_init_security(inode, dir,
						     &dentry->d_name,
						     shmem_initxattrs, NULL);
		if (error && error != -EOPNOTSUPP)
			goto out_iput;

		error = 0;
		/* account for the new entry in the parent directory */
		dir->i_size += BOGO_DIRENT_SIZE;
		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
		d_instantiate(dentry, inode);
		dget(dentry); /* Extra count - pin the dentry in core */
	}
	return error;
out_iput:
	iput(inode);
	return error;
}
再深入shmem_get_inode()会发现关键地方在init_special_inode()。
/*
 * Set up @inode as a special file of the type encoded in @mode.
 *
 * i_mode is always set to @mode.  Character and block devices also get
 * @rdev stored in i_rdev and the matching default file operations;
 * FIFOs and sockets only get their file operations.  Any other type is
 * reported with a debug message and otherwise left untouched.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;

	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
		return;
	}

	if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
		return;
	}

	if (S_ISFIFO(mode)) {
		inode->i_fop = &pipefifo_fops;
		return;
	}

	if (S_ISSOCK(mode)) {
		inode->i_fop = &bad_sock_fops;
		return;
	}

	printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for inode %s:%lu\n",
	       mode, inode->i_sb->s_id, inode->i_ino);
}
字符设备、块设备等等一目了然。在APP打开设备文件时,调用的open()方法将来自于这里。
至此,就完成了目录文件的创建。下面将分析设备文件的创建。
2.2.3 构建设备文件
最后看(4):
第(4)步调用的vfs_mknod()其实在第(2)步创建目录时已经分析,不同的是(2)传递的是建立目录,而(4)是要建立设备文件,不再赘述。
2.3 二次mount
kdevtmpfs进程的运行,虽然完成了设备文件的管理工作,但是却没有向用户公开。而devtmpfs的二次mount解决了该问题。
二次mount的设备即为”devtmpfs”,挂接点是”/dev”。
/*
 * mount system call entry point.
 *
 * Copies the user-supplied filesystem type, mount point, device name and
 * option data into kernel memory, hands them to do_mount(), and releases
 * the copies again.  The cleanup labels are laid out in reverse order of
 * acquisition, so a failure at any step frees only what was obtained
 * before it.
 *
 * Returns the result of do_mount(), or the error of the first copy step
 * that failed.
 */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
char __user *, type, unsigned long, flags, void __user *, data)
{
int ret;
char *kernel_type;
struct filename *kernel_dir;
char *kernel_dev;
unsigned long data_page;
ret = copy_mount_string(type, &kernel_type);
if (ret < 0)
goto out_type;
kernel_dir = getname(dir_name);
if (IS_ERR(kernel_dir)) {
ret = PTR_ERR(kernel_dir);
goto out_dir;
}
ret = copy_mount_string(dev_name, &kernel_dev);
if (ret < 0)
goto out_dev;
ret = copy_mount_options(data, &data_page);
if (ret < 0)
goto out_data;
ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags,
(void *) data_page);
/* only reached when copy_mount_options() succeeded */
free_page(data_page);
out_data:
kfree(kernel_dev);
out_dev:
putname(kernel_dir);
out_dir:
kfree(kernel_type);
out_type:
return ret;
}
显然主要工作集中于do_mount(),该函数又经过层层调用,最后分别调用了两个函数:
vfs_kern_mount(type, flags, name, data)---------------------------------------(A)
do_add_mount(real_mount(mnt), path, mnt_flags)--------------------------(B)
(A)主要完成文件系统的初始化,但因为前面初始化时,已经对fs进行了初始化,因此此次调用只需要新建vfsmount结构即可。
(B)主要完成vfsmount结构的注册,注册完成后更新mount_hashtable。
至此,用户才可以在/dev/目录下使用各个设备。
3 与统一设备模型的接口
统一设备模型里,几乎任何总线的任何设备,只要发生注册操作,即device_add()操作,就有可能调用devtmpfs留给统一设备模型的接口devtmpfs_create_node().
devtmpfs_create_node()只与进程kdevtmpfs进程进行交互,交互的方法即是通过struct req结构:
/*
 * Ask the kdevtmpfs thread to create the device node for @dev and wait
 * for the result.
 *
 * Returns 0 if the kdevtmpfs thread does not exist (nothing to do),
 * -ENOMEM if device_get_devnode() yields no name, otherwise the result
 * the thread stored in req.err.
 */
int devtmpfs_create_node(struct device *dev)
{
const char *tmp = NULL;
struct req req;
if (!thread)
return 0;
req.mode = 0;
req.uid = GLOBAL_ROOT_UID;
req.gid = GLOBAL_ROOT_GID;
/* may override mode/uid/gid; tmp presumably receives an allocated name buffer (it is kfree'd below) */
req.name = device_get_devnode(dev, &req.mode, &req.uid, &req.gid, &tmp);
if (!req.name)
return -ENOMEM;
/* no mode was supplied: default to 0600 */
if (req.mode == 0)
req.mode = 0600;
/* add the node type: block device or character device */
if (is_blockdev(dev))
req.mode |= S_IFBLK;
else
req.mode |= S_IFCHR;
req.dev = dev;
init_completion(&req.done);
/* push the on-stack request onto the head of the global list */
spin_lock(&req_lock);
req.next = requests;
requests = &req;
spin_unlock(&req_lock);
wake_up_process(thread); /* wake the devtmpfsd daemon thread */
/* req lives on this stack frame, so do not return before it has been handled */
wait_for_completion(&req.done);
kfree(tmp);
return req.err;
}
进程被唤醒后,即着手建立设备文件。建立设备文件的过程,前文已经分析。