普通文件可以用open或者creat创建,FIFO文件(命名管道)可以用mkfifo创建(pipe创建的是无名管道,不会在目录树中留下文件名),mknod主要用于设备文件的创建,也可以用来创建FIFO文件。
在内核中,mknod是由sys_mknod实现的,代码如下:
/*
 * sys_mknod - create a file-system node named by @filename.
 *
 * @filename: path of the node to create, e.g. /tmp/server_socket
 * @mode:     file type (S_IFMT bits) combined with the permission bits
 * @dev:      device number, handed through to vfs_mknod()
 *
 * A type of 0 or S_IFREG is created through vfs_create(); character
 * devices, block devices, FIFOs and sockets go through vfs_mknod().
 * Directories are refused with -EPERM and any other type with -EINVAL.
 *
 * Returns the result of vfs_create()/vfs_mknod(), or a negative errno
 * value if the path could not be resolved.
 */
asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
{
	int error = 0;
	char * tmp;
	struct dentry * dentry;
	struct nameidata nd;

	if (S_ISDIR(mode))
		return -EPERM;
	tmp = getname(filename);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	/* LOOKUP_PARENT: resolve the parent directory (here /tmp), not the last component. */
	if (path_init(tmp, LOOKUP_PARENT, &nd))
		error = path_walk(tmp, &nd);
	if (error)
		goto out;

	/*
	 * Look up the last component (/tmp/server_socket). For a node that
	 * does not exist yet the returned dentry has d_inode == NULL.
	 */
	dentry = lookup_create(&nd, 0);
	error = PTR_ERR(dentry);
	if (!IS_ERR(dentry)) {
		switch (mode & S_IFMT) {
		case 0:
		case S_IFREG:	/* regular file */
			error = vfs_create(nd.dentry->d_inode, dentry, mode);
			break;
		case S_IFCHR:
		case S_IFBLK:
		case S_IFIFO:
		case S_IFSOCK:	/* character device, block device, FIFO, socket */
			/*
			 * vfs_mknod() takes (dir, dentry, mode, dev): the new
			 * node's dentry must be passed so that the inode created
			 * by the file system can be attached to it.
			 */
			error = vfs_mknod(nd.dentry->d_inode, dentry, mode, dev);
			break;
		case S_IFDIR:
			error = -EPERM;
			break;
		default:
			error = -EINVAL;
		}
		dput(dentry);
	}
	/* lookup_create() did down() on the parent's i_sem on every path; release it here. */
	up(&nd.dentry->d_inode->i_sem);
	path_release(&nd);
out:
	putname(tmp);
	return error;
}
lookup_create,寻找/tmp/server_socket节点,代码如下:
static struct dentry *lookup_create(struct nameidata *nd, int is_dir) { struct dentry *dentry; down(&nd->dentry->d_inode->i_sem); dentry = ERR_PTR(-EEXIST); if (nd->last_type != LAST_NORM) goto fail; dentry = lookup_hash(&nd->last, nd->dentry);//nd->last是server_socket if (IS_ERR(dentry)) goto fail; if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) goto enoent; return dentry; enoent: dput(dentry); dentry = ERR_PTR(-ENOENT); fail: return dentry; }
struct dentry * lookup_hash(struct qstr *name, struct dentry * base)//name为server_socket,base为父节点/tmp的dentry结构 { struct dentry * dentry; struct inode *inode; int err; inode = base->d_inode;//父节点/tmp的i节点 err = permission(inode, MAY_EXEC); dentry = ERR_PTR(err); if (err) goto out; /* * See if the low-level filesystem might want * to use its own hash.. */ if (base->d_op && base->d_op->d_hash) { err = base->d_op->d_hash(base, name); dentry = ERR_PTR(err); if (err < 0) goto out; } dentry = cached_lookup(base, name, 0); if (!dentry) { struct dentry *new = d_alloc(base, name);//创建/tmp/server_socket节点的dentry结构 dentry = ERR_PTR(-ENOMEM); if (!new) goto out; lock_kernel(); dentry = inode->i_op->lookup(inode, new);//dentry为NULL unlock_kernel(); if (!dentry) dentry = new;//刚刚创建的new赋值给dentry,但是dentry->d_inode为NULL else dput(new); } out: return dentry; }vfs_mknod,创建/tmp/server_socket节点的inode结构,并关联到文件系统中,代码如下:
/*
 * vfs_mknod - create a special node in directory @dir.
 *
 * @dir:    inode of the parent directory (e.g. /tmp)
 * @dentry: dentry of the node to create (e.g. /tmp/server_socket)
 * @mode:   file type and permission bits; the current process's umask is
 *          cleared from it before use
 * @dev:    device number passed to the file system's ->mknod()
 *
 * Holds dir->i_zombie for the duration of the checks and the ->mknod()
 * call. On success the directory is notified with DN_CREATE.
 *
 * Returns 0 on success; -EPERM if a device node is requested without
 * CAP_MKNOD or the directory has no ->mknod() method; otherwise the error
 * from may_create() or from ->mknod().
 */
int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
	int rc;

	mode &= ~current->fs->umask;

	down(&dir->i_zombie);
	/*
	 * The capability check only applies when the node being created is
	 * a character or block device.
	 */
	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) {
		rc = -EPERM;
	} else {
		rc = may_create(dir, dentry);
		if (rc == 0) {
			if (dir->i_op && dir->i_op->mknod) {
				DQUOT_INIT(dir);
				lock_kernel();
				/* For Ext2 this method is ext2_mknod(). */
				rc = dir->i_op->mknod(dir, dentry, mode, dev);
				unlock_kernel();
			} else {
				rc = -EPERM;
			}
		}
	}
	up(&dir->i_zombie);

	if (rc == 0)
		inode_dir_notify(dir, DN_CREATE);
	return rc;
}
may_create,检查目标节点的inode结构是否已经存在(已存在则返回-EEXIST),同时检查父目录是否已被删除(返回-ENOENT)以及当前进程对父目录是否具有写和执行权限,代码如下:
static inline int may_create(struct inode *dir, struct dentry *child) { if (child->d_inode)//也就是检查d_inode是否为NULL return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; return permission(dir,MAY_WRITE | MAY_EXEC); }
对于Ext2,dir->i_op->mknod是ext2_mknod,代码如下:
static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, int rdev) { struct inode * inode = ext2_new_inode (dir, mode);//分配了一个inode结构 int err = PTR_ERR(inode); if (IS_ERR(inode)) return err; inode->i_uid = current->fsuid; init_special_inode(inode, mode, rdev); err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, inode);//inode关联到文件系统中,也就是通过父节点inode结构,能够找到新创建的子节点的inode结构 if (err) goto out_no_entry; mark_inode_dirty(inode);//新创建的inode结构设置成"脏" d_instantiate(dentry, inode);//将新创建的inode结构与dentry结构相关联 return 0; out_no_entry: inode->i_nlink--; mark_inode_dirty(inode); iput(inode); return err; }
void init_special_inode(struct inode *inode, umode_t mode, int rdev) { inode->i_mode = mode; if (S_ISCHR(mode)) {//字符设备 inode->i_fop = &def_chr_fops; inode->i_rdev = to_kdev_t(rdev); } else if (S_ISBLK(mode)) {//块设备 inode->i_fop = &def_blk_fops; inode->i_rdev = to_kdev_t(rdev); inode->i_bdev = bdget(rdev); } else if (S_ISFIFO(mode))//FIFO设备 inode->i_fop = &def_fifo_fops; else if (S_ISSOCK(mode))//socket设备 inode->i_fop = &bad_sock_fops; else printk(KERN_DEBUG "init_special_inode: bogus imode (%o)\n", mode); }由于新创建的inode结构设置成了“脏”,内核在"同步"内存中的inode结构与磁盘上的索引节点的时候,就会将这个inode结构的内容写到磁盘上分配给这个文件的索引节点,即ext2_inode数据结构中。由于ext2_inode结构中并不存在i_rdev这么个成分,而对于设备文件却又不需要使用i_block[]数组,所以就挪用其i_block[0]来保存设备号。要了解这一点,主要看ext2_update_inode代码中一个片段:
/*
 * Excerpt from ext2_update_inode(): when the in-memory inode is written
 * back, a character or block device stores its device number in
 * raw_inode->i_block[0]; every other file type copies the whole i_data[]
 * array into i_block[].
 */
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
	/* FIFOs and sockets have no device number, so they do not take this branch. */
	raw_inode->i_block[0] = cpu_to_le32(kdev_t_to_nr(inode->i_rdev));
else for (block = 0; block < EXT2_N_BLOCKS; block++)
	raw_inode->i_block[block] = inode->u.ext2_i.i_data[block];
/*
 * NOTE(review): the text does not name the function this excerpt comes
 * from; it looks like the read-side counterpart of the fragment above
 * (presumably ext2_read_inode) - confirm.
 *
 * i_block[] is copied into the in-memory i_data[]; for anything that is
 * not an ACL inode, regular file, directory or symlink, i_block[0] is
 * passed to init_special_inode() as the device number.
 */
for (block = 0; block < EXT2_N_BLOCKS; block++)
	inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];
if (inode->i_ino == EXT2_ACL_IDX_INO || inode->i_ino == EXT2_ACL_DATA_INO)
	/* Nothing to do */ ;
else if (S_ISREG(inode->i_mode)) {
	/* body empty in this excerpt */
} else if (S_ISDIR(inode->i_mode)) {
	/* body empty in this excerpt */
} else if (S_ISLNK(inode->i_mode)) {
	/* body empty in this excerpt */
} else
	init_special_inode(inode, inode->i_mode, le32_to_cpu(raw_inode->i_block[0]));
我们回过头想一想,在《Linux内核源代码情景分析-文件系统的安装》一文中,设备文件/dev/sdb1就是通过mknod建立的。
另外,在《Linux内核源代码情景分析-基于socket的进程间通信》一文中,/tmp/server_socket也是通过vfs_mknod创建的。