vfs是在应用程序和真实文件系统之间插了一层
应用
↓
vfs
↓
ext2
怎么体现的?
比如应用执行read时,首先会调用到vfs的sys_read系统调用,然后再由sys_read转至真实文件系统的read函数中。
也就是vfs向上提供统一的系统调用接口,向下提供一个callbak函数指针,由真实文件系统实现vfs的这些系统调用的callbak函数。
类比于tcpip的协议无关接口,vfs好像充当了“文件系统无关接口”
真实文件系统除了需要实现vfs的系统调用callbak函数,还需要实现vfs通用文件模型的一些数据结构,才能正常工作。
vfs 通用文件模型
四大对象及其对应的操作函数集(refer ulk3)
1.超级块对象 super_block 和 super_operations
2.索引节点对象 inode (index node) 和 inode_operations
3.目录项对象 dentry 和 dentry_operations
4.文件对象 file 和 file_operations
1.超级块对象 super_block 和 super_operations
VFS中保存了系统中挂接的文件系统的链表以及这些文件系统对应的VFS超级块。系统启动后所有被初始化的文件系统都要向VFS登记。每个已安装的文件系统由一个VFS 超块表示
include/linux/fs.h
struct super_block {
struct list_head s_list; /* Keep this first */
dev_t s_dev; /* search index; _not_ kdev_t */
unsigned char s_dirt;
unsigned char s_blocksize_bits;
unsigned long s_blocksize;
loff_t s_maxbytes; /* Max file size */
struct file_system_type *s_type;
const struct super_operations *s_op;
const struct dquot_operations *dq_op;
const struct quotactl_ops *s_qcop;
const struct export_operations *s_export_op;
unsigned long s_flags;
unsigned long s_magic;
struct dentry *s_root;
struct rw_semaphore s_umount;
struct mutex s_lock;
int s_count;
atomic_t s_active;
#ifdef CONFIG_SECURITY
void *s_security;
#endif
const struct xattr_handler **s_xattr;
struct list_head s_inodes; /* all inodes */
struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
#ifdef CONFIG_SMP
struct list_head __percpu *s_files;
#else
struct list_head s_files;
#endif
/* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
struct block_device *s_bdev;
struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd;
struct list_head s_instances;
struct quota_info s_dquot; /* Diskquota specific options */
int s_frozen;
wait_queue_head_t s_wait_unfrozen;
char s_id[32]; /* Informational name */
u8 s_uuid[16]; /* UUID */
void *s_fs_info; /* Filesystem private info */
fmode_t s_mode;
/* Granularity of c/m/atime in ns.
Cannot be worse than a second */
u32 s_time_gran;
/*
* The next field is for VFS *only*. No filesystems have any business
* even looking at it. You had been warned.
*/
struct mutex s_vfs_rename_mutex; /* Kludge */
/*
* Filesystem subtype. If non-empty the filesystem type field
* in /proc/mounts will be "type.subtype"
*/
char *s_subtype;
/*
* Saved mount options for lazy filesystems using
* generic_show_options()
*/
char __rcu *s_options;
const struct dentry_operations *s_d_op; /* default d_op for dentries */
/*
* Saved pool identifier for cleancache (-1 means none)
*/
int cleancache_poolid;
};
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
void (*destroy_inode)(struct inode *);
void (*dirty_inode) (struct inode *, int flags);
int (*write_inode) (struct inode *, struct writeback_control *wbc);
int (*drop_inode) (struct inode *);
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
void (*write_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
int (*freeze_fs) (struct super_block *);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
void (*umount_begin) (struct super_block *);
int (*show_options)(struct seq_file *, struct vfsmount *);
int (*show_devname)(struct seq_file *, struct vfsmount *);
int (*show_path)(struct seq_file *, struct vfsmount *);
int (*show_stats)(struct seq_file *, struct vfsmount *);
#ifdef CONFIG_QUOTA
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
};
2.索引节点对象 inode (index node) 和 inode_operations
唯一的标识磁盘上的一个文件(目录也是文件),描述文件的物理属性。
include/linux/fs.h
struct inode {
/* RCU path lookup touches following: */
umode_t i_mode;
uid_t i_uid;
gid_t i_gid;
const struct inode_operations *i_op;
struct super_block *i_sb;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned int i_flags;
unsigned long i_state;
#ifdef CONFIG_SECURITY
void *i_security;
#endif
struct mutex i_mutex;
unsigned long dirtied_when; /* jiffies of first dirtying */
struct hlist_node i_hash;
struct list_head i_wb_list; /* backing dev IO list */
struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
union {
struct list_head i_dentry;
struct rcu_head i_rcu;
};
unsigned long i_ino;
atomic_t i_count;
unsigned int i_nlink;
dev_t i_rdev;
unsigned int i_blkbits;
u64 i_version;
loff_t i_size;
#ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount;
#endif
struct timespec i_atime;
struct timespec i_mtime;
struct timespec i_ctime;
blkcnt_t i_blocks;
unsigned short i_bytes;
struct rw_semaphore i_alloc_sem;
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
struct file_lock *i_flock;
struct address_space *i_mapping;
struct address_space i_data;
#ifdef CONFIG_QUOTA
struct dquot *i_dquot[MAXQUOTAS];
#endif
struct list_head i_devices;
union {
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev;
struct cdev *i_cdev;
};
__u32 i_generation;
#ifdef CONFIG_FSNOTIFY
__u32 i_fsnotify_mask; /* all events this inode cares about */
struct hlist_head i_fsnotify_marks;
#endif
#ifdef CONFIG_IMA
atomic_t i_readcount; /* struct files open RO */
#endif
atomic_t i_writecount;
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
#endif
void *i_private; /* fs or device private pointer */
};
struct inode_operations {
struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
void * (*follow_link) (struct dentry *, struct nameidata *);
int (*permission) (struct inode *, int, unsigned int);
int (*check_acl)(struct inode *, int, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
void (*put_link) (struct dentry *, struct nameidata *, void *);
int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
int (*symlink) (struct inode *,struct dentry *,const char *);
int (*mkdir) (struct inode *,struct dentry *,int);
int (*rmdir) (struct inode *,struct dentry *);
int (*mknod) (struct inode *,struct dentry *,int,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
void (*truncate) (struct inode *);
int (*setattr) (struct dentry *, struct iattr *);
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
void (*truncate_range)(struct inode *, loff_t, loff_t);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
} ____cacheline_aligned;
3.目录项对象 dentry 和 dentry_operations
表示逻辑意义上的文件(目录也是文件),描述文件的逻辑属性。
路径查找时用到
include/linux/dcache.h
struct dentry {
/* RCU lookup touched fields */
unsigned int d_flags; /* protected by d_lock */
seqcount_t d_seq; /* per dentry seqlock */
struct hlist_bl_node d_hash; /* lookup hash list */
struct dentry *d_parent; /* parent directory */
struct qstr d_name;
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
/* Ref lookup also touches following */
unsigned int d_count; /* protected by d_lock */
spinlock_t d_lock; /* per dentry lock */
const struct dentry_operations *d_op;
struct super_block *d_sb; /* The root of the dentry tree */
unsigned long d_time; /* used by d_revalidate */
void *d_fsdata; /* fs-specific data */
struct list_head d_lru; /* LRU list */
/*
* d_child and d_rcu can share memory
*/
union {
struct list_head d_child; /* child of parent list */
struct rcu_head d_rcu;
} d_u;
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
};
struct dentry_operations {
int (*d_revalidate)(struct dentry *, struct nameidata *);
int (*d_hash)(const struct dentry *, const struct inode *,
struct qstr *);
int (*d_compare)(const struct dentry *, const struct inode *,
const struct dentry *, const struct inode *,
unsigned int, const char *, const struct qstr *);
int (*d_delete)(const struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)(struct dentry *, char *, int);
struct vfsmount *(*d_automount)(struct path *);
int (*d_manage)(struct dentry *, bool);
} ____cacheline_aligned;
4. 文件对象 file 和 file_operations
描述进程打开的某个文件
include/linux/fs.h
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
* fu_rcuhead for RCU freeing
*/
union {
struct list_head fu_list;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
#define f_dentry f_path.dentry
#define f_vfsmnt f_path.mnt
const struct file_operations *f_op;
spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */
#ifdef CONFIG_SMP
int f_sb_list_cpu;
#endif
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
void *private_data;
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
#ifdef CONFIG_DEBUG_WRITECOUNT
unsigned long f_mnt_write_state;
#endif
};
struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
};
文件的打开读写流程:
首先在当前进程的文件描述表fdtale中分配一个空的文件描述符fd , 然后在filp_cachep中创建一个file struct , 调用do_path_lookup()找的文件的inode ,取出inode的文件操作方法file_operations赋给file struct ,并调用f->f_op->open()执行打开的操作. 最后根据根据文件描述符fd将file安装在当前进程文件描述表fdtable对应的位置.
这样在以后读写文件操作时, 只需要根据文件描述符fd,就可以在当前进程的描述表中找到该文件的file对象,然后调用f->f_op操作方法对文件进行操作.
最重要的是要明白file结构体的file_operations来自inode结构体的file_operations。而inode结构体的file_operations一般定义在各个文件系统目录下的file.c中,比如fs/ext2/file.c
根文件系统的注册和挂载:refer to ulk3
分两阶段
1.特殊rootfs的注册和挂载
start_kernel |
init/main.c |
vfs_caches_init |
fs/dcatch.c |
mnt_init |
fs/namespace.c |
init_rootfs |
fs/ramfs/inode.c |
register_filesystem(&rootfs_fs_type); |
|
init_mount_tree |
fs/namespace.c |
do_kern_mount("rootfs", 0, "rootfs", NULL); |
fs/namespace.c |
vfs_kern_mount |
fs/namespace.c |
mount_fs |
fs/super.c |
调用rootfs file_system_type 的mount函数 rootfs_mount |
|
2.实际根文件系统的注册和挂载(即uboot通过root=传递进来的设备)
start_kernel |
init/main.c |
rest_init |
init/main.c |
kernel_init |
init/main.c |
prepare_namespace |
init/do_mounts.c |
create_dev("/dev/root", ROOT_DEV);//uboot传来ROOT_DEV(root=) |
init/do_mounts.c |
mount_root |
init/do_mounts.c |
mount_block_root("/dev/root", root_mountflags); |
init/do_mounts.c |
do_mount_root |
init/do_mounts.c |
sys_mount |
fs/namespace.c |
接下来跟普通文件系统的挂载流程类似,见下面 |
|
普通文件系统的注册和挂载:
sys_mount |
fs/namespace.c |
do_mount |
fs/namespace.c |
do_kern_mount |
fs/namespace.c |
vfs_kern_mount |
fs/namespace.c |
mount_fs |
fs/super.c |
调用真实文件系统 file_system_type 的mount函数 rootfs_mount |
|