VFS超级块是根据具体文件系统的超级块建立起来的内存结构:
struct super_block {
struct list_head s_list; /* Keep this first */
dev_t s_dev; /* search index; _not_ kdev_t */
unsigned char s_blocksize_bits;
unsigned long s_blocksize;
loff_t s_maxbytes; /* Max file size */
struct file_system_type *s_type;指向对应的文件系统对象
const struct super_operations *s_op;指向具体文件系统超级块操作函数
const struct dquot_operations *dq_op;
const struct quotactl_ops *s_qcop;
const struct export_operations *s_export_op;
unsigned long s_flags;
unsigned long s_magic;
struct dentry *s_root;
struct rw_semaphore s_umount;
int s_count;
atomic_t s_active;
#ifdef CONFIG_SECURITY
void *s_security;
#endif
const struct xattr_handler **s_xattr;
struct list_head s_inodes; /* all inodes */
struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev;
struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd;
struct hlist_node s_instances;
unsigned int s_quota_types; /* Bitmask of supported quota types */
struct quota_info s_dquot; /* Diskquota specific options */
struct sb_writers s_writers;
char s_id[32]; /* Informational name */
u8 s_uuid[16]; /* UUID */
void *s_fs_info; /* Filesystem private info */指向具体文件系统的超级块内存对象,就是ext4_sb_info
unsigned int s_max_links;
fmode_t s_mode;
/* Granularity of c/m/atime in ns.
Cannot be worse than a second */
u32 s_time_gran;
/*
* The next field is for VFS *only*. No filesystems have any business
* even looking at it. You had been warned.
*/
struct mutex s_vfs_rename_mutex; /* Kludge */
/*
* Filesystem subtype. If non-empty the filesystem type field
* in /proc/mounts will be "type.subtype"
*/
char *s_subtype;
/*
* Saved mount options for lazy filesystems using
* generic_show_options()
*/
char __rcu *s_options;
const struct dentry_operations *s_d_op; /* default d_op for dentries */
/*
* Saved pool identifier for cleancache (-1 means none)
*/
int cleancache_poolid;
struct shrinker s_shrink; /* per-sb shrinker handle */
/* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count;
/* Being remounted read-only */
int s_readonly_remount;
/* AIO completions deferred from interrupt context */
struct workqueue_struct *s_dio_done_wq;
struct hlist_head s_pins;
/*
* Keep the lru lists last in the structure so they always sit on their
* own individual cachelines.
*/
struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
struct rcu_head rcu;
/*
* Indicates how deep in a filesystem stack this SB is
*/
int s_stack_depth;
};
当内核需要挂载(mount)一个块设备时,可以从分区表中的信息得知这个块设备的文件系统类型:从文章《EXT4文件系统学习(八)磁盘结构》可以看出分区信息中记录的文件系统类型,也可以从分区的superblock信息中看出文件系统类型。
/*
 * Filesystem type object for ext4. It carries the name "ext4" together
 * with the mount and kill_sb callbacks used for this filesystem.
 */
static struct file_system_type ext4_fs_type = {
	/* identification */
	.name		= "ext4",
	.owner		= THIS_MODULE,
	.fs_flags	= FS_REQUIRES_DEV,
	/* callbacks */
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
};
MODULE_ALIAS_FS("ext4");
然后从file_system_type文件系统对象链表中找到对应的文件系统驱动程序的文件系统对象,调用里面的mount()函数获取具体的文件系统超级块信息。然后根据这些信息初始化VFS超级块,结构中的s_fs_info就指向具体文件系统的超级块内存对象,也就是ext4_sb_info。
由于各个文件系统的超级块不同,所以对操作超级块的方法也不同。为此内核定义了一个super_operations结构,定义如下:
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);分配一个inode结构
void (*destroy_inode)(struct inode *);释放一个inode结构
void (*dirty_inode) (struct inode *, int flags);
int (*write_inode) (struct inode *, struct writeback_control *wbc);
int (*drop_inode) (struct inode *);
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
int (*freeze_super) (struct super_block *);
int (*freeze_fs) (struct super_block *);
int (*thaw_super) (struct super_block *);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
void (*umount_begin) (struct super_block *);
int (*show_options)(struct seq_file *, struct dentry *);
int (*show_devname)(struct seq_file *, struct dentry *);
int (*show_path)(struct seq_file *, struct dentry *);
int (*show_stats)(struct seq_file *, struct dentry *);
#ifdef CONFIG_QUOTA
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
struct dquot **(*get_dquots)(struct inode *);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
long (*nr_cached_objects)(struct super_block *,
struct shrink_control *);
long (*free_cached_objects)(struct super_block *,
struct shrink_control *);
};
可以看出super_operations结构中的函数指针都是在操作下层文件系统,不同的文件系统super_operations也是不同的。
当内核挂载块设备时,会根据分区表读出文件系统类型信息,然后找到驱动中对应的已经注册过的文件系统对象,并调用它的mount函数设置s_op指针。
ext4文件系统的mount函数是ext4_mount,它通过mount_bdev调用ext4_fill_super函数;该函数会把磁盘上的数据读出,装载磁盘超级块、内存超级块以及VFS超级块。(装载磁盘和内存超级块可参考第11节对ext4_fill_super函数的介绍)
/*
 * Mount callback installed in ext4_fs_type. It hands all of its arguments
 * to mount_bdev() and passes ext4_fill_super as the fill-super callback,
 * returning whatever mount_bdev() returns.
 */
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
				 const char *dev_name, void *data)
{
	struct dentry *result;

	result = mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
	return result;
}
这里比较重要的是设置s_op指针:
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
sb->s_op = &ext4_sops;
这样就建立起了抽象的VFS超级块对象与具体ext4超级块对象的联系。
操作具体文件系统的操作函数ext4_sops如下:
/*
 * ext4 implementation of the superblock operation table; ext4_fill_super
 * stores its address in sb->s_op. Entries are listed in the declaration
 * order of struct super_operations.
 */
static const struct super_operations ext4_sops = {
	/* inode handling */
	.alloc_inode		= ext4_alloc_inode,
	.destroy_inode		= ext4_destroy_inode,
	.dirty_inode		= ext4_dirty_inode,
	.write_inode		= ext4_write_inode,
	.drop_inode		= ext4_drop_inode,
	.evict_inode		= ext4_evict_inode,
	/* superblock-wide operations */
	.put_super		= ext4_put_super,
	.sync_fs		= ext4_sync_fs,
	.freeze_fs		= ext4_freeze,
	.unfreeze_fs		= ext4_unfreeze,
	.statfs			= ext4_statfs,
	.remount_fs		= ext4_remount,
	.show_options		= ext4_show_options,
#ifdef CONFIG_QUOTA
	/* quota hooks, set only when CONFIG_QUOTA is defined */
	.quota_read		= ext4_quota_read,
	.quota_write		= ext4_quota_write,
	.get_dquots		= ext4_get_dquots,
#endif
	.bdev_try_to_free_page	= bdev_try_to_free_page,
};
ext4_fill_super函数最后会请求读取根目录的inode,调用
#define EXT4_ROOT_INO 2 /* Root inode */
root = ext4_iget(sb, EXT4_ROOT_INO);
继续分析iget函数:先去inode哈希链表缓存里面查找,没有的话就分配一个;分配时不带指定的inode号,所以这里似乎必须在表里面查找成功,但是根目录的inode号是什么时候加载到内存inode表里面的?
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
{
struct inode *inode;
inode = iget_locked(sb, ino);
挂载文件系统根目录时,根目录的inode肯定不在哈希链表中,所以需要新分配一个inode;分配后再去链表中查找一次inode号为2的inode,没有找到的话就把根目录的inode号赋值给新分配的inode,并把状态标志设置为I_NEW:
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
struct inode *inode;
spin_lock(&inode_hash_lock);
inode = find_inode_fast(sb, head, ino);
spin_unlock(&inode_hash_lock);
if (inode) {
wait_on_inode(inode);
return inode;
}
inode = alloc_inode(sb);
if (inode) {
struct inode *old;
spin_lock(&inode_hash_lock);
/* We released the lock, so.. */
old = find_inode_fast(sb, head, ino);
if (!old) {
inode->i_ino = ino;
spin_lock(&inode->i_lock);
inode->i_state = I_NEW;
hlist_add_head(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
inode_sb_list_add(inode);
spin_unlock(&inode_hash_lock);
/* Return the locked inode with I_NEW set, the
* caller is responsible for filling in the contents
*/
return inode;
}
iget_locked把根目录的inode返回后,VFS inode就已经分配好了;这时候通过宏EXT4_I转换得到EXT4 内存inode结构。
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
{
struct ext4_inode_info *ei;
struct inode *inode;
inode = iget_locked(sb, ino);
ei = EXT4_I(inode);
获取到inode号信息后就可以读取磁盘上面逻辑的inode数据,读取方法:
先根据inode号计算出它属于哪个块组,然后根据inode在块组内的偏移计算出该inode位于哪个块内,再把这个块的数据读到buffer_head中,最后根据块内偏移得到磁盘inode数据:
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
{
struct ext4_iloc iloc;
struct ext4_inode *raw_inode;磁盘inode
struct ext4_inode_info *ei;内存inode
struct inode *inode;VFS inode
inode = iget_locked(sb, ino);
ei = EXT4_I(inode);
__ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc, int in_mem)
iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
inode_offset = ((inode->i_ino - 1) %
EXT4_INODES_PER_GROUP(sb));
block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
bh = sb_getblk(sb, block);
iloc->bh = bh;
raw_inode = ext4_raw_inode(&iloc);
后面根据逻辑raw_inode设置内存inode和VFS inode:
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
设置i_op和i_fop指针,这些个函数指针都是操作下层具体文件系统。
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &ext4_dir_inode_operations;
inode->i_fop = &ext4_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
if (ext4_inode_is_fast_symlink(inode) &&
!ext4_encrypted_inode(inode)) {
inode->i_op = &ext4_fast_symlink_inode_operations;
nd_terminate_link(ei->i_data, inode->i_size,
sizeof(ei->i_data) - 1);
} else {
inode->i_op = &ext4_symlink_inode_operations;
ext4_set_aops(inode);
}
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_op = &ext4_special_inode_operations;
if (raw_inode->i_block[0])
init_special_inode(inode, inode->i_mode,
old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
else
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
} else if (ino == EXT4_BOOT_LOADER_INO) {
make_bad_inode(inode);
} else {
ret = -EIO;
EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
VFS超级块介绍完毕,下一篇介绍VFS inode。