direntry建立了各级目录和文件之间的联系,inode建立了文件与数据之间联系,但这还不够,还需要记录inode和数据块从哪里开始从哪里结束,以及哪些是空闲的,这样才能正确的分配文件。
块组是一个逻辑概念,把一个分区分成若干个组,用来限制文件的数据块不要过于散落,尽量保证一个文件的数据块在一个组,这样可以提高读写效率,
格式化一个ext4文件系统后在磁盘上的结构:
Group 0: (Blocks 1-8192)
Primary superblock at 1, Group descriptors at 2-2
Reserved GDT blocks at 3-246
Block bitmap at 247 (+246)
Inode bitmap at 251 (+250)
Inode table at 255-499 (+254)
5481 free blocks, 1916 free inodes, 7 directories
Free blocks: 1268-2657, 2866-4096, 5327-6112, 6119-8192
Free inodes: 39, 46-1960
超级块记录着一个分区的整体信息,有多少inode,多少个组,多少个block数据块,多少个空闲块。
Group descriptors组描述符记录了这个组中的inode数量,空闲块数量等信息。
Block bitmap是数据块的位图,记录着数据块的状态。
Inode bitmap是inode的位图,记录着inode的状态。
磁盘超级块结构如下:
/*
* Structure of the super block
*/
struct ext4_super_block {
/*00*/ __le32 s_inodes_count; /* Inodes count */inode总数量
__le32 s_blocks_count_lo; /* Blocks count */block总数量
__le32 s_r_blocks_count_lo; /* Reserved blocks count */预留block总数量
__le32 s_free_blocks_count_lo; /* Free blocks count */空闲block总数量
/*10*/ __le32 s_free_inodes_count; /* Free inodes count */空闲inode总数量
__le32 s_first_data_block; /* First Data Block */第一个数据块的编号
__le32 s_log_block_size; /* Block size */块大小,一般是4KB,可配置
__le32 s_log_cluster_size; /* Allocation cluster size */分片功能没有启用
/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */每个组中多个个块
__le32 s_clusters_per_group; /* # Clusters per group */
__le32 s_inodes_per_group; /* # Inodes per group */
__le32 s_mtime; /* Mount time */
/*30*/ __le32 s_wtime; /* Write time */
__le16 s_mnt_count; /* Mount count */被挂载的总次数
__le16 s_max_mnt_count; /* Maximal mount count */最大挂载次数,达到后就会进行文件系统检测
__le16 s_magic; /* Magic signature */
__le16 s_state; /* File system state */文件系统状态,为0表示被mount状态,挂载之前会检测此状态防止重复挂载
__le16 s_errors; /* Behaviour when detecting errors */
__le16 s_minor_rev_level; /* minor revision level */
/*40*/ __le32 s_lastcheck; /* time of last check */上一次文件系统检测的时间
__le32 s_checkinterval; /* max. time between checks */检测的最大间隔
__le32 s_creator_os; /* OS */
__le32 s_rev_level; /* Revision level */
/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
__le16 s_def_resgid; /* Default gid for reserved blocks */
/*
* These fields are for EXT4_DYNAMIC_REV superblocks only.
*
* Note: the difference between the compatible feature set and
* the incompatible feature set is that if there is a bit set
* in the incompatible feature set that the kernel doesn't
* know about, it should refuse to mount the filesystem.
*
* e2fsck's requirements are more strict; if it doesn't know
* about a feature in either the compatible or incompatible
* feature set, it must abort and not try to meddle with
* things it doesn't understand...
*/
__le32 s_first_ino; /* First non-reserved inode */
__le16 s_inode_size; /* size of inode structure */inode占用空间大小,一般是256字节
__le16 s_block_group_nr; /* block group # of this superblock */
__le32 s_feature_compat; /* compatible feature set */
/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
__le32 s_feature_ro_compat; /* readonly-compatible feature set */
/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */UID
/*78*/ char s_volume_name[16]; /* volume name */分区名字
/*88*/ char s_last_mounted[64]; /* directory where last mounted */
/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
/*
* Performance hints. Directory preallocation should only
* happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
*/
__u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
__u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
__le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
/*
* Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
*/
/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
__le32 s_journal_dev; /* device number of journal file */
__le32 s_last_orphan; /* start of list of inodes to delete */
__le32 s_hash_seed[4]; /* HTREE hash seed */
__u8 s_def_hash_version; /* Default hash version to use */
__u8 s_jnl_backup_type;
__le16 s_desc_size; /* size of group descriptor */
/*100*/ __le32 s_default_mount_opts;
__le32 s_first_meta_bg; /* First metablock block group */
__le32 s_mkfs_time; /* When the filesystem was created */
__le32 s_jnl_blocks[17]; /* Backup of the journal inode */
/* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
__le32 s_r_blocks_count_hi; /* Reserved blocks count */
__le32 s_free_blocks_count_hi; /* Free blocks count */
__le16 s_min_extra_isize; /* All inodes have at least # bytes */
__le16 s_want_extra_isize; /* New inodes should reserve # bytes */
__le32 s_flags; /* Miscellaneous flags */
__le16 s_raid_stride; /* RAID stride */
__le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */
__le64 s_mmp_block; /* Block for multi-mount protection */
__le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
__u8 s_log_groups_per_flex; /* FLEX_BG group size */
__u8 s_checksum_type; /* metadata checksum algorithm used */
__u8 s_encryption_level; /* versioning level for encryption */
__u8 s_reserved_pad; /* Padding to next 32bits */
__le64 s_kbytes_written; /* nr of lifetime kilobytes written */
__le32 s_snapshot_inum; /* Inode number of active snapshot */
__le32 s_snapshot_id; /* sequential ID of active snapshot */
__le64 s_snapshot_r_blocks_count; /* reserved blocks for active
snapshot's future use */
__le32 s_snapshot_list; /* inode number of the head of the
on-disk snapshot list */
#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
__le32 s_error_count; /* number of fs errors */
__le32 s_first_error_time; /* first time an error happened */
__le32 s_first_error_ino; /* inode involved in first error */
__le64 s_first_error_block; /* block involved of first error */
__u8 s_first_error_func[32]; /* function where the error happened */
__le32 s_first_error_line; /* line number where error happened */
__le32 s_last_error_time; /* most recent time of an error */
__le32 s_last_error_ino; /* inode involved in last error */
__le32 s_last_error_line; /* line number where error happened */
__le64 s_last_error_block; /* block involved of last error */
__u8 s_last_error_func[32]; /* function where the error happened */
#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
__u8 s_mount_opts[64];
__le32 s_usr_quota_inum; /* inode for tracking user quota */
__le32 s_grp_quota_inum; /* inode for tracking group quota */
__le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
__le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */
__u8 s_encrypt_algos[4]; /* Encryption algorithms in use */
__u8 s_encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
__le32 s_lpf_ino; /* Location of the lost+found inode */
__le32 s_reserved[100]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};
ext4_super_block与磁盘上面的超级块数据基本是对应的,可直接读出到结构体内存。
组描述符在磁盘上的格式如下:
/*
* Structure of a blocks group descriptor
*/
struct ext4_group_desc
{
__le32 bg_block_bitmap_lo; /* Blocks bitmap block */表示bmap在这个组的那一块
__le32 bg_inode_bitmap_lo; /* Inodes bitmap block */表示imap在这个组的那一块
__le32 bg_inode_table_lo; /* Inodes table block */这个组的inode所在起始块号
__le16 bg_free_blocks_count_lo;/* Free blocks count */空闲块数量
__le16 bg_free_inodes_count_lo;/* Free inodes count */空闲inode数量
__le16 bg_used_dirs_count_lo; /* Directories count */
__le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
__le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */
__le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */
__le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */
__le16 bg_itable_unused_lo; /* Unused inodes count */
__le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
__le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
__le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
__le32 bg_inode_table_hi; /* Inodes table block MSB */
__le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
__le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
__le16 bg_used_dirs_count_hi; /* Directories count MSB */
__le16 bg_itable_unused_hi; /* Unused inodes count MSB */
__le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */
__le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */
__le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */
__u32 bg_reserved;
};
组描述符占用64字节,一个块占用4KB,所以每个组里面的组描述符块保存了所有组描述符。应该是为了防止数据损坏进行修复使用。
超级块的备份会在2n-1的组里面,防止数据损坏进行修复。