linux下omfs文件系统的硬盘布局

文件系统是linux内核的重要组成部分,涉及到vfs、块IO层的调度机制,块设备驱动以及具体文件系统所采用的数据结构。所使用linux内核版本是2.6.34.1。

 

以fs/omfs为例,主要学习vfs的实现、omfs的硬盘布局,它所采用的数据结构为何能够针对MPEG文件做优化,以及omfs文件系统的具体文件读写是如何实现的。

 

OMFS:Optimized MPEG Filesystem

OMFS是由SonicBlue公司创建的用于ReplayTV DVR和MP3 player的文件系统。该文件系统是基于extent的(现代很多文件系统都采用extent替代block来管理磁盘。Extent就是一些连续的block,可以有效减少元数据开销。),可用的block大小在2k到8k之间,目录结构是基于hash的。

 

该文件系统在特定的流媒体设备中性能很好,但对于一般的应用,linux主流的文件系统应该性能更优。Omfs是如何针对MPEG做性能优化,这点还在探索中。

 

硬盘布局格式:

 

Omfs区分sysblocks和一般的数据blocks。Sysblock group由superblock信息、文件的元数据(metadata)、目录结构和extents构成。每一个sysblock都有一个包含CRC校验的头,而且可以在硬盘上做镜像备份。Sysblock的大小可以比一个数据block小,但由于两者都用同一套64位块号寻址,较小的sysblock所在块中剩余的空间不会被使用。

 

Sysblock 头信息:

struct omfs_header {

        __be64 h_self;                  /* FS block where this is located */

        __be32 h_body_size;             /* size of useful data after header */

        __be16 h_crc;                   /* crc-ccitt of body_size bytes */

        char h_fill1[2];

        u8 h_version;                   /* version, always 1 */

        char h_type;                    /* OMFS_INODE_X */

        u8 h_magic;                     /* OMFS_IMAGIC */

        u8 h_check_xor;                 /* XOR of header bytes before this */

        __be32 h_fill2;

};

 

文件和目录都由omfs_inode表示:

struct omfs_inode {

        struct omfs_header i_head;      /* header */

        __be64 i_parent;                /* parent containing this inode */

        __be64 i_sibling;               /* next inode in hash bucket */

        __be64 i_ctime;                 /* ctime, in milliseconds */

        char i_fill1[35];

        char i_type;                    /* OMFS_[DIR,FILE] */

        __be32 i_fill2;

        char i_fill3[64];

        char i_name[OMFS_NAMELEN];      /* filename */

        __be64 i_size;                  /* size of file, in bytes */

};

 

OMFS中的目录是一个大的hash表。文件名经过hash计算,然后放到以OMFS_DIR_START开始的桶中。查找的时候需要hash文件名,然后通过i_sibling指针查找到匹配的i_name。

 

文件以omfs_inode结构体开头,后面跟着在OMFS_EXTENT_START开始的extent table。

 

struct omfs_extent_entry {

__be64 e_cluster;               /* start location of a set of blocks */

__be64 e_blocks;                /* number of blocks after e_cluster */

};

struct omfs_extent {

__be64 e_next;                  /* next extent table location */

__be32 e_extent_count;          /* total # extents in this table */

__be32 e_fill;

struct omfs_extent_entry e_entry;       /* start of extent entries */

};

 

通过mkomfs.c可以了解omfs的基本布局:

在这里我们不使用实际的硬盘,用loop设备模拟一下:

生成一个文件:dd if=/dev/zero of=file.img bs=512 count=10000

10000+0 records in

10000+0 records out

5120000 bytes (5.1 MB) copied, 0.102142 s, 50.1 MB/s

用losetup /dev/loop0 file.img将loop设备和file.img关联上。

再用mkomfs /dev/loop0来布局:

mkomfs的默认配置参数是:

         fs_config_t config = {

                   .block_size = 8192,

                   .cluster_size = 8,

                   .clear_dev = 0

         };

获取设备的大小:size=bs×count=512×10000=5120000

 

create_fs(fp, size/512, &config);中

block_size = 8192

blocks_per_sector = block_size / SECTOR_SIZE = 8192/512 = 16

blocks = sectors / blocks_per_sector = 10000/16 =625=0x271

 

初始化omfs_super_block结构体:

struct omfs_super_block {

         char s_fill1[192];

         char s_name[OMFS_SUPER_NAMELEN];                    :"omfs"

         __be64 s_root_block;               /* block number of omfs_root_block */    :ROOT_BLK=1

         __be64 s_num_blocks;             /* total number of FS blocks */          :blocks=625=0x271

         __be32 s_magic;                        /* OMFS_MAGIC */           : OMFS_MAGIC 0xC2993D87

         __be32 s_blocksize;                  /* size of a block */           :block_size=8192=0x2000

         __be32 s_mirrors;            /* # of mirrors of system blocks */    :2

         __be32 s_sys_blocksize;          /* size of non-data blocks */     :2048=0x800

};              288个字节

 

hexdump -C -s 0 -n 512 /dev/loop0

00000000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

000000c0  6f 6d 66 73 00 00 00 00  00 00 00 00 00 00 00 00  |omfs............|

000000d0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00000100  00 00 00 00 00 00 00 01  00 00 00 00 00 00 02 71  |...............q|

00000110  c2 99 3d 87 00 00 20 00  00 00 00 02 00 00 08 00  |..=... .........|

00000120  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00000200

将superblock的288个字节写在设备的最开头(偏移0处)。

 

初始化omfs_root_block结构体:

struct omfs_root_block {

         struct omfs_header r_head;   /* header */

         __be64 r_fill1;

         __be64 r_num_blocks;             /* total number of FS blocks */     :blocks=625

         __be64 r_root_dir;           /* block # of root directory */          : ROOT_DIR_BLK=3

         __be64 r_bitmap;             /* block # of free space bitmap */      :BITMAP_BLK 5

         __be32 r_blocksize;                   /* size of a block */           :block_size=8192

         __be32 r_clustersize;               /* size allocated for data blocks */   : cluster_size = 8

         __be64 r_mirrors;            /* # of mirrors of system blocks */      :2

         char r_name[OMFS_NAMELEN];     /* partition label */          :"omfs"

         __be64 r_fill2;

};             336个字节

 

struct omfs_header {

         __be64 h_self;                            /* FS block where this is located */   :ROOT_BLK=1

         __be32 h_body_size;                /* size of useful data after header */  :336-24=312=0x138

         __be16 h_crc;                    /* crc-ccitt of body_size bytes */

         char h_fill1[2];

         u8 h_version;                     /* version, always 1 */        :1

         char h_type;                       /* OMFS_INODE_X */        : OMFS_INODE_SYSTEM='s'

         u8 h_magic;                        /* OMFS_IMAGIC */          : OMFS_IMAGIC 0xD2

         u8 h_check_xor;                         /* XOR of header bytes before this */

         __be32 h_fill2;

};                    24个字节

 

然后移动到偏移8192字节(0x2000,即block 1,ROOT_BLK)处,写入rootblock;再移动到0x4000(block 2)处,把rootblock再写一遍作为镜像(s_mirrors=2)。

hexdump -C -s 0x2000 -n 512 /dev/loop0

00002000  00 00 00 00 00 00 00 01  00 00 01 38 00 00 00 00  |...........8....|

00002010  01 73 d2 00 00 00 00 00  00 00 00 00 00 00 00 00  |.s..............|

00002020  00 00 00 00 00 00 02 71  00 00 00 00 00 00 00 03  |.......q........|

00002030  00 00 00 00 00 00 00 05  00 00 20 00 00 00 00 08  |.......... .....|

00002040  00 00 00 00 02 00 00 00  6f 6d 66 73 00 00 00 00  |........omfs....|

00002050  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00002200

 

hexdump -C -s 0x4000 -n 512 /dev/loop0

00004000  00 00 00 00 00 00 00 01  00 00 01 38 00 00 00 00  |...........8....|

00004010  01 73 d2 00 00 00 00 00  00 00 00 00 00 00 00 00  |.s..............|

00004020  00 00 00 00 00 00 02 71  00 00 00 00 00 00 00 03  |.......q........|

00004030  00 00 00 00 00 00 00 05  00 00 20 00 00 00 00 08  |.......... .....|

00004040  00 00 00 00 02 00 00 00  6f 6d 66 73 00 00 00 00  |........omfs....|

00004050  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00004200

 

 

Root directory inode信息:

struct omfs_inode {

         struct omfs_header i_head;    /* header */

         __be64 i_parent;              /* parent containing this inode */    :~0

         __be64 i_sibling;               /* next inode in hash bucket */      :~0

         __be64 i_ctime;                          /* ctime, in milliseconds */

         char i_fill1[35];

         char i_type;                        /* OMFS_[DIR,FILE] */             :'D'

         __be32 i_fill2;                                        :1

         char i_fill3[64];

         char i_name[OMFS_NAMELEN];     /* filename */

         __be64 i_size;                    /* size of file, in bytes */            :2048  

};

 

struct omfs_header {

         __be64 h_self;                            /* FS block where this is located */   : ROOT_DIR_BLK 3

         __be32 h_body_size;                /* size of useful data after header */  :2048-24=2024

         __be16 h_crc;                    /* crc-ccitt of body_size bytes */

         char h_fill1[2];

         u8 h_version;                     /* version, always 1 */                     :1

         char h_type;                       /* OMFS_INODE_X */           : OMFS_INODE_NORMAL 'e'

         u8 h_magic;                        /* OMFS_IMAGIC */        :OMFS_IMAGIC 0xD2

         u8 h_check_xor;                         /* XOR of header bytes before this */

         __be32 h_fill2;

};

 

申请一块大小为2048字节的内存,前面416=0x1a0个字节放root inode,

从OMFS_DIR_START 0x1b8到2048=0x800全部设置为0xff。

将这2048个字节写到0x6000位置,再重复将这2048个字节写到0x8000位置。

 

hexdump -C -s 0x6000 -n 512 /dev/loop0

00006000  00 00 00 00 00 00 00 03  00 00 07 e8 1c 5c 00 00  |.............\..|

00006010  01 65 d2 1a 00 00 00 00  ff ff ff ff ff ff ff ff  |.e..............|

00006020  ff ff ff ff ff ff ff ff  00 00 01 35 5a 8e 42 bb  |...........5Z.B.|

00006030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00006050  00 00 00 44 00 00 00 01  00 00 00 00 00 00 00 00  |...D............|

00006060  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00006190  00 00 00 00 00 00 00 00  00 00 00 00 00 00 08 00  |................|

000061a0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

000061b0  00 00 00 00 00 00 00 00  ff ff ff ff ff ff ff ff  |................|

000061c0  ff ff ff ff ff ff ff ff  ff ff ff ff ff ff ff ff  |................|

*

00006200

 

 

hexdump -C -s 0x8000 -n 512 /dev/loop0

00008000  00 00 00 00 00 00 00 03  00 00 07 e8 1c 5c 00 00  |.............\..|

00008010  01 65 d2 1a 00 00 00 00  ff ff ff ff ff ff ff ff  |.e..............|

00008020  ff ff ff ff ff ff ff ff  00 00 01 35 5a 8e 42 bb  |...........5Z.B.|

00008030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00008050  00 00 00 44 00 00 00 01  00 00 00 00 00 00 00 00  |...D............|

00008060  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00008190  00 00 00 00 00 00 00 00  00 00 00 00 00 00 08 00  |................|

000081a0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

000081b0  00 00 00 00 00 00 00 00  ff ff ff ff ff ff ff ff  |................|

000081c0  ff ff ff ff ff ff ff ff  ff ff ff ff ff ff ff ff  |................|

*

00008200

 

 

最后,就是free space bitmap了。

bitmap_size = (swap_be64(super.s_num_blocks) + 7)/8;  =(625+7)/8=79

dirty_size = (bitmap_size + 7)/8;    =(79+7)/8=10

first_blk = BITMAP_BLK + (bitmap_size +

                   swap_be32(super.s_blocksize)-1) / swap_be32(super.s_blocksize);  =5+(79+8192-1)/8192=6

 

因为bitmap的每一位代表一个block,所以可以计算出bitmap的字节数。

bitmap.bmap申请bitmap_size=79个字节的内存大小

         for (i=0; i<first_blk; i++)

         {

                   bitmap.bmap[i/8] |= 1<<(i & 7);

         }

bitmap.bmap[0] = 0011 1111  2进制    0x3f

因为blocks 0-5被用了,所以bitmap的相应位都置上1。

bitmap.dirty申请dirty_size=10个字节的大小,每个字节都置为0xff

将bitmap写入0xa000,即block 5(BITMAP_BLK=5,5×8192=0xa000)。block从0开始编号,block 0放的是super block,所以这是第6个block。

hexdump -C -s 0xa000 -n 512 /dev/loop0

0000a000  3f 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |?...............|

0000a010  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

0000a050  00 00 00 00 11 00 00 00  ff ff ff ff ff ff ff ff  |................|

0000a060  ff ff 00 00 c9 0c 02 00  00 00 00 00 00 00 00 00  |................|

0000a070  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

0000a190  00 00 00 00 00 00 00 00  00 00 00 00 00 00 08 00  |................|

0000a1a0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

0000a1b0  00 00 00 00 00 00 00 00  ff ff ff ff ff ff ff ff  |................|

0000a1c0  ff ff ff ff ff ff ff ff  ff ff ff ff ff ff ff ff  |................|

*

0000a200

 

 

到此,omfs文件系统格式化ok了,mount之后就可以进行常见的文件操作了。

 

 

 

你可能感兴趣的:(linux下omfs文件系统的硬盘布局)