关于NAT和SIT简介部分请参考
https://blog.csdn.net/u011649400/article/details/106955060
https://blog.csdn.net/u011649400/article/details/102490983
SIT区域的实际构图如上所示。以f2fs_sit_block为单位进行存储,共有2×blocks个f2fs_sit_block,其中blocks=sit_info->sit_blocks,即有效块数,剩下的blocks个块为备份。每个f2fs_sit_block的构成如下图所示,每个f2fs_sit_block中有55个f2fs_sit_entry,每个f2fs_sit_entry描述了一个segment的信息。
为什么会有2×blocks个块呢?f2fs为了防止宕机对元数据造成不可恢复的损害,sit/nat这类元数据都保存两个副本,但任一时刻只有其中一个副本表示最新的数据,f2fs通过保存在cp pack中的sit_nat_version_bitmap来指示哪个才是最新的。
/*
 * Fetch the meta page holding the currently valid copy of the SIT block
 * that covers @segno.
 *
 * @sbi:   f2fs superblock info
 * @segno: segment number whose SIT block is wanted
 *
 * The SIT bitmap selects between the two copies: when the bit for this
 * block is set, the valid copy lives sit_blocks further into the SIT area.
 */
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
					unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);
	/* index of the SIT block that contains the entry for segno */
	unsigned int blk_idx = SIT_BLOCK_OFFSET(sit_i, segno);
	block_t first_copy = sit_i->sit_base_addr + blk_idx;

	check_seg_range(sbi, segno);

	/* bit clear: first copy is valid; bit set: second copy is valid */
	if (!f2fs_test_bit(blk_idx, sit_i->sit_bitmap))
		return get_meta_page(sbi, first_copy);

	return get_meta_page(sbi, first_copy + sit_i->sit_blocks);
}
从上述函数中可以看出unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno);
这里#define SIT_BLOCK_OFFSET(sit_i, segno) (segno / SIT_ENTRY_PER_BLOCK),即offset表示segno对应的f2fs_sit_entry落在SIT区域的第几个f2fs_sit_block(块偏移)。
if (f2fs_test_bit(offset, sit_i->sit_bitmap))
blk_addr += sit_i->sit_blocks;
如果在位图上置位了,则后一个副本中的f2fs_sit_block才是最新的,而后一个副本的块地址等于前一个副本的块地址加上有效块数blocks(即sit_i->sit_blocks),所以跟上面的物理布局是对应的。
下面这个函数是计算block_addr处的f2fs_sit_block的备份块的地址。
/*
 * Given the address of one copy of a SIT block, return the address of
 * its counterpart in the other copy.
 *
 * @sbi:        f2fs superblock info
 * @block_addr: block address of a SIT block (in either copy)
 *
 * The two copies are sit_blocks apart, so the counterpart is found by
 * moving sit_blocks forward from the first copy or backward from the
 * second one.
 */
static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi,
						pgoff_t block_addr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	/* offset of the block relative to the start of the SIT area */
	pgoff_t rel = block_addr - sit_i->sit_base_addr;

	if (rel >= sit_i->sit_blocks)
		rel -= sit_i->sit_blocks;	/* in the second copy */
	else
		rel += sit_i->sit_blocks;	/* in the first copy */

	return rel + sit_i->sit_base_addr;
}
下面再介绍一个跟物理位置有关的函数:
/*
 * Copy the currently valid SIT block covering @start into the page of its
 * counterpart copy, mark that page dirty, and return it.
 *
 * @sbi:   f2fs superblock info
 * @start: segment number used to locate the SIT block
 *
 * The whole block is copied so that the entries which are not going to be
 * modified are also up to date in the returned page.
 */
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
					unsigned int start)
{
	struct sit_info *sit_i = SIT_I(sbi);
	/* NOTE(review): current_sit_addr presumably resolves the valid copy via the SIT bitmap - confirm */
	pgoff_t cur_blk = current_sit_addr(sbi, start);
	/* address of the same SIT block in the other copy */
	pgoff_t alt_blk = next_sit_addr(sbi, cur_blk);
	/* get current sit block page without lock */
	struct page *cur_page = get_meta_page(sbi, cur_blk);
	struct page *alt_page = grab_meta_page(sbi, alt_blk);

	BUG_ON(PageDirty(cur_page));

	memcpy(page_address(alt_page), page_address(cur_page),
						PAGE_CACHE_SIZE);
	set_page_dirty(alt_page);
	f2fs_put_page(cur_page, 1);

	/* NOTE(review): presumably flips the SIT bitmap bit so the other copy becomes the valid one - confirm */
	set_to_next_sit(sit_i, start);

	return alt_page;
}
此函数先根据start(该f2fs_sit_block中的第一个段号)找到sit位图当前指向的有效f2fs_sit_block并读出,再把它的内容整块复制到另一个副本所在的页中,将该页置脏并翻转sit位图中对应的位,最后返回这个新的页。也就是说,返回的页位于原来的旧副本位置上,但内容已经被当前最新版本覆盖,之后的修改都写在这个页上。
NAT的实际磁盘布局如图所示。与SIT按block为单位备份不同的是,NAT按照segment为单位进行备份,前一个segment的备份紧跟在其后面,每个segment有512个f2fs_nat_block,而每个f2fs_nat_block有455个f2fs_nat_entry。每个f2fs_nat_entry描述一个node块及其块地址的信息。
通过下面的函数即可看出该物理布局:
/*
 * Compute the block address of the currently valid copy of the NAT block
 * that contains the entry for nid @start.
 *
 * @sbi:   f2fs superblock info
 * @start: node id used to locate the NAT block
 *
 * NAT blocks are duplicated per segment: each segment of NAT blocks is
 * followed by a segment holding its other copy. The segment index is
 * therefore doubled while the offset inside the segment is used as is.
 * A set bit in the NAT bitmap selects the copy in the following segment.
 */
static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	/* index of the NAT block holding this nid */
	pgoff_t nat_blk = NAT_BLOCK_OFFSET(start);
	/* which NAT segment (counting one copy only) that block falls in */
	int seg_idx = nat_blk >> sbi->log_blocks_per_seg;
	/* position of the block inside its segment */
	pgoff_t in_seg = nat_blk & ((1 << sbi->log_blocks_per_seg) - 1);
	pgoff_t addr;

	addr = (pgoff_t)(nm_i->nat_blkaddr +
		(seg_idx << sbi->log_blocks_per_seg << 1) +
		in_seg);

	/* bit set: the copy in the next segment is the valid one */
	if (f2fs_test_bit(nat_blk, nm_i->nat_bitmap))
		addr += sbi->blocks_per_seg;

	return addr;
}
该函数根据f2fs_nat_block的起始nid和有效位图获取其磁盘块地址。重点在于:
block_addr = (pgoff_t)(nm_i->nat_blkaddr +//NAT区域的起始地址加上segment和block的偏移
(seg_off << sbi->log_blocks_per_seg << 1) +//段偏移需要*2,因为是按照段进行备份的
(block_off & ((1 << sbi->log_blocks_per_seg) - 1)));//段内的块偏移不需要翻倍
因为最终的块地址=NAT起始地址+段偏移+段内块偏移,又由于是按照段进行备份的,所以计算出段偏移后需要×2,而段内偏移不需要翻倍。
下面这个函数是计算block_addr处的f2fs_nat_block的备份块的地址。
/*
 * Given the address of one copy of a NAT block, return the address of
 * its counterpart in the other copy.
 *
 * @sbi:        f2fs superblock info
 * @block_addr: block address of a NAT block (in either copy)
 *
 * Copies alternate segment by segment, so a block in an even segment
 * (relative to the NAT start) has its counterpart one segment later, and
 * a block in an odd segment has it one segment earlier.
 */
static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
						pgoff_t block_addr)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	/* offset of the block relative to the start of the NAT area */
	pgoff_t rel = block_addr - nm_i->nat_blkaddr;

	if (!((rel >> sbi->log_blocks_per_seg) % 2))
		rel += sbi->blocks_per_seg;	/* even segment: go forward */
	else
		rel -= sbi->blocks_per_seg;	/* odd segment: go back */

	return rel + nm_i->nat_blkaddr;
}
下面再介绍一个跟物理位置有关的函数:
/*
 * Return the page that NAT updates for @nid should be written to.
 *
 * @sbi: f2fs superblock info
 * @nid: node id used to locate the NAT block
 *
 * If the currently valid page is already dirty it is returned as is.
 * Otherwise its content is copied into the page of the counterpart copy,
 * that page is marked dirty and returned. The whole block is copied so
 * that the entries which are not going to be modified are also up to date
 * in the returned page.
 */
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	/* address of the copy the NAT bitmap currently selects */
	pgoff_t cur_blk = current_nat_addr(sbi, nid);
	/* address of the same NAT block in the other copy */
	pgoff_t alt_blk = next_nat_addr(sbi, cur_blk);
	struct page *cur_page;
	struct page *alt_page;

	/* get current nat block page with lock */
	cur_page = get_meta_page(sbi, cur_blk);

	/* Dirty src_page means that it is already the new target NAT page. */
	if (!PageDirty(cur_page)) {
		alt_page = grab_meta_page(sbi, alt_blk);

		memcpy(page_address(alt_page), page_address(cur_page),
							PAGE_CACHE_SIZE);
		set_page_dirty(alt_page);
		f2fs_put_page(cur_page, 1);

		/* NOTE(review): presumably flips the NAT bitmap bit so the other copy becomes the valid one - confirm */
		set_to_next_nat(nm_i, nid);

		return alt_page;
	}

	return cur_page;
}
/*
 * Node manager information: location of the NAT area, the cache of NAT
 * entries, free node id bookkeeping, and the NAT bitmap kept for
 * checkpointing.
 */
struct f2fs_nm_info { /* node manager information */
block_t nat_blkaddr; /* base disk address of the NAT area */
nid_t max_nid; /* maximum possible node ids */
nid_t init_scan_nid; /* the first nid to be scanned */
nid_t next_scan_nid; /* the next nid to be scanned */
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
rwlock_t nat_tree_lock; /* NOTE(review): presumably protects the nat entry cache tree (nat_root) - confirm */
unsigned int nat_cnt; /* the # of cached nat entries */
struct list_head nat_entries; /* cached nat entry list (clean) */
struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
/* free node ids management */
struct list_head free_nid_list; /* a list for free nids */
spinlock_t free_nid_list_lock; /* protect free nid list */
unsigned int fcnt; /* the number of free node id */
struct mutex build_lock; /* lock for build free nids */
/* for checkpoint */
char *nat_bitmap; /* NAT bitmap pointer */
int bitmap_size; /* bitmap size */
};
/*
 * SIT (segment information table) state: location and size of the SIT
 * area, the SIT bitmap, dirty-entry tracking, the segment/section level
 * caches, and timing values used by the cleaning cost-benefit algorithm.
 */
struct sit_info {
const struct segment_allocation *s_ops;
block_t sit_base_addr; /* start block address of SIT area */
block_t sit_blocks; /* # of blocks used by SIT area */
block_t written_valid_blocks; /* # of valid blocks in main area */
char *sit_bitmap; /* SIT bitmap pointer */
unsigned int bitmap_size; /* SIT bitmap size */
unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries (dirty segment entries) */
unsigned int dirty_sentries; /* # of dirty sentries */
unsigned int sents_per_block; /* # of SIT entries per block */
struct mutex sentry_lock; /* to protect SIT cache */
struct seg_entry *sentries; /* SIT segment-level cache; NOTE(review): per the original note, indexed relative to start_segno in free_segmap_info - confirm */
struct sec_entry *sec_entries; /* SIT section-level cache */
/* for cost-benefit algorithm in cleaning procedure */
unsigned long long elapsed_time; /* elapsed time after mount */
unsigned long long mounted_time; /* mount time */
unsigned long long min_mtime; /* min. modification time */
unsigned long long max_mtime; /* max. modification time */
};
在sbi->f2fs_nm_info->nat_bitmap中是NAT的位图,sbi->sit_info->sit_bitmap为SIT的位图。
/*
 * Checkpoint block (packed, little-endian fields). Besides the counters
 * and current-segment information it ends with the SIT/NAT version
 * bitmaps that tell which copy of each SIT/NAT block is the valid one.
 */
struct f2fs_checkpoint {
__le64 checkpoint_ver; /* checkpoint block version number */
__le64 user_block_count; /* # of user blocks */
__le64 valid_block_count; /* # of valid blocks in main area */
__le32 rsvd_segment_count; /* # of reserved segments for gc */
__le32 overprov_segment_count; /* # of overprovision segments */
__le32 free_segment_count; /* # of free segments in main area */
/* information of current node segments */
__le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS];
__le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS];
/* information of current data segments */
__le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS];
__le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS];
__le32 ckpt_flags; /* Flags : umount and journal_present */
__le32 cp_pack_total_block_count; /* total # of one cp pack */
__le32 cp_pack_start_sum; /* start block number of data summary */
__le32 valid_node_count; /* Total number of valid nodes */
__le32 valid_inode_count; /* Total number of valid inodes */
__le32 next_free_nid; /* Next free node number */
__le32 sit_ver_bitmap_bytesize; /* Default value 64 */
__le32 nat_ver_bitmap_bytesize; /* Default value 256 */
__le32 checksum_offset; /* checksum offset inside cp block */
__le64 elapsed_time; /* mounted time */
/* allocation type of current segment */
unsigned char alloc_type[MAX_ACTIVE_LOGS];
/*
 * SIT and NAT version bitmap: the SIT bitmap comes first and the NAT
 * bitmap starts sit_ver_bitmap_bytesize bytes later (see __bitmap_ptr).
 */
unsigned char sit_nat_version_bitmap[1];
} __packed;
位图在磁盘上是存储在f2fs_checkpoint里的sit_nat_version_bitmap:
/*
 * Locate the SIT or NAT version bitmap inside the checkpoint's
 * sit_nat_version_bitmap area.
 *
 * @sbi:  f2fs superblock info whose checkpoint (F2FS_CKPT) is consulted
 * @flag: NAT_BITMAP selects the NAT bitmap; any other value selects the
 *        SIT bitmap
 *
 * Returns a pointer to the first byte of the requested bitmap. The SIT
 * bitmap starts at byte 0 of sit_nat_version_bitmap and the NAT bitmap
 * starts sit_ver_bitmap_bytesize bytes after it.
 */
static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	int offset = (flag == NAT_BITMAP) ?
			le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;

	/*
	 * Add the offset to the decayed byte pointer, not to the address of
	 * the array: arithmetic on a pointer-to-array steps by the size of
	 * the whole array, which equals one byte only while the member is
	 * declared with exactly one element.
	 */
	return ckpt->sit_nat_version_bitmap + offset;
}
由上述函数可以看出,sit和NAT位图在sit_nat_version_bitmap中是紧挨着,其中sit在前部分,nat在后部分。
判断位图中某个位置是否置位的函数:
/*
 * Test bit @nr of the byte-array bitmap at @addr.
 *
 * @nr:   bit index; bit 0 is the most significant bit of addr[0]
 * @addr: bitmap stored as an array of bytes
 *
 * Returns 0 when the bit is clear, otherwise the bit's mask within its
 * byte (a non-zero value, not necessarily 1).
 */
static inline int f2fs_test_bit(unsigned int nr, char *addr)
{
	const unsigned int byte_idx = nr / 8;
	/* bits are numbered from the high end of each byte */
	const int mask = 0x80 >> (nr % 8);

	return mask & addr[byte_idx];
}
还有置位和清除位的函数:
/*
 * Set bit @nr of the byte-array bitmap at @addr.
 *
 * @nr:   bit index; bit 0 is the most significant bit of addr[0]
 * @addr: bitmap stored as an array of bytes
 *
 * Returns the previous state of the bit: 0 when it was clear, otherwise
 * the bit's mask within its byte.
 */
static inline int f2fs_set_bit(unsigned int nr, char *addr)
{
	char *byte = addr + (nr >> 3);
	const int mask = 0x80 >> (nr & 0x07);
	const int was_set = mask & *byte;

	*byte |= mask;
	return was_set;
}
/*
 * Clear bit @nr of the byte-array bitmap at @addr.
 *
 * @nr:   bit index; bit 0 is the most significant bit of addr[0]
 * @addr: bitmap stored as an array of bytes
 *
 * Returns the previous state of the bit: 0 when it was already clear,
 * otherwise the bit's mask within its byte.
 */
static inline int f2fs_clear_bit(unsigned int nr, char *addr)
{
	char *byte = addr + (nr >> 3);
	const int mask = 0x80 >> (nr & 0x07);
	const int was_set = mask & *byte;

	*byte &= ~mask;
	return was_set;
}