master node用来记录管理所有on-flash上位置不固定的结构。
UBIFS把master node内容重复的写入LEB1和LEB2中,这样可以保证任何时刻都有一个有效的master node存在。master node大小为512 bytes,顺序的占用LEB的page,如果整个LEB都被master node写满,那么执行unmap操作,分配一个新空LEB。注意unmap LEB1 LEB2操作也要顺序的进行,如果同时unmap两个master LEB,系统就暂时的处于无有效master node的状态。
一个master LEB可能存在master node的多个版本,LEB内最后一个master node是有效的。
master node on-flash结构
/**
 * struct ubifs_mst_node - on-flash layout of the UBIFS master node.
 * @ch: node header (its sqnum is read by ubifs_read_master())
 * @highest_inum: current highest inode number; incremented for every newly
 *                created file, inode numbers are never reused
 * @cmt_no: number of the last commit of the file system
 * @flags: master node flag bits (ubifs_read_master() uses UBIFS_MST_RCVRY and
 *         UBIFS_MST_NO_ORPHS); not described further in the accompanying notes
 * @log_lnum: LEB number where the log area starts
 * @root_lnum: LEB number of the root index node
 * @root_offs: offset of the root index node within its LEB
 * @root_len: length of the root index node
 * @gc_lnum: LEB number reserved for garbage collection; if it is not set, one
 *           has to be allocated at mount time
 * @ihead_lnum: LEB number where index nodes can be written; it exists because
 *              index nodes and non-index nodes are kept in different LEBs
 * @ihead_offs: offset within @ihead_lnum
 * @index_size: size of the index on flash
 * @total_free: sum of the free space (writable space at the end of an erase
 *              block) over all LEBs managed by the LPT
 * @total_dirty: sum of the dirty space (obsolete nodes and padding that GC may
 *               reclaim) over all LEBs managed by the LPT
 * @total_used: total used space in bytes (includes only data LEBs)
 * @total_dead: total dead space in bytes (includes only data LEBs)
 * @total_dark: total dark space in bytes (includes only data LEBs)
 * @lpt_lnum: LEB number of the LPT root nnode
 * @lpt_offs: offset of the LPT root nnode within its LEB
 * @nhead_lnum: erase block where LPT nnodes can currently be written
 * @nhead_offs: offset within @nhead_lnum
 * @ltab_lnum: LEB holding the LPT's own lprops table
 * @ltab_offs: presumably the offset within @ltab_lnum (not described in the
 *             accompanying notes - TODO confirm)
 * @lsave_lnum: LEB of the lsave table (big model only); the table lists
 *              important LEBs so that mount can read their pnodes without
 *              scanning the whole LPT
 * @lsave_offs: offset within @lsave_lnum
 * @lscan_lnum: LEB number of the last LPT scan
 * @empty_lebs: number of empty LEBs
 * @idx_lebs: number of LEBs currently used by the index
 * @leb_cnt: number of LEBs used by the file system
 * @padding: pads the master node to 512 bytes, per the accompanying notes
 */
struct ubifs_mst_node {
struct ubifs_ch ch;
__le64 highest_inum;
__le64 cmt_no;
__le32 flags;
__le32 log_lnum;
__le32 root_lnum;
__le32 root_offs;
__le32 root_len;
__le32 gc_lnum;
__le32 ihead_lnum;
__le32 ihead_offs;
__le64 index_size;
__le64 total_free;
__le64 total_dirty;
__le64 total_used;
__le64 total_dead;
__le64 total_dark;
__le32 lpt_lnum;
__le32 lpt_offs;
__le32 nhead_lnum;
__le32 nhead_offs;
__le32 ltab_lnum;
__le32 ltab_offs;
__le32 lsave_lnum;
__le32 lsave_offs;
__le32 lscan_lnum;
__le32 empty_lebs;
__le32 idx_lebs;
__le32 leb_cnt;
__u8 padding[344];
} __attribute__ ((packed));
@highest_inum: 当前inode number, 每创建一个新文件highest_inum都会加1,
ubifs的inode number是不能复用的, 比如file1的ino是100, 之后即便删除了file1, 但是ino 100在整个文件系统的生命周期内再也不能使用了.
@cmt_no: 文件系统最后一个commit number
@log_lnum: log area开始的LEB number
@root_lnum: root index node所在的LEB number
@root_offs: root index node在LEB内的偏移
@root_len: root index node的长度
@gc_lnum: UBIFS为garbage collection保留的LEB number, 如果没有设置,那么在系统mount时,要进行分配.
@ihead_lnum: 记录可写入index node的LEB number, ihead_lnum存在的目的是index node和non-index node要写在不同的LEB中.
@ihead_offs: ihead_lnum内的偏移量
@index_size: index在flash上的大小
@total_free: free space是擦除块尾部可写的空间大小,total_free则是LPT所管理的LEB空闲空间总和
@total_dirty: dirty space是废弃的和paddings节点,有可能被GC处理的空间大小. total dirty是LPT所管理LEB的dirty空间总和
@total_used: total used space in bytes (includes only data LEBs)
@total_dead: total dead space in bytes (includes only data LEBs)
@total_dark: total dark space in bytes (includes only data LEBs)
@lpt_lnum: LPT root nnode所在的LEB number
@lpt_offs: LPT root nnode在LEB内的偏移量
@nhead_lnum: 记录当前可写入LPT nnode的擦除块
@nhead_offs: nhead_lnum内的偏移量
@ltab_lnum: LPT自身lprops table所在的LEB
@ltab_offs: ltab_lnum内的偏移量
@lsave_lnum: 仅big model有效, lsave table内保存着一些重要的LEBs, 重要LEBs是指(重要性从高到低):empty, freeable, freeable index, dirty index, dirty or free, 有了这个表,我们就可以在mount时把这些LEB的pnode读入内存,以防止扫描整个LPT; 对于small model, 系统假定扫描LPT代价很小;
@lsave_offs: offset of lsave_lnum
@lscan_lnum: 最后一次LPT scan的LEB number
@empty_lebs: empty LEBs 数目
@idx_lebs: 是当前index使用的LEBs数目, 这个数目可能很大, 因为UBIFS在还有空闲空间的情况下,并不对index lebs做合并. 也就是说index lebs占用了很多空间,但是又包含了很多dirty space.
@leb_cnt: 文件系统使用的LEB数目
@padding[344]: master node对齐为512 bytes
fs/ubifs/master.c
27 /**
28 * scan_for_master - search the valid master node.
29 * @c: UBIFS file-system description object
30 *
31 * This function scans the two master node LEBs and searches for the latest
32 * master node. Returns zero in case of success, %-EUCLEAN if the master area
33 * is corrupted and requires recovery, and a negative error code in case of
34 * failure. On success the node is left in @c->mst_node and @c->mst_offs is set.
35 */
36 static int scan_for_master(struct ubifs_info *c)
37 {
38 struct ubifs_scan_leb *sleb;
39 struct ubifs_scan_node *snod;
40 int lnum, offs = 0, nodes_cnt;
41
42 lnum = UBIFS_MST_LNUM;
43
44 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); /* scan the first master LEB */
45 if (IS_ERR(sleb))
46 return PTR_ERR(sleb);
47 nodes_cnt = sleb->nodes_cnt;
48 if (nodes_cnt > 0) {
49 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
50 list); /* last node on the scan list */
51 if (snod->type != UBIFS_MST_NODE)
52 goto out_dump;
53 memcpy(c->mst_node, snod->node, snod->len);
54 offs = snod->offs;
55 }
56 ubifs_scan_destroy(sleb);
57
58 lnum += 1; /* move on to the second master LEB */
59
60 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
61 if (IS_ERR(sleb))
62 return PTR_ERR(sleb);
63 if (sleb->nodes_cnt != nodes_cnt) /* both LEBs must hold the same number of nodes */
64 goto out;
65 if (!sleb->nodes_cnt) /* no master node in either LEB */
66 goto out;
67 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
68 if (snod->type != UBIFS_MST_NODE)
69 goto out_dump;
70 if (snod->offs != offs) /* the last nodes must sit at the same offset */
71 goto out;
72 if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
73 (void *)snod->node + UBIFS_CH_SZ,
74 UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) /* compare everything after the first UBIFS_CH_SZ bytes */
75 goto out;
76 c->mst_offs = offs;
77 ubifs_scan_destroy(sleb);
78 return 0;
79
80 out:
81 ubifs_scan_destroy(sleb);
82 return -EUCLEAN; /* the two copies disagree */
83
84 out_dump:
85 ubifs_err("unexpected node type %d master LEB %d:%d",
86 snod->type, lnum, snod->offs);
87 ubifs_scan_destroy(sleb);
88 return -EINVAL;
89 }
这个函数扫描master node所在的LEB,找到有效的master node,把master node的内容复制到ubifs_info->mst_node成员变量中
44 扫描master LEB,生成UBIFS scanned LEB information,结果保存在sleb中
47~56 找到最后一个snod(扫描结果链表中的最后一个节点),如果node类型为MASTER NODE,那么把这个node的内容复制到c->mst_node中
58 准备扫描backup master node
60 ~ 77 确保第二个master LEB内的master node和第一个master LEB内相同
78 返回0表示扫描成功
90
91 /**
92 * validate_master - validate master node.
93 * @c: UBIFS file-system description object
94 *
95 * This function validates data which was read from master node. Returns zero
96 * if the data is all right and %-EINVAL if not. On failure the number of the
97 * check that failed is printed together with a dump of the master node.
98 */
98 static int validate_master(const struct ubifs_info *c)
99 {
100 long long main_sz;
101 int err; /* number of the failed check, printed at 'out'; not an errno */
102
103 if (c->max_sqnum >= SQNUM_WATERMARK) {
104 err = 1; /* max_sqnum is at or above SQNUM_WATERMARK */
105 goto out;
106 }
107
108 if (c->cmt_no >= c->max_sqnum) {
109 err = 2; /* commit number is not below max_sqnum */
110 goto out;
111 }
112
113 if (c->highest_inum >= INUM_WATERMARK) {
114 err = 3; /* highest_inum is at or above INUM_WATERMARK */
115 goto out;
116 }
117
118 if (c->lhead_lnum < UBIFS_LOG_LNUM ||
119 c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs ||
120 c->lhead_offs < 0 || c->lhead_offs >= c->leb_size ||
121 c->lhead_offs & (c->min_io_size - 1)) {
122 err = 4; /* log head LEB outside the log LEBs, or bad log head offset */
123 goto out;
124 }
125
126 if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first ||
127 c->zroot.offs >= c->leb_size || c->zroot.offs & 7) {
128 err = 5; /* index root LEB outside [main_first, leb_cnt), or bad root offset */
129 goto out;
130 }
131
132 if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len ||
133 c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) {
134 err = 6; /* index root length outside the UBIFS_IDX_NODE length range */
135 goto out;
136 }
137
138 if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) {
139 err = 7; /* gc_lnum outside [main_first, leb_cnt) */
140 goto out;
141 }
142
143 if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first ||
144 c->ihead_offs % c->min_io_size || c->ihead_offs < 0 ||
145 c->ihead_offs > c->leb_size || c->ihead_offs & 7) {
146 err = 8; /* bad index head LEB or offset */
147 goto out;
148 }
149
150 main_sz = (long long)c->main_lebs * c->leb_size; /* total bytes in main_lebs LEBs */
151 if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
152 err = 9; /* old index size not a multiple of 8, or not below main_sz */
153 goto out;
154 }
155
156 if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last ||
157 c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) {
158 err = 10; /* LPT root outside [lpt_first, lpt_last], or nnode does not fit in the LEB */
159 goto out;
160 }
161
162 if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last ||
163 c->nhead_offs < 0 || c->nhead_offs % c->min_io_size ||
164 c->nhead_offs > c->leb_size) {
165 err = 11; /* bad LPT head LEB or offset */
166 goto out;
167 }
168
169 if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last ||
170 c->ltab_offs < 0 ||
171 c->ltab_offs + c->ltab_sz > c->leb_size) {
172 err = 12; /* ltab outside [lpt_first, lpt_last], or does not fit in the LEB */
173 goto out;
174 }
175
176 if (c->big_lpt && (c->lsave_lnum < c->lpt_first ||
177 c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 ||
178 c->lsave_offs + c->lsave_sz > c->leb_size)) {
179 err = 13; /* checked only when big_lpt is set: bad lsave LEB or offset */
180 goto out;
181 }
182
183 if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt) {
184 err = 14; /* lscan_lnum outside [main_first, leb_cnt) */
185 goto out;
186 }
187
188 if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2) {
189 err = 15; /* empty_lebs negative or above main_lebs - 2 */
190 goto out;
191 }
192
193 if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1) {
194 err = 16; /* idx_lebs negative or above main_lebs - 1 */
195 goto out;
196 }
197
198 if (c->lst.total_free < 0 || c->lst.total_free > main_sz ||
199 c->lst.total_free & 7) {
200 err = 17; /* total_free negative, above main_sz, or not a multiple of 8 */
201 goto out;
202 }
203
204 if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7)) {
205 err = 18; /* total_dirty negative or not a multiple of 8 */
206 goto out;
207 }
208
209 if (c->lst.total_used < 0 || (c->lst.total_used & 7)) {
210 err = 19; /* total_used negative or not a multiple of 8 */
211 goto out;
212 }
213
214 if (c->lst.total_free + c->lst.total_dirty +
215 c->lst.total_used > main_sz) {
216 err = 20; /* free + dirty + used exceeds main_sz */
217 goto out;
218 }
219
220 if (c->lst.total_dead + c->lst.total_dark +
221 c->lst.total_used + c->old_idx_sz > main_sz) {
222 err = 21; /* dead + dark + used + old index size exceeds main_sz */
223 goto out;
224 }
225
226 if (c->lst.total_dead < 0 ||
227 c->lst.total_dead > c->lst.total_free + c->lst.total_dirty ||
228 c->lst.total_dead & 7) {
229 err = 22; /* total_dead negative, above free + dirty, or not a multiple of 8 */
230 goto out;
231 }
232
233 if (c->lst.total_dark < 0 ||
234 c->lst.total_dark > c->lst.total_free + c->lst.total_dirty ||
235 c->lst.total_dark & 7) {
236 err = 23; /* total_dark negative, above free + dirty, or not a multiple of 8 */
237 goto out;
238 }
239
240 return 0;
241
242 out:
243 ubifs_err("bad master node at offset %d error %d", c->mst_offs, err);
244 dbg_dump_node(c, c->mst_node);
245 return -EINVAL;
246 }
这个函数验证从master node读取并保存到ubifs_info中的各项数值,任何一项检查失败都会打印失败的检查编号并返回-EINVAL
103 检查max_sqnum,必须小于SQNUM_WATERMARK(代码中的判断条件是>=),max_sqnum是64bit的,系统实际上不大可能达到这个界限
113 检查highest_inum,系统支持的最高inode number,ubifs不能支持inode number复用,highest_inum是64bit,理论上也不大可能超过这个界限
247
248 /**
249 * ubifs_read_master - read master node.
250 * @c: UBIFS file-system description object
251 *
252 * This function finds and reads the master node during file-system mount. If
253 * the flash is empty, it creates default master node as well. Returns zero in
254 * case of success and a negative error code in case of failure.
255 * NOTE(review): no default master node creation is visible in this body - confirm.
255 */
256 int ubifs_read_master(struct ubifs_info *c)
257 {
258 int err, old_leb_cnt;
259
260 c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL);
261 if (!c->mst_node)
262 return -ENOMEM;
263
264 err = scan_for_master(c);
265 if (err) {
266 if (err == -EUCLEAN) /* master area needs recovery: try it before giving up */
267 err = ubifs_recover_master_node(c);
268 if (err)
269 /*
270 * Note, we do not free 'c->mst_node' here because the
271 * unmount routine will take care of this.
272 */
273 return err;
274 }
275
276 /* Make sure that the recovery flag is clear */
277 c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY);
278
279 c->max_sqnum = le64_to_cpu(c->mst_node->ch.sqnum); /* convert the little-endian on-flash fields to CPU order */
280 c->highest_inum = le64_to_cpu(c->mst_node->highest_inum);
281 c->cmt_no = le64_to_cpu(c->mst_node->cmt_no);
282 c->zroot.lnum = le32_to_cpu(c->mst_node->root_lnum);
283 c->zroot.offs = le32_to_cpu(c->mst_node->root_offs);
284 c->zroot.len = le32_to_cpu(c->mst_node->root_len);
285 c->lhead_lnum = le32_to_cpu(c->mst_node->log_lnum);
286 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
287 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
288 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
289 c->old_idx_sz = le64_to_cpu(c->mst_node->index_size);
290 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
291 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
292 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
293 c->nhead_offs = le32_to_cpu(c->mst_node->nhead_offs);
294 c->ltab_lnum = le32_to_cpu(c->mst_node->ltab_lnum);
295 c->ltab_offs = le32_to_cpu(c->mst_node->ltab_offs);
296 c->lsave_lnum = le32_to_cpu(c->mst_node->lsave_lnum);
297 c->lsave_offs = le32_to_cpu(c->mst_node->lsave_offs);
298 c->lscan_lnum = le32_to_cpu(c->mst_node->lscan_lnum);
299 c->lst.empty_lebs = le32_to_cpu(c->mst_node->empty_lebs);
300 c->lst.idx_lebs = le32_to_cpu(c->mst_node->idx_lebs);
301 old_leb_cnt = le32_to_cpu(c->mst_node->leb_cnt); /* LEB count stored in the master node */
302 c->lst.total_free = le64_to_cpu(c->mst_node->total_free);
303 c->lst.total_dirty = le64_to_cpu(c->mst_node->total_dirty);
304 c->lst.total_used = le64_to_cpu(c->mst_node->total_used);
305 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
306 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
307
308 c->calc_idx_sz = c->old_idx_sz;
309
310 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
311 c->no_orphs = 1;
312
313 if (old_leb_cnt != c->leb_cnt) {
314 /* The file system has been resized */
315 int growth = c->leb_cnt - old_leb_cnt;
316
317 if (c->leb_cnt < old_leb_cnt ||
318 c->leb_cnt < UBIFS_MIN_LEB_CNT) { /* a smaller LEB count than recorded is rejected */
319 ubifs_err("bad leb_cnt on master node");
320 dbg_dump_node(c, c->mst_node);
321 return -EINVAL;
322 }
323
324 dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs",
325 old_leb_cnt, c->leb_cnt);
326 c->lst.empty_lebs += growth; /* every added LEB is counted as empty */
327 c->lst.total_free += growth * (long long)c->leb_size;
328 c->lst.total_dark += growth * (long long)c->dark_wm;
329
330 /*
331 * Reflect changes back onto the master node. N.B. the master
332 * node gets written immediately whenever mounting (or
333 * remounting) in read-write mode, so we do not need to write it
334 * here.
335 */
336 c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt);
337 c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs);
338 c->mst_node->total_free = cpu_to_le64(c->lst.total_free);
339 c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark);
340 }
341
342 err = validate_master(c); /* validation runs after any resize adjustment above */
343 if (err)
344 return err;
345
346 err = dbg_old_index_check_init(c, &c->zroot);
347
348 return err;
349 }
该函数从flash上读取master node,保存到ubifs_info->mst_node,并且用master node的一些值初始化ubifs_info。
313 ~ 328 如果发现ubifs_info->leb_cnt(根据superblock中的leb_cnt初始化)不等于master node中的leb count,说明文件系统已经被resize了,需要重新设置从master node得到的系统参数
330 ~ 340 修改c->mst_node内相应的值,把改变反映到master node上
351 /**
352 * ubifs_write_master - write master node.
353 * @c: UBIFS file-system description object
354 *
355 * This function writes the master node. The caller has to take the
356 * @c->mst_mutex lock before calling this function. Returns zero in case of
357 * success and a negative error code in case of failure (%-EROFS if
358 * @c->ro_media is set). The master node is written twice to enable recovery.
358 */
360 int ubifs_write_master(struct ubifs_info *c)
361 {
362 int err, lnum, offs, len;
363
364 if (c->ro_media)
365 return -EROFS;
366
367 lnum = UBIFS_MST_LNUM;
368 offs = c->mst_offs + c->mst_node_alsz; /* next slot after the current master node */
369 len = UBIFS_MST_NODE_SZ;
370
371 if (offs + UBIFS_MST_NODE_SZ > c->leb_size) { /* no room left for another node */
372 err = ubifs_leb_unmap(c, lnum);
373 if (err)
374 return err;
375 offs = 0; /* restart at the beginning of the LEB */
376 }
377
378 c->mst_offs = offs;
379 c->mst_node->highest_inum = cpu_to_le64(c->highest_inum);
380
381 err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
382 if (err)
383 return err;
384
385 lnum += 1; /* second master LEB */
386
387 if (offs == 0) { /* unmapped only after the first copy has been written */
388 err = ubifs_leb_unmap(c, lnum);
389 if (err)
390 return err;
391 }
392 err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
393
394 return err;
395 }
把c->mst_node写入flash,需要写两个master node
371 ~ 376 如果已经写到LEB的末尾,剩余空间不能容纳一个master node,那么unmap master LEB,分配一个新的LEB。
378 ~ 383 写入第一个master node
385 ~ 392 写入backup的master node