限于作者能力水平,本文可能存在谬误,因此而给读者带来的损失,作者不做任何承诺。
系统启动后,用 fdisk -l
命令去查看磁盘分区的情况,如:
$ sudo fdisk -l
Disk /dev/sda: 200 GiB, 214748364800 bytes, 419430400 sectors
Units: sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disklabel type: dos
Disk identifier: 0x54b75f22
Device Boot Start End Sectors Size Id Type
/dev/sda1 * 2048 411041791 411039744 196G 83 Linux
/dev/sda2 411041792 419430399 8388608 4G 5 Extended
/dev/sda5 411043840 419430399 8386560 4G 82 Linux swap / Solaris
我想大家会好奇,这些磁盘分区是怎么划定的?系统刚启动,用户还没有手工进行磁盘分区操作,是谁完成了这些工作?本文针对 嵌入式系统
环境,按设备的组织形式和分区设定方式的不同,对系统启动期间几种比较典型的磁盘分区的建立过程做简要介绍。
以 TI(Texas Instruments)
的 AM335X
为例,说明 通过 DTS 设定创建磁盘分区
的简要流程。先看 DTS 配置:
gpmc: gpmc@50000000 {
compatible = "ti,am3352-gpmc";
...
nand@0,0 {
compatible = "ti,omap2-nand";
...
/* 通过 DTS 将 NAND 设备划分为 11 个分区 */
partition@0 {
label = "NAND.SPL";
reg = <0x00000000 0x000020000>;
};
partition@1 {
label = "NAND.SPL.backup1";
reg = <0x00020000 0x00020000>;
};
partition@2 {
label = "NAND.SPL.backup2";
reg = <0x00040000 0x00020000>;
};
partition@3 {
label = "NAND.SPL.backup3";
reg = <0x00060000 0x00020000>;
};
partition@4 {
label = "NAND.u-boot-spl-os";
reg = <0x00080000 0x00040000>;
};
partition@5 {
label = "NAND.u-boot";
reg = <0x000C0000 0x00100000>;
};
partition@6 {
label = "NAND.u-boot-env";
reg = <0x001C0000 0x00020000>;
};
partition@7 {
label = "NAND.u-boot-env.backup1";
reg = <0x001E0000 0x00020000>;
};
partition@8 {
label = "NAND.kernel";
reg = <0x00200000 0x00800000>;
};
partition@9 {
label = "NAND.rootfs";
reg = <0x00A00000 0x0D600000>;
};
partition@10 {
label = "NAND.userdata";
reg = <0x0E000000 0>;
};
};
};
通过 DTS 分区配置建立 MTD 存储设备分区流程如下:
omap_nand_probe() /* drivers/mtd/nand/raw/omap2.c */
...
err = mtd_device_register(mtd, NULL, 0); /* include/linux/mtd/mtd.h */
/* @parts = NULL, @nr_parts = 0 */
mtd_device_parse_register(master, NULL, NULL, parts, nr_parts) /* drivers/mtd/core.c */
ret = parse_mtd_partitions(mtd, types, parser_data);
...
/* 解析 MTD 设备 DTS 设定的分区 */
ret = mtd_part_of_parse(master, &pparts);
...
/* drivers/mtd/ofpart.c */
parse_fixed_partitions() /* DTS 分区解析 */
...
/* 注册 MTD 设备分区到系统 */
err = add_mtd_partitions(master, pparts.parts,
pparts.nr_parts);
在上述过程中,可以观察到如下内核日志:
[ 1.635286] omap-gpmc 50000000.gpmc: GPMC revision 6.0
[ 1.640473] gpmc_mem_init: disabling cs 0 mapped at 0x0-0x1000000
[ 1.648388] nand: device found, Manufacturer ID: 0x2c, Chip ID: 0xda
[ 1.654908] nand: Micron MT29F2G08AAD
[ 1.658589] nand: 256 MiB, SLC, erase size: 128 KiB, page size: 2048, OOB size: 64
[ 1.666252] nand: using OMAP_ECC_BCH8_CODE_HW ECC scheme
[ 1.671692] 11 fixed-partitions partitions found on MTD device omap2-nand.0
[ 1.678704] Creating 11 MTD partitions on "omap2-nand.0":
[ 1.684146] 0x000000000000-0x000000020000 : "NAND.SPL"
[ 1.690415] 0x000000020000-0x000000040000 : "NAND.SPL.backup1"
[ 1.697268] 0x000000040000-0x000000060000 : "NAND.SPL.backup2"
[ 1.704018] 0x000000060000-0x000000080000 : "NAND.SPL.backup3"
[ 1.710719] 0x000000080000-0x0000000c0000 : "NAND.u-boot-spl-os"
[ 1.717798] 0x0000000c0000-0x0000001c0000 : "NAND.u-boot"
[ 1.724932] 0x0000001c0000-0x0000001e0000 : "NAND.u-boot-env"
[ 1.731558] 0x0000001e0000-0x000000200000 : "NAND.u-boot-env.backup1"
[ 1.738956] 0x000000200000-0x000000a00000 : "NAND.kernel"
[ 1.752614] 0x000000a00000-0x00000e000000 : "NAND.rootfs"
[ 1.958515] 0x00000e000000-0x000010000000 : "NAND.userdata"
除了上一小节提到的通过 DTS 指定 MTD 设备分区外,我们还可以通过 内核命令行参数 mtdparts=
来指定 MTD 设备的分区。如:
mtdparts=8000000.nand:128k(NAND.SPL),128k(NAND.SPL.backup1),128k(NAND.SPL.backup2),128k(NAND.SPL.backup3),256k(NAND.u-boot-spl-os),1M(NAND.u-boot),128k(NAND.u-boot-env),128k(NAND.u-boot-env.backup1),8M(NAND.kernel),214M(NAND.rootfs),32M(NAND.userdata)
除了解析接口变为 parse_cmdline_partitions()
外,其它解析过程与上一小节中描述的类似。假设我们仍然使用上一小节中的 OMAP NAND 设备,只是改用 内核命令行参数 mtdparts=
的形式,而不是 DTS 来定义设备分区(注意:mtdparts= 中冒号前的 mtd-id 需要与内核中该 MTD 设备的名称一致,否则分区定义不会应用到该设备)。细节如下:
omap_nand_probe() /* drivers/mtd/nand/raw/omap2.c */
...
err = mtd_device_register(mtd, NULL, 0); /* include/linux/mtd/mtd.h */
/* @parts = NULL, @nr_parts = 0 */
mtd_device_parse_register(master, NULL, NULL, parts, nr_parts) /* drivers/mtd/core.c */
ret = parse_mtd_partitions(mtd, types, parser_data);
...
/* drivers/mtd/cmdlinepart.c */
parse_cmdline_partitions() /* 解析内核命令行 MTD 分区参数: mtdparts= */
...
/* 注册 MTD 设备分区到系统 */
err = add_mtd_partitions(master, pparts.parts,
pparts.nr_parts);
MTD (Memory Technology Device)
类设备分区,可通过 DTS
或 内核命令行参数 mtdparts=
两种方式进行设定。MTD 设备驱动调用 mtd_device_register()
将设备注册到系统期间,MTD 子系统通过 register_mtd_parser()
注册的 MTD 分区解析器,解析 MTD 的设备分区并注册到系统。
sysfs
目录:/sys/bus/platform/devices/8000000.nand/mtd/*
/sys/class/mtd/*
/dev/mtdN
/dev/mtdNro
/dev/mtdblockN
通过 块(block)设备驱动
进行管理的 块(block)类设备
的分区,其建立过程不同于 MTD(Memory Technology Device)
类设备分区,有着另外一套方式。
以 Rockchip 平台上通过 SFC(Serial Flash Controller) 接口连接、烧写有 GPT 分区信息的 NAND Flash 的分区建立过程为例,来描述 块(block)类设备
的分区的建立过程。我们只重点关注平台无关的部分:
/* Rockchip 平台相关部分:不用在意 */
ret = rkflash_blk_register(&mytr);
...
ret = register_blkdev(blk_ops->major, blk_ops->name);
...
blk_ops->rq = blk_mq_init_sq_queue(blk_ops->tag_set, &rkflash_mq_ops, 1,
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
...
rkflash_blk_add_dev(dev, blk_ops, &part);
struct gendisk *gd;
gd = alloc_disk(1 << blk_ops->minorbits);
...
snprintf(gd->disk_name, sizeof(gd->disk_name),
"%s%d", blk_ops->name, dev->devnum); /* "rkflash0" */
...
add_disk(gd); /* 关注的重点 */
/* 平台无关部分 */
add_disk(gd) /* include/linux/genhd.h */
device_add_disk(NULL, disk, NULL); /* block/genhd.c */
...
register_disk(parent, disk);
...
bdev->bd_invalidated = 1;
err = blkdev_get(bdev, FMODE_READ, NULL);
...
res = __blkdev_get(bdev, mode, 0);
...
if (!bdev->bd_openers) {
...
if (!partno) {
...
if (bdev->bd_invalidated) {
if (!ret)
rescan_partitions(disk, bdev); // 见后续分析 (1)
...
}
...
}
...
}
...
...
...
...
// 接前面 (1) 处分析
rescan_partitions(disk, bdev); /* block/partition-generic.c */
...
/*
* 1. 解析 block 设备磁盘分区:
* check_partition() 调用 block/partitions/check.c 分区解析接口表
* @check_part[] 中各接口, 尝试解析 block 磁盘分区到 @state .
*/
if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) // 见后续分析 (2)
return 0;
...
/* add partitions */
/* 2. 添加 block 磁盘所有分区到系统 */
for (p = 1; p < state->limit; p++) {
...
part = add_partition(disk, p, from, size,
state->parts[p].flags,
&state->parts[p].info);
...
}
// 接前面 (2) 处分析
state = check_partition(disk, bdev) /* block/partitions/check.c */
struct parsed_partitions *state;
...
state = allocate_partitions(hd);
...
state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
state->pp_buf[0] = '\0';
state->bdev = bdev;
disk_name(hd, 0, state->name); /* 磁盘名称, 如 rkflash0 */
snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name); /* @state->pp_buf: " rkflash0:" */
if (isdigit(state->name[strlen(state->name)-1]))
sprintf(state->name, "p"); /* @state->name: "p" */
/*
* 调用 block/partitions/check.c 分区解析接口表 @check_part[]
* 中各接口, 尝试解析 block 磁盘分区
*/
while (!res && check_part[i]) {
memset(state->parts, 0, state->limit * sizeof(state->parts[0]));
/* 假设 block 设备使用 GPT 分区 */
res = check_part[i++](state); /* efi_partition() */ // 见后续分析 (3)
...
}
...
if (res > 0) {
/*
* 打印 GPT 分区信息 内核日志:
* [ 0.513590] rkflash0: p1 p2 p3 p4 p5 p6
*/
printk(KERN_INFO "%s", state->pp_buf);
free_page((unsigned long)state->pp_buf);
return state;
}
...
// 接前面 (3) 处分析
res = check_part[i++](state);
efi_partition(state) /* block/partitions/efi.c */
...
if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
...
}
pr_debug("GUID Partition Table is valid! Yea!\n");
for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
...
put_partition(state, i+1, start * ssz, size * ssz); /* 提取 GPT 分区信息 */
...
}
到此,块(block)设备
GPT 类型分区的解析过程已经分析完毕。事实上,块设备支持很多类型的分区解析器:
static int (*check_part[])(struct parsed_partitions *) = {
...
#ifdef CONFIG_CMDLINE_PARTITION
/* block/partitions/cmdline.c: 内核命令行参数 "blkdevparts=" */
cmdline_partition,
#endif
#ifdef CONFIG_EFI_PARTITION
/* block/partitions/efi.c */
efi_partition, /* this must come before msdos */
#endif
#ifdef CONFIG_SGI_PARTITION
/* block/partitions/sgi.c */
sgi_partition,
#endif
#ifdef CONFIG_LDM_PARTITION
/* block/partitions/ldm.c */
ldm_partition, /* this must come before msdos */
#endif
#ifdef CONFIG_MSDOS_PARTITION
/* block/partitions/msdos.c */
msdos_partition, /* MS DOS 分区 */
#endif
...
NULL /* NULL 表示结尾 */
};
可以看到,块(block)设备
支持很多类型的分区,感兴趣的读者可自行查阅相关源码。
/sys/class/block/*
/sys/devices/virtual/block/*
/dev/block/*