备注:
1. Kernel版本:5.4
2. 使用工具:Source Insight 4.0
3. 参考博客:
(1)[sd card] sd card初始化流程
(2)[sd card] mmc_blk层为sd card创建块设备流程
由通用块层(Generic Block Layer)负责维持一个I/O请求在上层文件系统与底层物理磁盘之间的关系。在通用块层中,通常用一个bio结构体来对应一个I/O请求。
在Linux中,驱动对块设备的输入或输出(I/O)操作,都会向块设备发出一个请求,在IO调度层中用request结构体描述。
但对于一些磁盘设备而言,处理请求的速度很慢,这时候内核就提供一种队列机制,把这些I/O请求(request结构体)添加到队列中(即:请求队列),在驱动中用request_queue结构体描述。在向块设备提交这些请求前,内核会先对请求执行合并和排序等预处理,以提高访问效率,然后再由内核中的I/O调度程序子系统负责提交I/O请求。调度程序将磁盘资源分配给系统中所有挂起的块I/O请求,其工作是管理块设备的请求队列,决定队列中请求的排列顺序以及何时把请求派发到设备。
Linux提供了一个gendisk数据结构体,用来表示一个独立的磁盘设备或分区,用于对底层物理磁盘进行访问。在gendisk中有一个类似字符设备中file_operations的硬件操作结构指针,是block_device_operations结构体。
文件系统—>通用块层—>IO调度层—>块设备驱动层
文件系统会向通用块层发起块读写请求
通用块层将读写请求封装成bio结构体(block io),下发到IO调度层
IO调度层将bio封装成request结构体,并将request添加到对应块设备(gendisk)的请求队列(request_queue)中,并做一些预处理。
IO调度层对请求队列(request_queue)中的request进行预处理之后,调用请求队列(request_queue)的回调函数来对队列中的IO(request)进行处理(在本文分析的blk-mq框架下,该回调即struct blk_mq_ops中的.queue_rq成员,mmc对应的实现是mmc_mq_queue_rq,见下文的mmc_mq_ops)
而请求队列(request_queue)以及request处理回调函数则是由块设备驱动层提供,也就是块设备驱动层会从请求队列(request_queue)提取request并且进行实质性的处理。
通用块层:struct bio
IO调度层:struct request(封装了struct bio)、struct request_queue
块设备驱动层:
各类块设备驱动层对于IO请求的封装是不一样的。以sd card的mmc块设备驱动层为例:
struct mmc_queue_req(封装了struct request)、struct mmc_queue(封装了struct request_queue)。
对于sd card而言,整个数据流是:
文件系统—>通用块层—>IO调度层—>mmc_blk层—>mmc core层—>mmc host层—>mmc硬件总线—>sd card
//源码:drivers/mmc/core/block.c
/*
 * Driver descriptor for the MMC block layer. It names the driver "mmcblk",
 * attaches the power-management operations and supplies the probe, remove
 * and shutdown callbacks.
 */
static struct mmc_driver mmc_driver = {
	.probe		= mmc_blk_probe,
	.remove		= mmc_blk_remove,
	.shutdown	= mmc_blk_shutdown,
	.drv		= {
		.pm	= &mmc_blk_pm_ops,
		.name	= "mmcblk",
	},
};
/*
 * Module init for the MMC block driver.
 *
 * Registers, in order: the RPMB bus type, the "rpmb" character device
 * region, the "mmc" block device major and finally the mmcblk driver.
 * If any step fails, every earlier registration is undone in reverse
 * order and the error code of the failing step is returned.
 *
 * Return: 0 on success, otherwise the error from the failing registration.
 */
static int __init mmc_blk_init(void)
{
	int err;

	err = bus_register(&mmc_rpmb_bus_type);
	if (err < 0) {
		pr_err("mmcblk: could not register RPMB bus type\n");
		return err;
	}

	err = alloc_chrdev_region(&mmc_rpmb_devt, 0, MAX_DEVICES, "rpmb");
	if (err < 0) {
		pr_err("mmcblk: failed to allocate rpmb chrdev region\n");
		goto unreg_bus;
	}

	if (perdev_minors != CONFIG_MMC_BLOCK_MINORS)
		pr_info("mmcblk: using %d minors per device\n", perdev_minors);

	/* Cap the device count by how many minors fit in the minor space. */
	max_devices = min(MAX_DEVICES, (1 << MINORBITS) / perdev_minors);

	/* Register the mmc block device major. */
	err = register_blkdev(MMC_BLOCK_MAJOR, "mmc");
	if (err)
		goto unreg_chrdev;

	/*
	 * Register the mmcblk driver. NOTE(review): presumably its probe
	 * callback runs once a matching card shows up on the mmc bus; the
	 * bus matching code is not part of this excerpt.
	 */
	err = mmc_register_driver(&mmc_driver);
	if (!err)
		return 0;

	/* Driver registration failed: fall through and unwind everything. */
	unregister_blkdev(MMC_BLOCK_MAJOR, "mmc");
unreg_chrdev:
	unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES);
unreg_bus:
	bus_unregister(&mmc_rpmb_bus_type);
	return err;
}
//源码:drivers/mmc/core/block.c
/*
 * Probe callback of the mmcblk driver: builds the block device(s) for @card.
 *
 * Steps, as visible in this function:
 *   1. Reject cards that do not advertise the block-read command class.
 *   2. Apply the mmc_blk_fixups quirk table and create the "mmc_complete"
 *      workqueue.
 *   3. Allocate and initialise the mmc_blk_data for the main area via
 *      mmc_blk_alloc() and log the disk name and capacity.
 *   4. Allocate blk data for additional partitions, register the main disk
 *      and every entry on md->part with mmc_add_disk().
 *   5. Add debugfs entries and configure runtime PM.
 *
 * Every failure after the workqueue has been created destroys that
 * workqueue again and returns the real error code, so a failed probe is
 * never reported as success.
 *
 * Return: 0 on success, a negative errno on failure.
 */
static int mmc_blk_probe(struct mmc_card *card)
{
	struct mmc_blk_data *md, *part_md;
	char cap_str[10];
	int ret;

	/*
	 * Check that the card supports the command class(es) we need.
	 *
	 * mmc_blk only drives storage cards; a card without block-read
	 * support cannot be handled by this driver.
	 */
	if (!(card->csd.cmdclass & CCC_BLOCK_READ))
		return -ENODEV;

	mmc_fixup_device(card, mmc_blk_fixups);

	/* Workqueue used for mmc request completion. */
	card->complete_wq = alloc_workqueue("mmc_complete",
					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (unlikely(!card->complete_wq)) {
		pr_err("Failed to create mmc completion workqueue");
		return -ENOMEM;
	}

	/*
	 * Allocate and set up the mmc_blk_data for the card. This also
	 * creates the mmc_queue/request_queue and the gendisk and links the
	 * gendisk to the request_queue (see mmc_blk_alloc_req()).
	 */
	md = mmc_blk_alloc(card);
	if (IS_ERR(md)) {
		ret = PTR_ERR(md);
		goto out_free;
	}

	string_get_size((u64)get_capacity(md->disk), 512, STRING_UNITS_2,
			cap_str, sizeof(cap_str));
	pr_info("%s: %s %s %s %s\n",
		md->disk->disk_name, mmc_card_id(card), mmc_card_name(card),
		cap_str, md->read_only ? "(ro)" : "");

	/*
	 * Allocate blk data for any additional physical partitions of the
	 * card. NOTE(review): e.g. the RPMB partition of an eMMC; the details
	 * live in mmc_blk_alloc_parts(), which is not shown here.
	 */
	ret = mmc_blk_alloc_parts(card, md);
	if (ret)
		goto out;

	/* Tie the mmc_blk_data to the card's device. */
	dev_set_drvdata(&card->dev, md);

	/* Register the main gendisk so the block device shows up. */
	ret = mmc_add_disk(md);
	if (ret)
		goto out;

	/* Register the gendisk of every additional partition as well. */
	list_for_each_entry(part_md, &md->part, part) {
		ret = mmc_add_disk(part_md);
		if (ret)
			goto out;
	}

	/* Add two debugfs entries */
	mmc_blk_add_debugfs(card, md);

	pm_runtime_set_autosuspend_delay(&card->dev, 3000);
	pm_runtime_use_autosuspend(&card->dev);

	/*
	 * Don't enable runtime PM for SD-combo cards here. Leave that
	 * decision to be taken during the SDIO init sequence instead.
	 */
	if (card->type != MMC_TYPE_SD_COMBO) {
		pm_runtime_set_active(&card->dev);
		pm_runtime_enable(&card->dev);
	}

	return 0;

out:
	mmc_blk_remove_parts(card, md);
	mmc_blk_remove_req(md);
out_free:
	/* Undo alloc_workqueue() so a failed probe does not leak it. */
	destroy_workqueue(card->complete_wq);
	return ret;
}
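通过上述代码,可以知道对于“mmc_blk层为sd card创建块设备流程”来说,有两个核心函数来实现:mmc_blk_alloc()(分配并初始化mmc_blk_data、mmc_queue/request_queue和gendisk)以及mmc_add_disk()(把gendisk注册到系统中,生成块设备)。下面依次分析。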
//源码:drivers/mmc/core/block.c
/*
 * Allocate the mmc_blk_data for the main data area of @card.
 *
 * Works out the card capacity in 512-byte sectors and hands everything
 * else to mmc_blk_alloc_req(), with the card's own device as parent, no
 * forced read-only, no sub-name and the MMC_BLK_DATA_AREA_MAIN area type.
 *
 * Return: whatever mmc_blk_alloc_req() returns.
 */
static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
{
	sector_t nr_sectors;

	if (mmc_card_sd(card) || !mmc_card_blockaddr(card)) {
		/*
		 * SD cards and cards that are not block-addressed: the CSD
		 * capacity field is in units of read_blkbits, while
		 * set_capacity takes units of 512 bytes, hence the shift.
		 */
		nr_sectors = (typeof(sector_t))card->csd.capacity
			<< (card->csd.read_blkbits - 9);
	} else {
		/*
		 * Block-addressed non-SD cards: the EXT_CSD sector count is
		 * already a number of 512 byte sectors.
		 */
		nr_sectors = card->ext_csd.sectors;
	}

	return mmc_blk_alloc_req(card, &card->dev, nr_sectors, false, NULL,
				 MMC_BLK_DATA_AREA_MAIN);
}
mmc_blk_alloc_req()函数
//源码:drivers/mmc/core/block.c
/*
 * Allocate and initialise one mmc_blk_data together with its queue and disk.
 *
 * @card:       card the block data belongs to
 * @parent:     device stored in md->parent
 * @size:       capacity handed to set_capacity()
 * @default_ro: when true the disk is marked read-only regardless of
 *              what mmc_blk_readonly() reports
 * @subname:    optional suffix appended to the disk name, may be NULL
 * @area_type:  stored in md->area_type; RPMB/BOOT areas get partition
 *              scanning and partition info suppressed
 *
 * Steps performed here:
 *   1. reserve a device index from mmc_blk_ida
 *   2. allocate the mmc_blk_data and fill in its basic fields
 *   3. allocate the disk with alloc_disk()
 *   4. initialise md->queue via mmc_init_queue() and take an extra
 *      reference on its request_queue
 *   5. fill in the disk (major, first minor, fops, private data, name,
 *      capacity, flags) and point it at the request_queue
 *   6. derive the MMC_BLK_CMD23 / MMC_BLK_REL_WR flags
 *
 * Returns the new mmc_blk_data on success or an ERR_PTR() on failure; on
 * failure everything allocated so far is released again.
 */
static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
struct device *parent,
sector_t size,
bool default_ro,
const char *subname,
int area_type)
{
struct mmc_blk_data *md;
int devidx, ret;
/* Reserve a device index from mmc_blk_ida, bounded by max_devices. */
devidx = ida_simple_get(&mmc_blk_ida, 0, max_devices, GFP_KERNEL);
if (devidx < 0) {
/*
* We get -ENOSPC because there are no more any available
* devidx. The reason may be that, either userspace haven't yet
* unmounted the partitions, which postpones mmc_blk_release()
* from being called, or the device has more partitions than
* what we support.
*/
if (devidx == -ENOSPC)
dev_err(mmc_dev(card->host),
"no more device IDs available\n");
return ERR_PTR(devidx);
}
/* Core work of this function: allocate the mmc_blk_data and set it up. */
/* Step 1: create the struct mmc_blk_data (zero-initialised). */
md = kzalloc(sizeof(struct mmc_blk_data), GFP_KERNEL);
if (!md) {
ret = -ENOMEM;
goto out;
}
/* Step 2: initialise the mmc_blk_data fields. */
//---- set mmc_blk_data->area_type
md->area_type = area_type;
/*
* Set the read-only status based on the supported commands
* and the write protect switch.
*/
//---- set mmc_blk_data->read_only
md->read_only = mmc_blk_readonly(card); // set the read-only attribute
//---- allocate mmc_blk_data->disk
md->disk = alloc_disk(perdev_minors); // allocate the gendisk via alloc_disk()
if (md->disk == NULL) {
ret = -ENOMEM;
goto err_kfree;
}
//---- set mmc_blk_data->part, rpmbs and usage
INIT_LIST_HEAD(&md->part); // list head md->part (presumably the other physical partitions)
INIT_LIST_HEAD(&md->rpmbs); // list head md->rpmbs (presumably the RPMB partitions)
md->usage = 1; // usage count starts at 1
//---- Core step:
// set up mmc_blk_data->queue by calling mmc_init_queue(), which
// initialises the mmc_queue and creates the matching request_queue.
/*
Step 3: initialise struct mmc_blk_data->mmc_queue.
*/
/*
Step 4: create struct mmc_blk_data->mmc_queue->request_queue
(i.e. create the struct request_queue).
*/
ret = mmc_init_queue(&md->queue, card);
if (ret)
goto err_putdisk;
md->queue.blkdata = md; // link the mmc_queue back to its mmc_blk_data
/*
* Keep an extra reference to the queue so that we can shutdown the
* queue (i.e. call blk_cleanup_queue()) while there are still
* references to the 'md'. The corresponding blk_put_queue() is in
* mmc_blk_put().
*/
if (!blk_get_queue(md->queue.queue)) {
mmc_cleanup_queue(&md->queue);
ret = -ENODEV;
goto err_putdisk;
}
//---- Core step: set up mmc_blk_data->disk
/*
Step 5: initialise struct mmc_blk_data->gendisk.
*/
// the gendisk itself was already allocated above
md->disk->major = MMC_BLOCK_MAJOR; // block device major number is MMC_BLOCK_MAJOR
md->disk->first_minor = devidx * perdev_minors; // first minor number derived from the device index
md->disk->fops = &mmc_bdops; // block device operations
md->disk->private_data = md; // link the gendisk to its mmc_blk_data
/*
Step 6: link struct mmc_blk_data->gendisk with
mmc_blk_data->mmc_queue->request_queue
(i.e. link the struct gendisk with the struct request_queue).
*/
md->disk->queue = md->queue.queue; // important: ties the gendisk to the request_queue
md->parent = parent;
set_disk_ro(md->disk, md->read_only || default_ro); // set the gendisk read-only attribute
md->disk->flags = GENHD_FL_EXT_DEVT; // base gendisk flags
if (area_type & (MMC_BLK_DATA_AREA_RPMB | MMC_BLK_DATA_AREA_BOOT))
md->disk->flags |= GENHD_FL_NO_PART_SCAN
| GENHD_FL_SUPPRESS_PARTITION_INFO;
/*
* As discussed on lkml, GENHD_FL_REMOVABLE should:
*
* - be set for removable media with permanent block devices
* - be unset for removable block devices with permanent media
*
* Since MMC block devices clearly fall under the second
* case, we do not set GENHD_FL_REMOVABLE. Userspace
* should use the block device creation/destruction hotplug
* messages to tell when the card is present.
*/
// Build the disk name: "mmcblk" + host index, followed by subname
// when one is given (empty string otherwise).
snprintf(md->disk->disk_name, sizeof(md->disk->disk_name),
"mmcblk%u%s", card->host->index, subname ? subname : "");
// Set the gendisk capacity from the size argument.
set_capacity(md->disk, size);
if (mmc_host_cmd23(card->host)) {
if ((mmc_card_mmc(card) &&
card->csd.mmca_vsn >= CSD_SPEC_VER_3) ||
(mmc_card_sd(card) &&
card->scr.cmds & SD_SCR_CMD23_SUPPORT))
md->flags |= MMC_BLK_CMD23;
}
if (mmc_card_mmc(card) &&
md->flags & MMC_BLK_CMD23 &&
((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
card->ext_csd.rel_sectors)) {
md->flags |= MMC_BLK_REL_WR;
blk_queue_write_cache(md->queue.queue, true, true);
}
return md;
/* Error unwinding: release resources in reverse order of acquisition. */
err_putdisk:
put_disk(md->disk);
err_kfree:
kfree(md);
out:
ida_simple_remove(&mmc_blk_ida, devidx);
return ERR_PTR(ret);
}
mmc_init_queue()函数
//源码:drivers/mmc/core/queue.c
/**
* mmc_init_queue - initialise a queue structure.
* @mq: mmc queue
* @card: mmc card to attach this queue
*
* Initialise a MMC card request queue.
*/
// 两个重要的功能
// 1、创建request_queue并封装到mmc_queue中
// 2、创建IO请求(request)的提取和处理进程
int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
{
struct mmc_host *host = card->host;
int ret;
mq->card = card;
mq->use_cqe = host->cqe_enabled;
spin_lock_init(&mq->lock);
memset(&mq->tag_set, 0, sizeof(mq->tag_set));
mq->tag_set.ops = &mmc_mq_ops;
/*
* The queue depth for CQE must match the hardware because the request
* tag is used to index the hardware queue.
*/
if (mq->use_cqe)
mq->tag_set.queue_depth =
min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth);
else
mq->tag_set.queue_depth = MMC_QUEUE_DEPTH;
mq->tag_set.numa_node = NUMA_NO_NODE;
mq->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
mq->tag_set.nr_hw_queues = 1;
mq->tag_set.cmd_size = sizeof(struct mmc_queue_req);
mq->tag_set.driver_data = mq;
/*
* Since blk_mq_alloc_tag_set() calls .init_request() of mmc_mq_ops,
* the host->can_dma_map_merge should be set before to get max_segs
* from mmc_get_max_segments().
*/
if (mmc_merge_capable(host) &&
host->max_segs < MMC_DMA_MAP_MERGE_SEGMENTS &&
dma_get_merge_boundary(mmc_dev(host)))
host->can_dma_map_merge = 1;
else
host->can_dma_map_merge = 0;
ret = blk_mq_alloc_tag_set(&mq->tag_set);
if (ret)
return ret;
/**
创建struct mmc_blk_data->mmc_queue->request_queue
(创建struct request_queue)
**/
mq->queue = blk_mq_init_queue(&mq->tag_set);
if (IS_ERR(mq->queue)) {
ret = PTR_ERR(mq->queue);
goto free_tag_set;
}
if (mmc_host_is_spi(host) && host->use_spi_crc)
mq->queue->backing_dev_info->capabilities |=
BDI_CAP_STABLE_WRITES;
mq->queue->queuedata = mq;
blk_queue_rq_timeout(mq->queue, 60 * HZ);
mmc_setup_queue(mq, card);
return 0;
free_tag_set:
blk_mq_free_tag_set(&mq->tag_set);
return ret;
}
//源码:drivers/mmc/core/queue.c
/*
 * blk-mq operations table of the mmc queue; mmc_init_queue() installs it
 * as the ops of the tag set.
 */
static const struct blk_mq_ops mmc_mq_ops = {
	.init_request	= mmc_mq_init_request,
	.exit_request	= mmc_mq_exit_request,
	.queue_rq	= mmc_mq_queue_rq,
	.complete	= mmc_blk_mq_complete,
	.timeout	= mmc_mq_timed_out,
};
//源码:drivers/mmc/core/block.c
/*
 * Register the gendisk of @md with the system and create its sysfs
 * attributes.
 *
 * A "force_ro" attribute is always created. For boot areas whose
 * ext_csd.boot_ro_lockable is set, a "ro_lock_until_next_power_on"
 * attribute is created as well; it is read-only when
 * EXT_CSD_BOOT_WP_B_PWR_WP_DIS is set in boot_ro_lock and additionally
 * owner-writable otherwise.
 *
 * Return: 0 on success, otherwise the error from device_create_file().
 * On failure the attributes created so far are removed and the disk is
 * deleted again.
 */
static int mmc_add_disk(struct mmc_blk_data *md)
{
	struct mmc_card *card = md->queue.card;
	int err;

	/* Register mmc_blk_data->disk with the system (adds the block device). */
	device_add_disk(md->parent, md->disk, NULL);

	sysfs_attr_init(&md->force_ro.attr);
	md->force_ro.attr.name = "force_ro";
	md->force_ro.attr.mode = S_IRUGO | S_IWUSR;
	md->force_ro.show = force_ro_show;
	md->force_ro.store = force_ro_store;

	err = device_create_file(disk_to_dev(md->disk), &md->force_ro);
	if (err)
		goto del_disk;

	/* Only lockable boot areas get the power-on write-protect attribute. */
	if (!(md->area_type & MMC_BLK_DATA_AREA_BOOT) ||
	    !card->ext_csd.boot_ro_lockable)
		return 0;

	sysfs_attr_init(&md->power_ro_lock.attr);
	md->power_ro_lock.attr.name = "ro_lock_until_next_power_on";
	md->power_ro_lock.attr.mode =
		(card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_DIS) ?
		S_IRUGO : (S_IRUGO | S_IWUSR);
	md->power_ro_lock.show = power_ro_lock_show;
	md->power_ro_lock.store = power_ro_lock_store;

	err = device_create_file(disk_to_dev(md->disk), &md->power_ro_lock);
	if (!err)
		return 0;

	/* Second attribute failed: drop the first one, then the disk. */
	device_remove_file(disk_to_dev(md->disk), &md->force_ro);
del_disk:
	del_gendisk(md->disk);
	return err;
}