blk_mq_init_queue函数学习记录

blk-mq编程,主要要调用两个函数进行初始化工作,blk_mq_init_queue这是第二个。该函数先是申请了struct request_queue结构,这个请求队列后面用于赋值给磁盘那个结构体的相应成员。

struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{
	struct request_queue *uninit_q, *q;

	//分配struct request_queue并初始化
	uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL);
	if (!uninit_q)
		return ERR_PTR(-ENOMEM);
	/*
		1:分配每个cpu专属的软件队列并初始化
		2:分配硬件队列,并初始化
		3:建立软件队列和硬件队列的联系
	*/
	q = blk_mq_init_allocated_queue(set, uninit_q);
	if (IS_ERR(q))
		blk_cleanup_queue(uninit_q);
	return q;
}
EXPORT_SYMBOL(blk_mq_init_queue);

blk_mq_init_allocated_queue函数分析

一眼望过去,确实有点复杂,不过,多看几遍就好了,
这里面,主要就是给struct request_queue *q结构体里面的成员变量赋值的,简单的变量赋值就不分析了,看看它调用的函数进行分析。

struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q)
{
	/* mark the queue as mq asap */
	q->mq_ops = set->ops;
	q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn, blk_mq_poll_stats_bkt, BLK_MQ_POLL_STATS_BKTS, q);
	if (!q->poll_cb)
		goto err_exit;

	q->queue_ctx = alloc_percpu(struct blk_mq_ctx);//percpu变量 软件队列
	if (!q->queue_ctx)
		goto err_exit;

	/* init q->mq_kobj and sw queues' kobjects */
	blk_mq_sysfs_init(q); //主要是初始化kobject变量

	//二级指针 硬件队列
	q->queue_hw_ctx = kcalloc_node(nr_cpu_ids, sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node);
	if (!q->queue_hw_ctx)
		goto err_percpu;

	//赋值q->mq_map,这个数组保存了每个CPU对应的硬件队列编号
	q->mq_map = set->mq_map;

	blk_mq_realloc_hw_ctxs(set, q);
	if (!q->nr_hw_queues)
		goto err_hctxs;

	/*定时器初始化,设置超时时间*/
	INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);

	q->nr_queues = nr_cpu_ids;
	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;

	if (!(set->flags & BLK_MQ_F_SG_MERGE))
		queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);

	q->sg_reserved_size = INT_MAX;

	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
	INIT_LIST_HEAD(&q->requeue_list);
	spin_lock_init(&q->requeue_lock);

	blk_queue_make_request(q, blk_mq_make_request);
	if (q->mq_ops->poll)
		q->poll_fn = blk_mq_poll;
	/*
	 * Do this after blk_queue_make_request() overrides it...
	 */
	q->nr_requests = set->queue_depth; //防止被覆盖
	/*
	 * Default to classic polling
	 */
	q->poll_nsec = -1;

	if (set->ops->complete)
		blk_queue_softirq_done(q, set->ops->complete);

	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
	blk_mq_add_queue_tag_set(set, q);
	blk_mq_map_swqueue(q);
	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
		int ret;
		ret = elevator_init_mq(q);
		if (ret)
			return ERR_PTR(ret);
	}
	return q;
err_hctxs:
	kfree(q->queue_hw_ctx);
err_percpu:
	free_percpu(q->queue_ctx);
err_exit:
	q->mq_ops = NULL;
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(blk_mq_init_allocated_queue);

blk_stat_alloc_callback

这个函数一看也没什么分析的,主要也是给poll_cb进行赋值,采用了很多的默认函数进行赋值。

struct blk_stat_callback *
blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
			int (*bucket_fn)(const struct request *), unsigned int buckets, void *data)
{
	struct blk_stat_callback *cb;

	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
	if (!cb)
		return NULL;

	cb->stat = kmalloc_array(buckets, sizeof(struct blk_rq_stat), GFP_KERNEL);
	if (!cb->stat) {
		kfree(cb);
		return NULL;
	}
	cb->cpu_stat = __alloc_percpu(buckets * sizeof(struct blk_rq_stat), __alignof__(struct blk_rq_stat));
	if (!cb->cpu_stat) {
		kfree(cb->stat);
		kfree(cb);
		return NULL;
	}

	cb->timer_fn = timer_fn;
	cb->bucket_fn = bucket_fn;
	cb->data = data;
	cb->buckets = buckets;
	timer_setup(&cb->timer, blk_stat_timer_fn, 0);
	return cb;
}
EXPORT_SYMBOL_GPL(blk_stat_alloc_callback);

blk_mq_sysfs_init

接着到这个函数,也是变量的初始化工作,struct request_queue队列里面的kobject变量初始化,以及取出在每一个cpu上q->queue_ctx结构体,然后对齐成员kobject变量进行初始化。

void blk_mq_sysfs_init(struct request_queue *q)
{
	struct blk_mq_ctx *ctx;
	int cpu;

	kobject_init(&q->mq_kobj, &blk_mq_ktype);

	for_each_possible_cpu(cpu) {
		ctx = per_cpu_ptr(q->queue_ctx, cpu);//返回每个cpu上的q->queue_ctx变量的首地址
		kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
	}
}

blk_mq_realloc_hw_ctxs

这个函数相对来说比较重要。后面再补充。

static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct request_queue *q)
{
	int i, j;
	struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;

	blk_mq_sysfs_unregister(q);
	/* protect against switching io scheduler  */
	mutex_lock(&q->sysfs_lock);
	for (i = 0; i < set->nr_hw_queues; i++) {
		int node;

		if (hctxs[i])
			continue;
		/*
			根据set->nr_hw_queues的值分配struct blk_mq_hw_ctx(硬件队列结构体个数)
		*/
		node = blk_mq_hw_queue_to_node(q->mq_map, i);
		hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set), GFP_KERNEL, node);
		if (!hctxs[i])
			break;
		/*
			以下几个赋值操作都是相当于对struct blk_mq_hw_ctx结构体的
			成员进行初始化操作(指针类型的变量就是申请内存,普通变量就是赋值)
		*/
		if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL, node)) {
			kfree(hctxs[i]);
			hctxs[i] = NULL;
			break;
		}
		atomic_set(&hctxs[i]->nr_active, 0);
		hctxs[i]->numa_node = node;
		hctxs[i]->queue_num = i;
		/*
			从函数名称也不难看出,该函数是对hctxs[i]的大部分成员变量进行初始化工作
		*/
		if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
			free_cpumask_var(hctxs[i]->cpumask);
			kfree(hctxs[i]);
			hctxs[i] = NULL;
			break;
		}
		blk_mq_hctx_kobj_init(hctxs[i]);
	}

	for (j = i; j < q->nr_hw_queues; j++) { //正常不会走到这里,这里类似于goto的处理?
		struct blk_mq_hw_ctx *hctx = hctxs[j];
		if (hctx) {
			if (hctx->tags)
				blk_mq_free_map_and_requests(set, j);
			blk_mq_exit_hctx(q, set, hctx, j);
			kobject_put(&hctx->kobj);
			hctxs[j] = NULL;
		}
	}
	q->nr_hw_queues = i;
	mutex_unlock(&q->sysfs_lock);
	blk_mq_sysfs_register(q);
}

blk_queue_make_request

这个函数也是给q的其成员赋值的,先大概熟悉一下,如果有实际的调试分析,需要了解某个参数的值,到时再回来看吧。

void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
{
	/*
	 * set defaults
	 */
	q->nr_requests = BLKDEV_MAX_RQ; //这个值后面会重新赋值进行覆盖

	q->make_request_fn = mfn;
	blk_queue_dma_alignment(q, 511);
	blk_queue_congestion_threshold(q);
	q->nr_batching = BLK_BATCH_REQ;

	blk_set_default_limits(&q->limits);
}
EXPORT_SYMBOL(blk_queue_make_request);

blk_queue_softirq_done

也是赋值,IO操作完成时,会调用这个回调函数。

void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
{
	q->softirq_done_fn = fn;
}
EXPORT_SYMBOL(blk_queue_softirq_done);

blk_mq_init_cpu_queues

static void blk_mq_init_cpu_queues(struct request_queue *q, unsigned int nr_hw_queues)
{
	unsigned int i;
	for_each_possible_cpu(i) { //硬件队列数
		/*返回这个变量在编号为i的cpu上的起始地址*/
		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
		struct blk_mq_hw_ctx *hctx;

		//其成员做一些赋值操作
		__ctx->cpu = i;
		spin_lock_init(&__ctx->lock);
		INIT_LIST_HEAD(&__ctx->rq_list);
		__ctx->queue = q;
		/*
		 * Set local node, IFF we have more than one hw queue. If
		 * not, we remain on the home node of the device
		 */
		hctx = blk_mq_map_queue(q, i); //取出每个cpu上的硬件队列
		if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
			hctx->numa_node = local_memory_node(cpu_to_node(i));
	}
}

blk_mq_add_queue_tag_set


blk_mq_map_swqueue


你可能感兴趣的:(#,block,linux)