/*
 * blk_alloc_queue_node - allocate and initialise a request queue on a node
 * @gfp_mask: allocation flags; __GFP_ZERO is OR-ed in so the queue starts zeroed
 * @node_id:  node handed to the slab allocator and recorded in q->node
 *
 * Returns the new queue, or NULL if any allocation or initialisation step
 * fails.  The error labels at the bottom unwind in reverse order of
 * acquisition: backing_dev_info, queue id, percpu counter, slab object.
 */
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;
	int err;

	/* Zero-filled allocation from the request-queue slab cache. */
	q = kmem_cache_alloc_node(blk_requestq_cachep,
				  gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	if (percpu_counter_init(&q->mq_usage_counter, 0))
		goto fail_q;

	/* Reserve a queue id from the global ida. */
	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
	if (q->id < 0)
		goto fail_c;

	/* Fill in the embedded backing_dev_info before initialising it. */
	q->backing_dev_info.ra_pages =
			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
	q->backing_dev_info.state = 0;
	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
	q->backing_dev_info.name = "block";
	q->node = node_id;

	err = bdi_init(&q->backing_dev_info);
	if (err)
		goto fail_id;

	/* Timer used for laptop-mode writeback; only set up here, not armed. */
	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
		    laptop_mode_timer_fn, (unsigned long) q);
	/*
	 * Per-queue request timeout timer; blk_rq_timed_out_timer() is the
	 * expiry handler and receives the queue as its argument.
	 * NOTE(review): the original annotation states a 30s default timeout
	 * and that this timer used to live in the SCSI mid-layer; neither is
	 * visible in this function - confirm before relying on it.
	 */
	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);

	/* Initialise the queue's list heads. */
	INIT_LIST_HEAD(&q->queue_head);
	INIT_LIST_HEAD(&q->timeout_list);
	INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
	INIT_LIST_HEAD(&q->blkg_list);
#endif
	INIT_LIST_HEAD(&q->flush_queue[0]);
	INIT_LIST_HEAD(&q->flush_queue[1]);
	INIT_LIST_HEAD(&q->flush_data_in_flight);

	/* delay_work runs blk_delay_work() when it fires. */
	INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);

	kobject_init(&q->kobj, &blk_queue_ktype);

	mutex_init(&q->sysfs_lock);
	spin_lock_init(&q->__queue_lock);

	/*
	 * By default initialize queue_lock to internal lock and driver can
	 * override it later if need be.
	 */
	q->queue_lock = &q->__queue_lock;

	/*
	 * A queue starts its life with bypass turned on to avoid
	 * unnecessary bypass on/off overhead and nasty surprises during
	 * init.  The initial bypass will be finished when the queue is
	 * registered by blk_register_queue().
	 */
	q->bypass_depth = 1;
	__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);

	init_waitqueue_head(&q->mq_freeze_wq);

	/*
	 * bdi_init() has succeeded by this point, so a failure here must
	 * also tear down the backing_dev_info, not just release the id.
	 */
	if (blkcg_init_queue(q))
		goto fail_bdi;

	return q;

fail_bdi:
	bdi_destroy(&q->backing_dev_info);
fail_id:
	ida_simple_remove(&blk_queue_ida, q->id);
fail_c:
	percpu_counter_destroy(&q->mq_usage_counter);
fail_q:
	kmem_cache_free(blk_requestq_cachep, q);
	return NULL;
}
kblockd工作队列的工作内容由blk_delay_work()函数实现,主要就是调用__blk_run_queue()对请求队列进行unplug。
/*
 * Work handler for a request queue's delay_work: runs the queue while
 * holding its queue_lock (taken with spin_lock_irq).
 */
static void blk_delay_work(struct work_struct *work)
{
	/* Recover the request queue that embeds this delayed work item. */
	struct request_queue *queue =
		container_of(work, struct request_queue, delay_work.work);

	spin_lock_irq(queue->queue_lock);
	/* Run the queue directly under the lock. */
	__blk_run_queue(queue);
	spin_unlock_irq(queue->queue_lock);
}
/*
 * Unplug a request queue: emit the block_unplug trace event, run the queue
 * (directly, or deferred through blk_run_queue_async() when called from
 * schedule), then drop q->queue_lock.
 *
 * @q:             queue to unplug; its queue_lock is released on return
 *                 (see the __releases annotation)
 * @depth:         forwarded unchanged to the trace event
 * @from_schedule: true selects the deferred path, false runs the queue inline
 */
static void queue_unplugged(struct request_queue *q, unsigned int depth,
			    bool from_schedule)
	__releases(q->queue_lock)
{
	const bool run_sync = !from_schedule;

	trace_block_unplug(q, depth, run_sync);

	if (run_sync) {
		/* Synchronous unplug: run the queue right here. */
		__blk_run_queue(q);
	} else {
		/* Asynchronous unplug: hand the run over to delay_work. */
		blk_run_queue_async(q);
	}

	spin_unlock(q->queue_lock);
}
/*
 * blk_run_queue_async - run a request queue from workqueue context
 * @q: queue to run
 *
 * Unless the queue is stopped or dead, (re)schedules q->delay_work on
 * kblockd_workqueue with a delay of 0, i.e. without any added delay.
 * The work item's handler is blk_delay_work().
 *
 * This function must be defined exactly once; repeated definitions of the
 * same symbol are a redefinition error.
 */
void blk_run_queue_async(struct request_queue *q)
{
	if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
		mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}
转载请访问原文地址:http://blog.chinaunix.net/xmlrpc.php?r=blog/article&uid=14528823&id=4778396