This interface is the generic way a block device handles bio requests. The main idea is to merge the bio into an existing request whenever possible and, where possible, to further merge contiguous requests; if the bio cannot be merged, a new request is allocated and initialized, and the queue is plugged.
Different I/O scheduling algorithms come into play here: the new request is added to the scheduler's queue, and an unplug event triggers the scheduler, which dispatches requests to the request queue's dispatch queue according to its particular scheduling algorithm.
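Before walking through the kernel code, here is a minimal userspace sketch (illustrative only; the names and the sorting "scheduler" are made up, not the kernel API) of the plug/unplug idea just described: while the queue is plugged, requests only accumulate; unplugging hands the whole batch to the scheduler, which dispatches it in its own order.
/* Toy model of plug/unplug batching (userspace, not kernel code). */
#include <stdio.h>
#include <stdlib.h>

static int cmp_sector(const void *a, const void *b)
{
    unsigned long x = *(const unsigned long *)a;
    unsigned long y = *(const unsigned long *)b;
    return (x > y) - (x < y);
}

int main(void)
{
    unsigned long pending[4];
    int npending = 0;

    /* queue is "plugged": requests are only queued, not dispatched */
    pending[npending++] = 300;
    pending[npending++] = 100;
    pending[npending++] = 200;

    /* "unplug": the stand-in scheduler orders the batch and dispatches it */
    qsort(pending, npending, sizeof(pending[0]), cmp_sector);
    for (int i = 0; i < npending; i++)
        printf("dispatch request at sector %lu\n", pending[i]);
    return 0;
}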
/* Queue a bio request on a block device */
int blk_queue_bio(struct request_queue *q, struct bio *bio)
{
struct request *req;
int el_ret;
unsigned int bytes = bio->bi_size;
const unsigned short prio = bio_prio(bio);
const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
int where = ELEVATOR_INSERT_SORT;
int rw_flags;
/* BIO_RW_BARRIER is deprecated */
if (WARN_ONCE(bio_rw_flagged(bio, BIO_RW_BARRIER),
"block: BARRIER is deprecated, use FLUSH/FUA instead\n")) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
* ISA dma in theory)
*/
blk_queue_bounce(q, &bio);
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
bio_endio(bio, -EIO);
return 0;
}
spin_lock_irq(q->queue_lock);
if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { // FLUSH/FUA bio: skip merging and allocate a new request directly
where = ELEVATOR_INSERT_FLUSH;
goto get_rq;
}
if (elv_queue_empty(q)) // the elevator queue is empty, so there is nothing to merge into; allocate a new request
goto get_rq;
el_ret = elv_merge(q, &req, bio); // ask the elevator whether the bio can be merged into an existing request req
switch (el_ret) {
case ELEVATOR_BACK_MERGE: // 1. back-merge the bio into req
BUG_ON(!rq_mergeable(req));
if (!ll_back_merge_fn(q, req, bio)) // 1) check that the merge would not exceed the request's maximum size or maximum segment count
break;
trace_block_bio_backmerge(q, bio);
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
blk_rq_set_mixed_merge(req);
req->biotail->bi_next = bio; // 2) append the bio to the tail of req's bio list
req->biotail = bio;
req->__data_len += bytes;
req->ioprio = ioprio_best(req->ioprio, prio);
if (!blk_rq_cpu_valid(req))
req->cpu = bio->bi_comp_cpu;
drive_stat_acct(req, 0);
elv_bio_merged(q, req, bio); // notify the elevator that the bio has been merged into req
if (!attempt_back_merge(q, req)) // 3) then try to merge req with the request behind it; returns 1 if merged, 0 otherwise
elv_merged_request(q, req, el_ret); // the two requests could not be merged; let the elevator update its state
goto out;
case ELEVATOR_FRONT_MERGE: // 2. front-merge the bio into req
BUG_ON(!rq_mergeable(req));
if (!ll_front_merge_fn(q, req, bio)) // 1) check that the merge would not exceed the request's maximum size or maximum segment count
break;
trace_block_bio_frontmerge(q, bio);
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
blk_rq_set_mixed_merge(req);
req->cmd_flags &= ~REQ_FAILFAST_MASK;
req->cmd_flags |= ff;
}
bio->bi_next = req->bio; // 2) insert the bio at the head of req's bio list
req->bio = bio;
/*
* may not be valid. if the low level driver said
* it didn't need a bounce buffer then it better
* not touch req->buffer either...
*/
req->buffer = bio_data(bio); // update the request's starting buffer address
/*
* The merge may happen across partitions
* We must update in_flight value accordingly
*/
blk_account_io_front_merge(req, bio->bi_sector);
req->__sector = bio->bi_sector; // update the request's starting sector and its size
req->__data_len += bytes;
req->ioprio = ioprio_best(req->ioprio, prio);
if (!blk_rq_cpu_valid(req))
req->cpu = bio->bi_comp_cpu;
drive_stat_acct(req, 0);
elv_bio_merged(q, req, bio);
if (!attempt_front_merge(q, req)) // 3) then try to merge req with the request ahead of it; returns 1 if merged, 0 otherwise
elv_merged_request(q, req, el_ret); // the two requests could not be merged; let the elevator update its state
goto out;
/* ELV_NO_MERGE: elevator says don't/can't merge. */
default:
;
}
get_rq:
/*
* This sync check and mask will be re-done in init_request_from_bio(),
* but we need to set it earlier to expose the sync flag to the
* rq allocator and io schedulers.
*/
rw_flags = bio_data_dir(bio);
if (sync)
rw_flags |= REQ_SYNC;
/*
* Grab a free request. This might sleep but can not fail.
* Returns with the queue unlocked.
*/
req = get_request_wait(q, rw_flags, bio); // 3. get a free request, blocking if necessary
if (unlikely(!req)) {
bio_endio(bio, -ENODEV); /* @q is dead */
goto out_unlock;
}
/*
* After dropping the lock and possibly sleeping here, our request
* may now be mergeable after it had proven unmergeable (above).
* We don't worry about that case for efficiency. It won't happen
* often, and the elevators are able to handle it.
*/
init_request_from_bio(req, bio); // 4. initialize the request from the bio
spin_lock_irq(q->queue_lock);
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
bio_flagged(bio, BIO_CPU_AFFINE))
req->cpu = raw_smp_processor_id();
if (queue_should_plug(q) && elv_queue_empty(q)) // 5. if the queue should be plugged and the elevator queue is empty, plug the request queue
blk_plug_device(q);
/* insert the request into the elevator */
drive_stat_acct(req, 1);
__elv_add_request(q, req, where, 0); // 6. insert the request into the elevator's queue
out:
if (unplug || !queue_should_plug(q)) // 7. if the bio requested unplug, or the queue should not be plugged, unplug the queue and process the pending requests
__generic_unplug_device(q);
out_unlock:
spin_unlock_irq(q->queue_lock);
return 0;
}
/* Can this bio be merged into a request already on the queue? The return value gives the merge position, and *req returns the request the bio would be merged into. */
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
struct elevator_queue *e = q->elevator;
struct request *__rq;
int ret;
/*
* First try one-hit cache.
*/
if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) { // 1. check whether the bio can be merged into the most recently merged request (one-hit cache)
ret = blk_try_merge(q->last_merge, bio); // determine the merge direction (back or front)
if (ret != ELEVATOR_NO_MERGE) {
*req = q->last_merge;
return ret;
}
}
if (blk_queue_nomerges(q))
return ELEVATOR_NO_MERGE;
/*
* See if our hash lookup can find a potential backmerge.
*/
__rq = elv_rqhash_find(q, bio->bi_sector); // 2. look in the elevator hash table for a request that could take this bio as a back merge
if (__rq && elv_rq_merge_ok(__rq, bio)) {
*req = __rq;
return ELEVATOR_BACK_MERGE;
}
if (e->ops->elevator_merge_fn) // 3. fall back to the scheduler's own merge callback to find a request the bio can merge into
return e->ops->elevator_merge_fn(q, req, bio);
return ELEVATOR_NO_MERGE;
}
/* Check whether the bio may be merged into request rq (generic check plus the scheduler's allow_merge hook); returns 1 if it may, 0 otherwise */
bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
if (!blk_rq_merge_ok(rq, bio))
return 0;
if (!elv_iosched_allow_merge(rq, bio))
return 0;
return 1;
}
/* Generic check whether the bio may be merged into request rq; returns true if it may, false otherwise */
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
if (!rq_mergeable(rq) || !bio_mergeable(bio)) // only file system requests are mergeable
return false;
/* don't merge file system requests and discard requests */
if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) // don't mix file system requests with discard requests
return false;
/* different data direction or already started, don't merge */
if (bio_data_dir(bio) != rq_data_dir(rq)) // different data directions cannot be merged
return false;
/* must be same device and not a special request */
if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) // must target the same disk, and rq must not be a special request
return false;
/* only merge integrity protected bio into ditto rq */
if (bio_integrity(bio) != blk_integrity_rq(rq))
return false;
return true;
}
/* Determine in which direction, if any, the bio could merge with rq */
int blk_try_merge(struct request *rq, struct bio *bio)
{
if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector) // the request ends exactly where the bio starts: back merge
return ELEVATOR_BACK_MERGE;
else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector) // the bio ends exactly where the request starts: front merge
return ELEVATOR_FRONT_MERGE;
return ELEVATOR_NO_MERGE;
}
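A small self-contained example of the arithmetic above (userspace, illustrative only; try_merge_sketch is a made-up helper, not kernel code): a request covering sectors [pos, pos + nr) accepts a back merge from a bio that starts at pos + nr, and a front merge from a bio that ends at pos.
#include <stdio.h>

enum { NO_MERGE = 0, BACK_MERGE = 1, FRONT_MERGE = 2 };

static int try_merge_sketch(unsigned long rq_pos, unsigned long rq_sectors,
                            unsigned long bio_sector, unsigned long bio_sectors)
{
    if (rq_pos + rq_sectors == bio_sector)      /* request ends where the bio starts */
        return BACK_MERGE;
    if (rq_pos - bio_sectors == bio_sector)     /* bio ends where the request starts */
        return FRONT_MERGE;
    return NO_MERGE;
}

int main(void)
{
    /* request covers sectors 100..107 (pos = 100, 8 sectors) */
    printf("bio at 108: %d\n", try_merge_sketch(100, 8, 108, 8));   /* 1: back merge  */
    printf("bio at  92: %d\n", try_merge_sketch(100, 8, 92, 8));    /* 2: front merge */
    printf("bio at 200: %d\n", try_merge_sketch(100, 8, 200, 8));   /* 0: no merge    */
    return 0;
}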
/* Try to merge rq with the request scheduled after it; returns 1 if they were merged, 0 otherwise */
int attempt_back_merge(struct request_queue *q, struct request *rq)
{
struct request *next = elv_latter_request(q, rq); // the request scheduled right after rq
if (next)
return attempt_merge(q, rq, next); // try to merge rq with the following request next
return 0;
}
/* Try to merge request next into request req; returns 1 if merged, 0 otherwise */
static int attempt_merge(struct request_queue *q, struct request *req,
struct request *next)
{
if (!rq_mergeable(req) || !rq_mergeable(next)) // 1. both requests must be mergeable
return 0;
/*
* Don't merge file system requests and discard requests
*/
if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD)) // 2. both requests must have the same discard attribute
return 0;
/*
* not contiguous
*/
if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) // 3. the two requests must be contiguous: req must end where next starts
return 0;
if (rq_data_dir(req) != rq_data_dir(next)
|| req->rq_disk != next->rq_disk
|| next->special) // 4. same data direction and same disk, and next must not be a special request
return 0;
if (blk_integrity_rq(req) != blk_integrity_rq(next))
return 0;
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
* will have updated segment counts, update sector
* counts here.
*/
if (!ll_merge_requests_fn(q, req, next)) // 5. check that the merged request stays within the queue's size and segment limits
return 0;
/*
* If failfast settings disagree or any of the two is already
* a mixed merge, mark both as mixed before proceeding. This
* makes sure that all involved bios have mixable attributes
* set properly.
*/
if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
(req->cmd_flags & REQ_FAILFAST_MASK) !=
(next->cmd_flags & REQ_FAILFAST_MASK)) {
blk_rq_set_mixed_merge(req);
blk_rq_set_mixed_merge(next);
}
/*
* At this point we have either done a back merge
* or front merge. We need the smaller start_time of
* the merged requests to be the current request
* for accounting purposes.
*/
if (time_after(req->start_time, next->start_time))
req->start_time = next->start_time;
req->biotail->bi_next = next->bio; // 6. append next's bio list to the tail of req and update req's size
req->biotail = next->biotail;
req->__data_len += blk_rq_bytes(next);
elv_merge_requests(q, req, next); // 7. elevator bookkeeping after the merge
/*
* 'next' is going away, so update stats accordingly
*/
blk_account_io_merge(next);
req->ioprio = ioprio_best(req->ioprio, next->ioprio);
if (blk_rq_cpu_valid(next))
req->cpu = next->cpu;
/* ownership of bio passed from next to req */
next->bio = NULL;
__blk_put_request(q, next); // 8. release next and return 1
return 1;
}
/* Check whether the two requests may be merged within the queue limits; returns 1 if allowed, 0 otherwise */
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
struct request *next)
{
int total_phys_segments;
unsigned int seg_size =
req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size; // size of the single segment the two boundary segments would form
/*
* First check if the either of the requests are re-queued
* requests. Can't merge them if they are.
*/
if (req->special || next->special) // 1. neither request may be a re-queued (retried) request
return 0;
/*
* Will it become too large?
*/
if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
blk_rq_get_max_sectors(req)) // 2. the merged request must not exceed the maximum sector count
return 0;
total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
if (req->nr_phys_segments == 1)
req->bio->bi_seg_front_size = seg_size;
if (next->nr_phys_segments == 1)
next->biotail->bi_seg_back_size = seg_size;
total_phys_segments--;
}
if (total_phys_segments > queue_max_segments(q)) // 3. the merged request must not exceed the maximum number of physical segments
return 0;
/* Merge is OK... */
req->nr_phys_segments = total_phys_segments;
return 1;
}
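The segment accounting in the function above can be shown with a self-contained sketch (userspace, illustrative only; merged_segments and the max_segments value are made up): when the last physical segment of req and the first segment of next happen to be contiguous, they collapse into one, so the merged request needs one segment fewer; the merge is still rejected if the total exceeds the queue's segment limit.
#include <stdio.h>
#include <stdbool.h>

static int merged_segments(int req_segs, int next_segs, bool boundary_contiguous)
{
    int total = req_segs + next_segs;

    if (boundary_contiguous)
        total--;        /* tail segment of req and head segment of next join into one */
    return total;
}

int main(void)
{
    int max_segments = 128;     /* stands in for queue_max_segments(q) */
    int total = merged_segments(3, 2, true);

    printf("merged request needs %d segments: %s\n", total,
           total > max_segments ? "reject merge" : "merge allowed");
    return 0;
}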
/* Elevator bookkeeping after two requests have been merged */
void elv_merge_requests(struct request_queue *q, struct request *rq,
struct request *next)
{
struct elevator_queue *e = q->elevator;
if (e->ops->elevator_merge_req_fn)
e->ops->elevator_merge_req_fn(q, rq, next);
elv_rqhash_reposition(q, rq); // 1. reposition rq in the hash table (its end sector has changed)
elv_rqhash_del(q, next); // 2. remove next from the hash table
q->nr_sorted--;
q->last_merge = rq;
}
/* Elevator bookkeeping after a bio has been merged into rq (called when the follow-up request-request merge was not possible) */
void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
struct elevator_queue *e = q->elevator;
if (e->ops->elevator_merged_fn)
e->ops->elevator_merged_fn(q, rq, type);
if (type == ELEVATOR_BACK_MERGE) // a back merge changes rq's end sector, so reposition it in the hash table
elv_rqhash_reposition(q, rq);
q->last_merge = rq;
}
/* Insert the request into the elevator's queue */
void __elv_add_request(struct request_queue *q, struct request *rq, int where,
int plug)
{
if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
/* barriers are scheduling boundary, update end_sector */
if (rq->cmd_type == REQ_TYPE_FS) {
q->end_sector = rq_end_sector(rq);
q->boundary_rq = rq;
}
} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
where == ELEVATOR_INSERT_SORT)
where = ELEVATOR_INSERT_BACK;
if (plug)
blk_plug_device(q);
elv_insert(q, rq, where); // insert the request at the requested position
}
/* Insert the request into the elevator at the given position */
void elv_insert(struct request_queue *q, struct request *rq, int where)
{
int unplug_it = 1;
trace_block_rq_insert(q, rq);
rq->q = q;
switch (where) {
case ELEVATOR_INSERT_REQUEUE:
/*
* Most requeues happen because of a busy condition,
* don't force unplug of the queue for that case.
* Clear unplug_it and fall through.
*/
unplug_it = 0;
case ELEVATOR_INSERT_FRONT: // bypass the I/O scheduler: insert at the front of the dispatch queue
rq->cmd_flags |= REQ_SOFTBARRIER;
list_add(&rq->queuelist, &q->queue_head);
break;
case ELEVATOR_INSERT_BACK: // bypass the I/O scheduler: insert at the back of the dispatch queue
rq->cmd_flags |= REQ_SOFTBARRIER;
elv_drain_elevator(q);
list_add_tail(&rq->queuelist, &q->queue_head);
/*
* We kick the queue here for the following reasons.
* - The elevator might have returned NULL previously
* to delay requests and returned them now. As the
* queue wasn't empty before this request, ll_rw_blk
* won't run the queue on return, resulting in hang.
* - Usually, back inserted requests won't be merged
* with anything. There's no point in delaying queue
* processing.
*/
__blk_run_queue(q);
break;
case ELEVATOR_INSERT_SORT: // go through the I/O scheduler
BUG_ON(rq->cmd_type != REQ_TYPE_FS);
rq->cmd_flags |= REQ_SORTED;
q->nr_sorted++;
if (rq_mergeable(rq)) { // only mergeable (file system) requests take part in merging
elv_rqhash_add(q, rq); // add the request to the elevator's hash table
if (!q->last_merge)
q->last_merge = rq;
}
/*
* Some ioscheds (cfq) run q->request_fn directly, so
* rq cannot be accessed after calling
* elevator_add_req_fn.
*/
q->elevator->ops->elevator_add_req_fn(q, rq); // the scheduler's own add-request callback
break;
case ELEVATOR_INSERT_FLUSH: // FLUSH request
rq->cmd_flags |= REQ_SOFTBARRIER;
blk_insert_flush(rq);
break;
default:
printk(KERN_ERR "%s: bad insertion point %d\n",
__func__, where);
BUG();
}
if (unplug_it && blk_queue_plugged(q)) {
int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
- queue_in_flight(q);
if (nrq >= q->unplug_thresh) // unplug threshold reached: unplug the request queue and process the requests
__generic_unplug_device(q);
}
}
/* Add the request to the elevator's hash table */
static void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
BUG_ON(ELV_ON_HASH(rq));
hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
}
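The hash key used above is the request's end sector (rq_hash_key() in elevator.c is essentially blk_rq_pos(rq) + blk_rq_sectors(rq)), which is why elv_rqhash_find(q, bio->bi_sector) in elv_merge() directly yields back-merge candidates: a request that ends exactly where the new bio begins hashes under the looked-up key. A trivial userspace illustration (not kernel code):
#include <stdio.h>

int main(void)
{
    unsigned long rq_pos = 100, rq_sectors = 8;
    unsigned long hash_key = rq_pos + rq_sectors;   /* rq_hash_key(): end sector = 108 */
    unsigned long bio_sector = 108;                 /* new bio starts at sector 108 */

    /* a lookup keyed by the bio's start sector finds the request that ends there */
    printf("back-merge candidate found: %s\n",
           hash_key == bio_sector ? "yes" : "no");
    return 0;
}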