Linux Kernel Block Devices (3): Queueing bio Requests into a Block Device

blk_queue_bio() is the generic path by which a block device handles bio requests. The main idea is to merge the bio into an existing request whenever possible, and then, where possible, to further merge adjacent requests; if the bio cannot be merged, a new request is allocated, initialized from the bio, and the queue is plugged.
Mutual exclusion: locking, via the request queue's spinlock (q->queue_lock).
Different I/O scheduling algorithms come into play here: a newly built request is added to the elevator's scheduling queue, and an unplug event triggers the elevator, which dispatches requests to the request queue's dispatch list according to its particular scheduling algorithm.
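For orientation, here is a minimal caller-side sketch of how a bio reaches this function on a kernel of roughly this era (~2.6.3x): submit_bio() calls generic_make_request(), which invokes q->make_request_fn(q, bio), and for an ordinary request-based queue that callback is blk_queue_bio(). The names my_end_io and submit_one_page are hypothetical; only the bio API calls are real.

#include <linux/bio.h>
#include <linux/blkdev.h>

/* hypothetical completion callback: runs when the underlying request finishes */
static void my_end_io(struct bio *bio, int error)
{
    bio_put(bio);
}

/* hypothetical helper: read one page starting at 'sector' (512-byte units) */
static void submit_one_page(struct block_device *bdev, struct page *page,
                            sector_t sector)
{
    struct bio *bio = bio_alloc(GFP_NOIO, 1);   /* room for one bio_vec */

    bio->bi_bdev = bdev;
    bio->bi_sector = sector;                    /* starting sector */
    bio->bi_end_io = my_end_io;
    bio_add_page(bio, page, PAGE_SIZE, 0);
    submit_bio(READ, bio);    /* -> generic_make_request -> blk_queue_bio */
}
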
/* queue a bio request at the block device's request queue */
int blk_queue_bio(struct request_queue *q, struct bio *bio)
{
    struct request *req;
    int el_ret;
    unsigned int bytes = bio->bi_size;
    const unsigned short prio = bio_prio(bio);
    const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
    const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
    const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
    int where = ELEVATOR_INSERT_SORT;
    int rw_flags;

    /* BIO_RW_BARRIER is deprecated */
    if (WARN_ONCE(bio_rw_flagged(bio, BIO_RW_BARRIER),
        "block: BARRIER is deprecated, use FLUSH/FUA instead\n")) {
        bio_endio(bio, -EOPNOTSUPP);
        return 0;
    }

    /*
     * low level driver can indicate that it wants pages above a
     * certain limit bounced to low memory (ie for highmem, or even
     * ISA dma in theory)
     */
    blk_queue_bounce(q, &bio);

    if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
        bio_endio(bio, -EIO);
        return 0;
    }

    spin_lock_irq(q->queue_lock);

    if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {     //FLUSH/FUA bios skip merging: allocate a new request directly
        where = ELEVATOR_INSERT_FLUSH;
        goto get_rq;
    }

    if (elv_queue_empty(q))                                   //elevator queue is empty, nothing to merge with: allocate a new request
        goto get_rq;

    el_ret = elv_merge(q, &req, bio);                    //ask the elevator to merge the bio into an existing request req
    switch (el_ret) {
    case ELEVATOR_BACK_MERGE:                     //1. merge the bio at the tail of request req
        BUG_ON(!rq_mergeable(req));

        if (!ll_back_merge_fn(q, req, bio))               //1) check the merged request would not exceed the queue's max bytes or max segment count
            break;

        trace_block_bio_backmerge(q, bio);

        if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
            blk_rq_set_mixed_merge(req);

        req->biotail->bi_next = bio;                       //2) link the bio at the tail of req's bio list
        req->biotail = bio;
        req->__data_len += bytes;
        req->ioprio = ioprio_best(req->ioprio, prio);
        if (!blk_rq_cpu_valid(req))
            req->cpu = bio->bi_comp_cpu;
        drive_stat_acct(req, 0);
        elv_bio_merged(q, req, bio);                      //notify the elevator that the bio was merged into req
        if (!attempt_back_merge(q, req))               //3) then try to merge req with the request behind it; returns 1 if merged, 0 otherwise
            elv_merged_request(q, req, el_ret);       //housekeeping when no request-request merge follows
        goto out;

    case ELEVATOR_FRONT_MERGE:                 //2. merge the bio at the front of request req
        BUG_ON(!rq_mergeable(req));

        if (!ll_front_merge_fn(q, req, bio))            //1) check the merged request would not exceed the queue's max bytes or max segment count
            break;

        trace_block_bio_frontmerge(q, bio);

        if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
            blk_rq_set_mixed_merge(req);
            req->cmd_flags &= ~REQ_FAILFAST_MASK;
            req->cmd_flags |= ff;
        }

        bio->bi_next = req->bio;                         //2) link the bio at the head of req's bio list
        req->bio = bio;

        /*
         * may not be valid. if the low level driver said
         * it didn't need a bounce buffer then it better
         * not touch req->buffer either...
         */
        req->buffer = bio_data(bio);               //fix up the request's starting buffer address
        /*
         * The merge may happen across partitions
         * We must update in_flight value accordingly
         */
        blk_account_io_front_merge(req, bio->bi_sector);
        req->__sector = bio->bi_sector;     //fix up the request's starting sector; its size is updated just below
        req->__data_len += bytes;
        req->ioprio = ioprio_best(req->ioprio, prio);
        if (!blk_rq_cpu_valid(req))
            req->cpu = bio->bi_comp_cpu;
        drive_stat_acct(req, 0);
        elv_bio_merged(q, req, bio);
        if (!attempt_front_merge(q, req))          //3) then try to merge req with the request in front of it; returns 1 if merged, 0 otherwise
            elv_merged_request(q, req, el_ret);     //housekeeping when no request-request merge follows
        goto out;

    /* ELV_NO_MERGE: elevator says don't/can't merge. */
    default:
        ;
    }

get_rq:
    /*
     * This sync check and mask will be re-done in init_request_from_bio(),
     * but we need to set it earlier to expose the sync flag to the
     * rq allocator and io schedulers.
     */
    rw_flags = bio_data_dir(bio);
    if (sync)
        rw_flags |= REQ_SYNC;

    /*
     * Grab a free request. This might sleep but fails only if the
     * queue is dead. Returns with the queue unlocked.
     */
    req = get_request_wait(q, rw_flags, bio);          //3. block until a free request can be allocated
    if (unlikely(!req)) {
        bio_endio(bio, -ENODEV);    /* @q is dead */
        goto out_unlock;
    }

    /*
     * After dropping the lock and possibly sleeping here, our request
     * may now be mergeable after it had proven unmergeable (above).
     * We don't worry about that case for efficiency. It won't happen
     * often, and the elevators are able to handle it.
     */
    init_request_from_bio(req, bio);                    //4. initialize the new request from the bio

    spin_lock_irq(q->queue_lock);
    if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
         bio_flagged(bio, BIO_CPU_AFFINE))
         req->cpu = raw_smp_processor_id();

    if (queue_should_plug(q) && elv_queue_empty(q))     //5. if the elevator queue is empty, plug the request queue
        blk_plug_device(q);

    /* insert the request into the elevator */
    drive_stat_acct(req, 1);
    __elv_add_request(q, req, where, 0);          //6. add the request to the elevator's scheduling queue
out:
    if (unplug || !queue_should_plug(q))          //7. if the bio asked for unplug, or the queue should not be plugged, unplug and process the queued requests
        __generic_unplug_device(q);
out_unlock:
    spin_unlock_irq(q->queue_lock);
    return 0;
}
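
The plugging in step 5 deserves a note: blk_plug_device() only sets QUEUE_FLAG_PLUGGED and arms a timer, so that further requests can accumulate and merge before the driver is asked to do work. A sketch of it, assuming a kernel of this era (the exact body varies between versions):

void blk_plug_device(struct request_queue *q)
{
    WARN_ON(!irqs_disabled());

    /* don't plug a stopped queue; it must be paired with blk_start_queue(),
     * which will restart the queueing */
    if (blk_queue_stopped(q))
        return;

    if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
        mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);  /* unplug later */
        trace_block_plug(q);
    }
}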

/* Can the bio be merged into a request already on the queue? The return value gives the merge position; *req returns the request the bio merges into. */
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
    struct elevator_queue *e = q->elevator;
    struct request *__rq;
    int ret;

    /*
     * First try one-hit cache.
     */
    if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {     //1. check whether the bio can be merged into the last merged request
        ret = blk_try_merge(q->last_merge, bio);          //and if so, in which direction
        if (ret != ELEVATOR_NO_MERGE) {
            *req = q->last_merge;
            return ret;
        }
    }

    if (blk_queue_nomerges(q))
        return ELEVATOR_NO_MERGE;

    /*
     * See if our hash lookup can find a potential backmerge.
     */
    __rq = elv_rqhash_find(q, bio->bi_sector);          //2. look in the elevator hash for a request that ends where the bio starts: a back-merge candidate
    if (__rq && elv_rq_merge_ok(__rq, bio)) {
        *req = __rq;
        return ELEVATOR_BACK_MERGE;
    }

    if (e->ops->elevator_merge_fn)                         //3. fall back to the elevator's own merge callback to find a request to merge with
        return e->ops->elevator_merge_fn(q, req, bio);

    return ELEVATOR_NO_MERGE;
}
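
The hash lookup in step 2 works because the elevator hashes each request by its end sector (rq_hash_key(rq) == blk_rq_pos(rq) + blk_rq_sectors(rq)), so looking up bio->bi_sector finds exactly the requests that end where the bio begins, i.e. back-merge candidates. A sketch of the lookup, close to the elevator.c of this era:

static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
    struct elevator_queue *e = q->elevator;
    struct hlist_node *entry, *next;
    struct request *rq;

    hlist_for_each_entry_safe(rq, entry, next,
                              &e->hash[ELV_HASH_FN(offset)], hash) {
        BUG_ON(!ELV_ON_HASH(rq));

        if (unlikely(!rq_mergeable(rq))) {     /* prune stale entries lazily */
            __elv_rqhash_del(rq);
            continue;
        }

        if (rq_hash_key(rq) == offset)         /* request ends at 'offset' */
            return rq;
    }

    return NULL;
}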

/* check whether the bio may be merged into request rq: returns 1 if it may, otherwise 0 */
bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
    if (!blk_rq_merge_ok(rq, bio))
        return 0;

    if (!elv_iosched_allow_merge(rq, bio))
        return 0;

    return 1;
}
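
elv_iosched_allow_merge() gives the active I/O scheduler a veto over the merge; if the scheduler does not implement the hook, merging is allowed. Roughly (a sketch for this kernel era):

static inline int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
    struct request_queue *q = rq->q;
    struct elevator_queue *e = q->elevator;

    if (e->ops->elevator_allow_merge_fn)     /* e.g. CFQ refuses merges across different cfq queues */
        return e->ops->elevator_allow_merge_fn(q, rq, bio);

    return 1;
}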

/* check whether the bio may be merged into request rq: returns true if it may, otherwise false */
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
    if (!rq_mergeable(rq) || !bio_mergeable(bio))                              //only ordinary file-system requests are mergeable
        return false;

    /* don't merge file system requests and discard requests */
    if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
        return false;

    /* different data direction or already started, don't merge */
    if (bio_data_dir(bio) != rq_data_dir(rq))
        return false;

    /* must be same device and not a special request */
    if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
        return false;

    /* only merge integrity protected bio into ditto rq */
    if (bio_integrity(bio) != blk_integrity_rq(rq))
        return false;

    return true;
}

/* determine how the bio could merge with request rq */
int blk_try_merge(struct request *rq, struct bio *bio)
{
    if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector)     //the request ends exactly where the bio starts: back merge
        return ELEVATOR_BACK_MERGE;
    else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector)  //the bio ends exactly where the request starts: front merge
        return ELEVATOR_FRONT_MERGE;
    return ELEVATOR_NO_MERGE;
}
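
The arithmetic rests on these accessors (a sketch matching include/linux/blkdev.h of this era), followed by a worked example:

static inline sector_t blk_rq_pos(const struct request *rq)
{
    return rq->__sector;                  /* first sector of the request */
}

static inline unsigned int blk_rq_bytes(const struct request *rq)
{
    return rq->__data_len;                /* total payload in bytes */
}

static inline unsigned int blk_rq_sectors(const struct request *rq)
{
    return blk_rq_bytes(rq) >> 9;         /* payload in 512-byte sectors */
}

/*
 * Hypothetical example: a request covering sectors [100, 108) has
 * blk_rq_pos() == 100 and blk_rq_sectors() == 8.
 *   - a bio with bi_sector == 108 gives 100 + 8 == 108 -> ELEVATOR_BACK_MERGE
 *   - an 8-sector bio with bi_sector == 92 gives 100 - 8 == 92 -> ELEVATOR_FRONT_MERGE
 *   - anything else leaves a gap or an overlap -> ELEVATOR_NO_MERGE
 */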

/* try to merge rq with the request behind it on the queue; returns 1 if merged, otherwise 0 */
int attempt_back_merge(struct request_queue *q, struct request *rq)
{
    struct request *next = elv_latter_request(q, rq);     //the request sorted after rq

    if (next)
        return attempt_merge(q, rq, next);                    //try to merge rq with the following request next

    return 0;
}
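
Its counterpart attempt_front_merge(), called in the ELEVATOR_FRONT_MERGE path above, is symmetric; note the argument order, so that the earlier request absorbs the later one (a sketch for this era):

/* try to merge rq with the request in front of it; returns 1 if merged */
int attempt_front_merge(struct request_queue *q, struct request *rq)
{
    struct request *prev = elv_former_request(q, rq);    /* request sorted before rq */

    if (prev)
        return attempt_merge(q, prev, rq);               /* prev absorbs rq */

    return 0;
}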

/* try to merge request req with request next; returns 1 if merged, 0 if they cannot be merged */
static int attempt_merge(struct request_queue *q, struct request *req,
              struct request *next)
{
    if (!rq_mergeable(req) || !rq_mergeable(next))     //1. both requests must be mergeable
        return 0;

    /*
     * Don't merge file system requests and discard requests
     */
    if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD))     //2. both must have the same discard attribute
        return 0;

    /*
     * not contiguous
     */
    if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))     //3. the two requests must be sector-contiguous
        return 0;

    if (rq_data_dir(req) != rq_data_dir(next)
        || req->rq_disk != next->rq_disk
        || next->special)                                                                      //4. same data direction and same disk, and next must not be a special request
        return 0;

    if (blk_integrity_rq(req) != blk_integrity_rq(next))
        return 0;

    /*
     * If we are allowed to merge, then append bio list
     * from next to rq and release next. merge_requests_fn
     * will have updated segment counts, update sector
     * counts here.
     */
    if (!ll_merge_requests_fn(q, req, next))                                        //5. check size and segment limits for the merged request
        return 0;

    /*
     * If failfast settings disagree or any of the two is already
     * a mixed merge, mark both as mixed before proceeding.  This
     * makes sure that all involved bios have mixable attributes
     * set properly.
     */
    if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
        (req->cmd_flags & REQ_FAILFAST_MASK) !=
        (next->cmd_flags & REQ_FAILFAST_MASK)) {
        blk_rq_set_mixed_merge(req);
        blk_rq_set_mixed_merge(next);
    }

    /*
     * At this point we have either done a back merge
     * or front merge. We need the smaller start_time of
     * the merged requests to be the current request
     * for accounting purposes.
     */
    if (time_after(req->start_time, next->start_time))
        req->start_time = next->start_time;

    req->biotail->bi_next = next->bio;               //6. splice next's bio list onto the tail of req, then update req's size
    req->biotail = next->biotail;

    req->__data_len += blk_rq_bytes(next);      

    elv_merge_requests(q, req, next);               //7. elevator housekeeping after the request-request merge

    /*
     * 'next' is going away, so update stats accordingly
     */
    blk_account_io_merge(next);

    req->ioprio = ioprio_best(req->ioprio, next->ioprio);
    if (blk_rq_cpu_valid(next))
        req->cpu = next->cpu;

    /* owner-ship of bio passed from next to req */
    next->bio = NULL;
    __blk_put_request(q, next);                         //8. release next and return 1
    return 1;
}
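
The mixed-merge marking seen above (and in blk_queue_bio) exists because a merged request may contain bios with different failfast attributes; blk_rq_set_mixed_merge() pushes the request's failfast bits down into every bio so that partial-completion handling stays consistent. A sketch, close to the blk-core.c of this era:

void blk_rq_set_mixed_merge(struct request *rq)
{
    unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
    struct bio *bio;

    if (rq->cmd_flags & REQ_MIXED_MERGE)
        return;

    /*
     * @rq will no longer represent mixable attributes for all the
     * contained bios; distribute the attributes to each bio.
     */
    for (bio = rq->bio; bio; bio = bio->bi_next) {
        WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
                     (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
        bio->bi_rw |= ff;
    }
    rq->cmd_flags |= REQ_MIXED_MERGE;
}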

/* check whether two requests may be merged; returns 1 if allowed, otherwise 0 */
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
                struct request *next)
{
    int total_phys_segments;
    unsigned int seg_size =
        req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;     //size of the single segment formed if the two boundary segments join

    /*
     * First check if the either of the requests are re-queued
     * requests.  Can't merge them if they are.
     */
    if (req->special || next->special)                    //1. re-queued (retried) requests carry ->special and cannot be merged
        return 0;

    /*
     * Will it become too large?
     */
    if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
        blk_rq_get_max_sectors(req))                         //2. the merged request must not exceed the max sector count
        return 0;

    total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
    if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
        if (req->nr_phys_segments == 1)
            req->bio->bi_seg_front_size = seg_size;
        if (next->nr_phys_segments == 1)
            next->biotail->bi_seg_back_size = seg_size;
        total_phys_segments--;
    }

    if (total_phys_segments > queue_max_segments(q))     //3. the merged request must not exceed the queue's max segment count
        return 0;

    /* Merge is OK... */
    req->nr_phys_segments = total_phys_segments;
    return 1;
}
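
For example, suppose req has 3 physical segments and next has 2. If req's last segment and next's first segment are physically contiguous (and their combined size fits within the queue's segment-size limit), blk_phys_contig_segment() returns true and the two boundary segments count as one, so the merged request needs 4 segments rather than 5; the merge is then allowed as long as 4 does not exceed queue_max_segments(q).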

/* elevator housekeeping after two requests have been merged */
void elv_merge_requests(struct request_queue *q, struct request *rq,
                 struct request *next)
{
    struct elevator_queue *e = q->elevator;

    if (e->ops->elevator_merge_req_fn)
        e->ops->elevator_merge_req_fn(q, rq, next);

    elv_rqhash_reposition(q, rq);     //1. reposition rq in the hash table (its end sector has changed)
    elv_rqhash_del(q, next);            //2. remove next from the hash table

    q->nr_sorted--;
    q->last_merge = rq;
}

/* elevator housekeeping after a bio merge when no request-request merge followed */
void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
    struct elevator_queue *e = q->elevator;

    if (e->ops->elevator_merged_fn)
        e->ops->elevator_merged_fn(q, rq, type);

    if (type == ELEVATOR_BACK_MERGE)     //a back merge changes rq's end sector, so reposition it in the hash table
        elv_rqhash_reposition(q, rq);

    q->last_merge = rq;
}
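
Only a back merge needs the rehash because the hash key is the request's end sector (rq_hash_key(rq) == blk_rq_pos(rq) + blk_rq_sectors(rq)): a back merge grows the request's tail and thus changes the key, while a front merge only moves the start sector and leaves the key untouched.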

/* insert a request into the elevator's scheduling queue */
void __elv_add_request(struct request_queue *q, struct request *rq, int where,
               int plug)
{
    if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
        /* barriers are scheduling boundary, update end_sector */
        if (rq->cmd_type == REQ_TYPE_FS) {
            q->end_sector = rq_end_sector(rq);
            q->boundary_rq = rq;
        }
    } else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
            where == ELEVATOR_INSERT_SORT)
        where = ELEVATOR_INSERT_BACK;

    if (plug)
        blk_plug_device(q);

    elv_insert(q, rq, where);     //insert the request into the elevator's scheduling queue
}

/* insert a request into the elevator's scheduling queue */
void elv_insert(struct request_queue *q, struct request *rq, int where)
{
    int unplug_it = 1;

    trace_block_rq_insert(q, rq);

    rq->q = q;

    switch (where) {
    case ELEVATOR_INSERT_REQUEUE:
        /*
         * Most requeues happen because of a busy condition,
         * don't force unplug of the queue for that case.
         * Clear unplug_it and fall through.
         */
        unplug_it = 0;

    case ELEVATOR_INSERT_FRONT:          //bypass the elevator: insert at the head of the dispatch queue
        rq->cmd_flags |= REQ_SOFTBARRIER;
        list_add(&rq->queuelist, &q->queue_head);
        break;

    case ELEVATOR_INSERT_BACK:           //bypass the elevator: insert at the tail of the dispatch queue
        rq->cmd_flags |= REQ_SOFTBARRIER;
        elv_drain_elevator(q);
        list_add_tail(&rq->queuelist, &q->queue_head);
        /*
         * We kick the queue here for the following reasons.
         * - The elevator might have returned NULL previously
         *   to delay requests and returned them now.  As the
         *   queue wasn't empty before this request, ll_rw_blk
         *   won't run the queue on return, resulting in hang.
         * - Usually, back inserted requests won't be merged
         *   with anything.  There's no point in delaying queue
         *   processing.
         */
        __blk_run_queue(q);
        break;

    case ELEVATOR_INSERT_SORT:                    //go through the elevator
        BUG_ON(rq->cmd_type != REQ_TYPE_FS);
        rq->cmd_flags |= REQ_SORTED;
        q->nr_sorted++;
        if (rq_mergeable(rq)) {               //only mergeable (file-system) requests take part in merging
            elv_rqhash_add(q, rq);                              //add the request to the elevator's hash table
            if (!q->last_merge)
                q->last_merge = rq;
        }

        /*
         * Some ioscheds (cfq) run q->request_fn directly, so
         * rq cannot be accessed after calling
         * elevator_add_req_fn.
         */
        q->elevator->ops->elevator_add_req_fn(q, rq);     //the elevator's own add-request callback
        break;

    case ELEVATOR_INSERT_FLUSH:               //FLUSH/FUA requests
        rq->cmd_flags |= REQ_SOFTBARRIER;
        blk_insert_flush(rq);
        break;

    default:
        printk(KERN_ERR "%s: bad insertion point %d\n",
               __func__, where);
        BUG();
    }

    if (unplug_it && blk_queue_plugged(q)) {
        int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
                - queue_in_flight(q);

        if (nrq >= q->unplug_thresh)          //the unplug threshold is reached: unplug the queue and process the requests
            __generic_unplug_device(q);
    }
}
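
__generic_unplug_device(), used both here and in blk_queue_bio, is what finally hands work to the driver: it clears the plugged state (blk_remove_plug() also cancels the unplug timer) and calls the driver's request_fn to drain the dispatch queue. A sketch for this kernel era:

void __generic_unplug_device(struct request_queue *q)
{
    if (unlikely(blk_queue_stopped(q)))
        return;
    if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
        return;             /* wasn't plugged and isn't a non-rotational device */

    q->request_fn(q);       /* let the driver pull requests off the queue */
}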

/* add a request to the elevator's hash table */
static void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
    struct elevator_queue *e = q->elevator;

    BUG_ON(ELV_ON_HASH(rq));
    hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
}
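
The supporting macros and helpers (a sketch matching the elevator.c of this era) make the hashing scheme explicit: requests are keyed by their end sector, and repositioning is simply delete plus re-add:

static const int elv_hash_shift = 6;
#define ELV_HASH_BLOCK(sec)   ((sec) >> 3)
#define ELV_HASH_FN(sec)      (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
#define ELV_HASH_ENTRIES      (1 << elv_hash_shift)
#define rq_hash_key(rq)       (blk_rq_pos(rq) + blk_rq_sectors(rq))   /* end sector */
#define ELV_ON_HASH(rq)       (!hlist_unhashed(&(rq)->hash))

static void __elv_rqhash_del(struct request *rq)
{
    hlist_del_init(&rq->hash);
}

/* the request's size changed (e.g. back merge): re-hash it under its new key */
static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
    __elv_rqhash_del(rq);
    elv_rqhash_add(q, rq);
}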

