Linux那些事儿之我是Block层 (8): The Past and Present Life of a SCSI Command (II)

Once this partnership in crime has been established, the request can finally be executed. Let's look at blk_execute_rq(), from block/ll_rw_blk.c:

   2605 /**
   2606  * blk_execute_rq - insert a request into queue for execution
   2607  * @q:          queue to insert the request in
   2608  * @bd_disk:    matching gendisk
   2609  * @rq:         request to insert
   2610  * @at_head:    insert request at head or tail of queue
   2611  *
   2612  * Description:
   2613  *    Insert a fully prepared request at the back of the io scheduler queue
   2614  *    for execution and wait for completion.
   2615  */
   2616 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
   2617                    struct request *rq, int at_head)
   2618 {
   2619         DECLARE_COMPLETION_ONSTACK(wait);
   2620         char sense[SCSI_SENSE_BUFFERSIZE];
   2621         int err = 0;
   2622
   2623         /*
   2624          * we need an extra reference to the request, so we can look at
   2625          * it after io completion
   2626          */
   2627         rq->ref_count++;
   2628
   2629         if (!rq->sense) {
   2630                 memset(sense, 0, sizeof(sense));
   2631                 rq->sense = sense;
   2632                 rq->sense_len = 0;
   2633         }
   2634
   2635         rq->end_io_data = &wait;
   2636         blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
   2637         wait_for_completion(&wait);
   2638
   2639         if (rq->errors)
   2640                 err = -EIO;
   2641
   2642         return err;
   2643 }
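The synchronization here relies on the completion mechanism: line 2635 hooks the on-stack wait onto rq->end_io_data, line 2636 registers blk_end_sync_rq as the request's end_io callback, and so wait_for_completion(&wait) at line 2637 can sleep soundly until the I/O finishes and the callback calls complete() to wake it up. blk_end_sync_rq looks roughly like this (a sketch based on block/ll_rw_blk.c in 2.6.2x kernels; details may vary by version):

        void blk_end_sync_rq(struct request *rq, int error)
        {
                struct completion *waiting = rq->end_io_data;

                rq->end_io_data = NULL;
                __blk_put_request(rq->q, rq);

                /* complete() must come last: wait lives on the caller's
                 * stack, so rq may become invalid right after the wakeup */
                complete(waiting);
        }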

Setting aside the error-handling code, this function really comes down to two lines: blk_execute_rq_nowait and wait_for_completion. Let's look at the former first, from block/ll_rw_blk.c:

   2576 /**
   2577  * blk_execute_rq_nowait - insert a request into queue for execution
   2578  * @q:          queue to insert the request in
   2579  * @bd_disk:    matching gendisk
   2580  * @rq:         request to insert
   2581  * @at_head:    insert request at head or tail of queue
   2582  * @done:       I/O completion handler
   2583  *
   2584  * Description:
   2585  *    Insert a fully prepared request at the back of the io scheduler queue
   2586  *    for execution.  Don't wait for completion.
   2587  */
   2588 void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
   2589                            struct request *rq, int at_head,
   2590                            rq_end_io_fn *done)
   2591 {
   2592         int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
   2593
   2594         rq->rq_disk = bd_disk;
   2595         rq->cmd_flags |= REQ_NOMERGE;
   2596         rq->end_io = done;
   2597         WARN_ON(irqs_disabled());
   2598         spin_lock_irq(q->queue_lock);
   2599         __elv_add_request(q, rq, where, 1);
   2600         __generic_unplug_device(q);
   2601         spin_unlock_irq(q->queue_lock);
   2602 }

First, at_head indicates where to insert. (… let's hope there isn't yet another parameter indicating which position to insert in.)

where records the value of at_head. In our context, at_head was passed down when scsi_execute() called blk_execute_rq, and back then we set it to 1, so where is set to ELEVATOR_INSERT_FRONT.
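As a reminder, the call in scsi_execute() looks roughly like this (with 2.6.2x as the reference; the fourth argument is at_head):

        blk_execute_rq(req->q, NULL, req, 1);

The ELEVATOR_INSERT_* macros come from include/linux/elevator.h: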

    155 /*
    156  * Insertion selection
    157  */
    158 #define ELEVATOR_INSERT_FRONT   1
    159 #define ELEVATOR_INSERT_BACK    2
    160 #define ELEVATOR_INSERT_SORT    3
    161 #define ELEVATOR_INSERT_REQUEUE 4

Clearly this tells us to insert at the front. Not too perverted after all. So, carrying this where, we enter the next function, __elv_add_request, from block/elevator.c:

    646 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
    647                        int plug)
    648 {
    649         if (q->ordcolor)
    650                 rq->cmd_flags |= REQ_ORDERED_COLOR;
    651
    652         if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
    653                 /*
    654                  * toggle ordered color
    655                  */
    656                 if (blk_barrier_rq(rq))
    657                         q->ordcolor ^= 1;
    658
    659                 /*
    660                  * barriers implicitly indicate back insertion
    661                  */
    662                 if (where == ELEVATOR_INSERT_SORT)
    663                         where = ELEVATOR_INSERT_BACK;
    664
    665                 /*
    666                  * this request is scheduling boundary, update
    667                  * end_sector
    668                  */
    669                 if (blk_fs_request(rq)) {
    670                         q->end_sector = rq_end_sector(rq);
    671                         q->boundary_rq = rq;
    672                 }
    673         } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
    674                 where = ELEVATOR_INSERT_BACK;
    675
    676         if (plug)
    677                 blk_plug_device(q);
    678
    679         elv_insert(q, rq, where);
    680 }

The plug parameter passed in equals 1, so blk_plug_device() will be executed. Let's not worry about that function for now.
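Still, just to have a rough picture: plugging means setting the QUEUE_FLAG_PLUGGED flag on the queue and arming the unplug timer, so that requests can accumulate and be processed in a batch. A sketch of it, based on block/ll_rw_blk.c in 2.6.2x kernels (details may vary by version):

        void blk_plug_device(request_queue_t *q)
        {
                WARN_ON(!irqs_disabled());

                /* never plug a stopped queue; it must be restarted
                 * by a matching blk_start_queue() */
                if (blk_queue_stopped(q))
                        return;

                if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
                        mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
                        blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
                }
        }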

Clearly, none of what precedes concerns us; jump straight to the last line, elv_insert().

    548 void elv_insert(request_queue_t *q, struct request *rq, int where)
    549 {
    550         struct list_head *pos;
    551         unsigned ordseq;
    552         int unplug_it = 1;
    553
    554         blk_add_trace_rq(q, rq, BLK_TA_INSERT);
    555
    556         rq->q = q;
    557
    558         switch (where) {
    559         case ELEVATOR_INSERT_FRONT:
    560                 rq->cmd_flags |= REQ_SOFTBARRIER;
    561
    562                 list_add(&rq->queuelist, &q->queue_head);
    563                 break;
    564
    565         case ELEVATOR_INSERT_BACK:
    566                 rq->cmd_flags |= REQ_SOFTBARRIER;
    567                 elv_drain_elevator(q);
    568                 list_add_tail(&rq->queuelist, &q->queue_head);
    569                 /*
    570                  * We kick the queue here for the following reasons.
    571                  * - The elevator might have returned NULL previously
    572                  *   to delay requests and returned them now.  As the
    573                  *   queue wasn't empty before this request, ll_rw_blk
    574                  *   won't run the queue on return, resulting in hang.
    575                  * - Usually, back inserted requests won't be merged
    576                  *   with anything.  There's no point in delaying queue
    577                  *   processing.
    578                  */
    579                 blk_remove_plug(q);
    580                 q->request_fn(q);
    581                 break;
    582
    583         case ELEVATOR_INSERT_SORT:
    584                 BUG_ON(!blk_fs_request(rq));
    585                 rq->cmd_flags |= REQ_SORTED;
    586                 q->nr_sorted++;
    587                 if (rq_mergeable(rq)) {
    588                         elv_rqhash_add(q, rq);
    589                         if (!q->last_merge)
    590                                 q->last_merge = rq;
    591                 }
    592
    593                 /*
    594                  * Some ioscheds (cfq) run q->request_fn directly, so
    595                  * rq cannot be accessed after calling
    596                  * elevator_add_req_fn.
    597                  */
    598                 q->elevator->ops->elevator_add_req_fn(q, rq);
    599                 break;
    600
    601         case ELEVATOR_INSERT_REQUEUE:
    602                 /*
    603                  * If ordered flush isn't in progress, we do front
    604                  * insertion; otherwise, requests should be requeued
    605                  * in ordseq order.
    606                  */
    607                 rq->cmd_flags |= REQ_SOFTBARRIER;
    608
    609                 /*
    610                  * Most requeues happen because of a busy condition,
    611                  * don't force unplug of the queue for that case.
    612                  */
    613                 unplug_it = 0;
    614
    615                 if (q->ordseq == 0) {
    616                         list_add(&rq->queuelist, &q->queue_head);
    617                         break;
    618                 }
    619
    620                 ordseq = blk_ordered_req_seq(rq);
    621
    622                 list_for_each(pos, &q->queue_head) {
    623                         struct request *pos_rq = list_entry_rq(pos);
    624                         if (ordseq <= blk_ordered_req_seq(pos_rq))
    625                                 break;
    626                 }
    627
    628                 list_add_tail(&rq->queuelist, pos);
    629                 break;
    630
    631         default:
    632                 printk(KERN_ERR "%s: bad insertion point %d\n",
    633                        __FUNCTION__, where);
    634                 BUG();
    635         }
    636
    637         if (unplug_it && blk_queue_plugged(q)) {
    638                 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
    639                         - q->in_flight;
    640
    641                 if (nrq >= q->unplug_thresh)
    642                         __generic_unplug_device(q);
    643         }
    644 }

Since we are inserting at the front, we execute the list_add at line 562. struct request has a member struct list_head queuelist, and struct request_queue has a member struct list_head queue_head, so the former gets inserted into the queue represented by the latter. And then we return.

Back in blk_execute_rq_nowait(), the next function to be called is __generic_unplug_device, once again from block/ll_rw_blk.c:

   1586 /*
   1587  * remove the plug and let it rip..
   1588  */
   1589 void __generic_unplug_device(request_queue_t *q)
   1590 {
   1591         if (unlikely(blk_queue_stopped(q)))
   1592                 return;
   1593
   1594         if (!blk_remove_plug(q))
   1595                 return;
   1596
   1597         q->request_fn(q);
   1598 }

The real highlight is line 1597, the call to request_fn. request_fn is a member of struct request_queue with type request_fn_proc *, and request_fn_proc is just another typedef trick, from include/linux/blkdev.h:

    334 typedef void (request_fn_proc) (request_queue_t *q);

So what exactly is this request_fn? Remember that function in the scsi subsystem that allocated the request queue? That's right, __scsi_alloc_queue(). It is called by scsi_alloc_queue(), and the parameter was passed right there:

   1590 struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
   1591 {
   1592         struct request_queue *q;
   1593
   1594         q = __scsi_alloc_queue(sdev->host, scsi_request_fn);
   1595         if (!q)
   1596                 return NULL;
   1597
   1598         blk_queue_prep_rq(q, scsi_prep_fn);
   1599         blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);
   1600         blk_queue_softirq_done(q, scsi_softirq_done);
   1601         return q;
   1602 }

Yes, it is scsi_request_fn(). This function pointer is handed along a few times and finally gets assigned to q->request_fn in blk_init_queue_node(). So what we really need to care about is scsi_request_fn.
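The hand-off path looks roughly like this (a sketch with 2.6.2x as the reference, not complete code):

        /* in scsi_alloc_queue(): */
        q = __scsi_alloc_queue(sdev->host, scsi_request_fn);
        /* in __scsi_alloc_queue(): */
        q = blk_init_queue(request_fn, NULL);
        /* blk_init_queue() ends up in blk_init_queue_node(), where: */
        q->request_fn = rfn;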

Before reading scsi_request_fn, note that lines 1598 to 1600 here also assign three function pointers:

    132 /**
    133  * blk_queue_prep_rq - set a prepare_request function for queue
    134  * @q:          queue
    135  * @pfn:        prepare_request function
    136  *
    137  * It's possible for a queue to register a prepare_request callback which
    138  * is invoked before the request is handed to the request_fn. The goal of
    139  * the function is to prepare a request for I/O, it can be used to build a
    140  * cdb from the request data for instance.
    141  *
    142  */
    143 void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
    144 {
    145         q->prep_rq_fn = pfn;
    146 }

    303 /**
    304  * blk_queue_issue_flush_fn - set function for issuing a flush
    305  * @q:     the request queue
    306  * @iff:   the function to be called issuing the flush
    307  *
    308  * Description:
    309  *   If a driver supports issuing a flush command, the support is notified
    310  *   to the block layer by defining it through this call.
    311  *
    312  **/
    313 void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
    314 {
    315         q->issue_flush_fn = iff;
    316 }

    173 void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
    174 {
    175         q->softirq_done_fn = fn;
    176 }

They assign scsi_prep_fn to q->prep_rq_fn, scsi_issue_flush_fn to q->issue_flush_fn, and scsi_softirq_done to q->softirq_done_fn. scsi_prep_fn in particular will be used very soon.

Good. Let's get back to the topic at hand and look at scsi_request_fn().

   1411 /*
   1412  * Function:    scsi_request_fn()
   1413  *
   1414  * Purpose:     Main strategy routine for SCSI.
   1415  *
   1416  * Arguments:   q       - Pointer to actual queue.
   1417  *
   1418  * Returns:     Nothing
   1419  *
   1420  * Lock status: IO request lock assumed to be held when called.
   1421  */
   1422 static void scsi_request_fn(struct request_queue *q)
   1423 {
   1424         struct scsi_device *sdev = q->queuedata;
   1425         struct Scsi_Host *shost;
   1426         struct scsi_cmnd *cmd;
   1427         struct request *req;
   1428
   1429         if (!sdev) {
   1430                 printk("scsi: killing requests for dead queue\n");
   1431                 while ((req = elv_next_request(q)) != NULL)
   1432                         scsi_kill_request(req, q);
   1433                 return;
   1434         }
   1435
   1436         if(!get_device(&sdev->sdev_gendev))
   1437                 /* We must be tearing the block queue down already */
   1438                 return;
   1439
   1440         /*
   1441          * To start with, we keep looping until the queue is empty, or until
   1442          * the host is no longer able to accept any more requests.
   1443          */
   1444         shost = sdev->host;
   1445         while (!blk_queue_plugged(q)) {
   1446                 int rtn;
   1447                 /*
   1448                  * get next queueable request.  We do this early to make sure
   1449                  * that the request is fully prepared even if we cannot
   1450                  * accept it.
   1451                  */
   1452                 req = elv_next_request(q);
   1453                 if (!req || !scsi_dev_queue_ready(q, sdev))
   1454                         break;
   1455
   1456                 if (unlikely(!scsi_device_online(sdev))) {
   1457                         sdev_printk(KERN_ERR, sdev,
   1458                                     "rejecting I/O to offline device\n");
   1459                         scsi_kill_request(req, q);
   1460                         continue;
   1461                 }
   1462
   1463
   1464                 /*
   1465                  * Remove the request from the request list.
   1466                  */
   1467                 if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
   1468                         blkdev_dequeue_request(req);
   1469                 sdev->device_busy++;
   1470
   1471                 spin_unlock(q->queue_lock);
   1472                 cmd = req->special;
   1473                 if (unlikely(cmd == NULL)) {
   1474                         printk(KERN_CRIT "impossible request in %s.\n"
   1475                                          "please mail a stack trace to "
   1476                                          "linux-scsi@vger.kernel.org\n",
   1477                                          __FUNCTION__);
   1478                         blk_dump_rq_flags(req, "foo");
   1479                         BUG();
   1480                 }
   1481                 spin_lock(shost->host_lock);
   1482
   1483                 if (!scsi_host_queue_ready(q, shost, sdev))
   1484                         goto not_ready;
   1485                 if (sdev->single_lun) {
   1486                         if (scsi_target(sdev)->starget_sdev_user &&
   1487                             scsi_target(sdev)->starget_sdev_user != sdev)
   1488                                 goto not_ready;
   1489                         scsi_target(sdev)->starget_sdev_user = sdev;
   1490                 }
   1491                 shost->host_busy++;
   1492
   1493                 /*
   1494                  * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
   1495                  *              take the lock again.
   1496                  */
   1497                 spin_unlock_irq(shost->host_lock);
   1498
   1499                 /*
   1500                  * Finally, initialize any error handling parameters, and set up
   1501                  * the timers for timeouts.
   1502                  */
   1503                 scsi_init_cmd_errh(cmd);
   1504
   1505                 /*
   1506                  * Dispatch the command to the low-level driver.
   1507                  */
   1508                 rtn = scsi_dispatch_cmd(cmd);
   1509                 spin_lock_irq(q->queue_lock);
   1510                 if(rtn) {
   1511                         /* we're refusing the command; because of
   1512                          * the way locks get dropped, we need to
   1513                          * check here if plugging is required */
   1514                         if(sdev->device_busy == 0)
   1515                                 blk_plug_device(q);
   1516
   1517                         break;
   1518                 }
   1519         }
   1520
   1521         goto out;
   1522
   1523  not_ready:
   1524         spin_unlock_irq(shost->host_lock);
   1525
   1526         /*
   1527          * lock q, handle tag, requeue req, and decrement device_busy. We
   1528          * must return with queue_lock held.
   1529          *
   1530          * Decrementing device_busy without checking it is OK, as all such
   1531          * cases (host limits or settings) should run the queue at some
   1532          * later time.
   1533          */
   1534         spin_lock_irq(q->queue_lock);
   1535         blk_requeue_request(q, req);
   1536         sdev->device_busy--;
   1537         if(sdev->device_busy == 0)
   1538                 blk_plug_device(q);
   1539  out:
   1540         /* must be careful here...if we trigger the ->remove() function
   1541          * we cannot be holding the q lock */
   1542         spin_unlock_irq(q->queue_lock);
   1543         put_device(&sdev->sdev_gendev);
   1544         spin_lock_irq(q->queue_lock);
   1545 }

Focus first on elv_next_request(), from block/elevator.c:

    712 struct request *elv_next_request(request_queue_t *q)
    713 {
    714         struct request *rq;
    715         int ret;
    716
    717         while ((rq = __elv_next_request(q)) != NULL) {
    718                 if (!(rq->cmd_flags & REQ_STARTED)) {
    719                         /*
    720                          * This is the first time the device driver
    721                          * sees this request (possibly after
    722                          * requeueing).  Notify IO scheduler.
    723                          */
    724                         if (blk_sorted_rq(rq))
    725                                 elv_activate_rq(q, rq);
    726
    727                         /*
    728                          * just mark as started even if we don't start
    729                          * it, a request that has been delayed should
    730                          * not be passed by new incoming requests
    731                          */
    732                         rq->cmd_flags |= REQ_STARTED;
    733                         blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
    734                 }
    735
    736                 if (!q->boundary_rq || q->boundary_rq == rq) {
    737                         q->end_sector = rq_end_sector(rq);
    738                         q->boundary_rq = NULL;
    739                 }
    740
    741                 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
    742                         break;
    743
    744                 ret = q->prep_rq_fn(q, rq);
    745                 if (ret == BLKPREP_OK) {
    746                         break;
    747                 } else if (ret == BLKPREP_DEFER) {
    748                         /*
    749                          * the request may have been (partially) prepped.
    750                          * we need to keep this request in the front to
    751                          * avoid resource deadlock.  REQ_STARTED will
    752                          * prevent other fs requests from passing this one.
    753                          */
    754                         rq = NULL;
    755                         break;
    756                 } else if (ret == BLKPREP_KILL) {
    757                         int nr_bytes = rq->hard_nr_sectors << 9;
    758
    759                         if (!nr_bytes)
    760                                 nr_bytes = rq->data_len;
    761
    762                         blkdev_dequeue_request(rq);
    763                         rq->cmd_flags |= REQ_QUIET;
    764                         end_that_request_chunk(rq, 0, nr_bytes);
    765                         end_that_request_last(rq, 0);
    766                 } else {
    767                         printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
    768                                                                 ret);
    769                         break;
    770                 }
    771         }
    772
    773         return rq;
    774 }

The __elv_next_request() it calls is likewise from block/elevator.c:

    696 static inline struct request *__elv_next_request(request_queue_t *q)
    697 {
    698         struct request *rq;
    699
    700         while (1) {
    701                 while (!list_empty(&q->queue_head)) {
    702                         rq = list_entry_rq(q->queue_head.next);
    703                         if (blk_do_ordered(q, &rq))
    704                                 return rq;
    705                 }
    706
    707                 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
    708                         return NULL;
    709         }
    710 }

Thanks to that spectacular insertion of ours just now, q->queue_head cannot be empty here, so a request gets taken off it.
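The list_entry_rq at line 702 is just the container_of trick, recovering the enclosing struct request from its queuelist member (from include/linux/blkdev.h, with 2.6.2x as the reference):

        #define list_entry_rq(ptr)      list_entry((ptr), struct request, queuelist)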

Next comes blk_do_ordered(), from block/ll_rw_blk.c:

    478 int blk_do_ordered(request_queue_t *q, struct request **rqp)
    479 {
    480         struct request *rq = *rqp;
    481         int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
    482
    483         if (!q->ordseq) {
    484                 if (!is_barrier)
    485                         return 1;
    486
    487                 if (q->next_ordered != QUEUE_ORDERED_NONE) {
    488                         *rqp = start_ordered(q, rq);
    489                         return 1;
    490                 } else {
    491                         /*
    492                          * This can happen when the queue switches to
    493                          * ORDERED_NONE while this request is on it.
    494                          */
    495                         blkdev_dequeue_request(rq);
    496                         end_that_request_first(rq, -EOPNOTSUPP,
    497                                                rq->hard_nr_sectors);
    498                         end_that_request_last(rq, -EOPNOTSUPP);
    499                         *rqp = NULL;
    500                         return 0;
    501                 }
    502         }
    503
    504         /*
    505          * Ordered sequence in progress
    506          */
    507
    508         /* Special requests are not subject to ordering rules. */
    509         if (!blk_fs_request(rq) &&
    510             rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
    511                 return 1;
    512
    513         if (q->ordered & QUEUE_ORDERED_TAG) {
    514                 /* Ordered by tag.  Blocking the next barrier is enough. */
    515                 if (is_barrier && rq != &q->bar_rq)
    516                         *rqp = NULL;
    517         } else {
    518                 /* Ordered by draining.  Wait for turn. */
    519                 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
    520                 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
    521                         *rqp = NULL;
    522         }
    523
    524         return 1;
    525 }

First, take a look at blk_fs_request, from include/linux/blkdev.h:

    528 #define blk_fs_request(rq)      ((rq)->cmd_type == REQ_TYPE_FS)

Clearly, our situation does not match this.
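What values can a request's cmd_type take? They are defined in the rq_cmd_type_bits enum in include/linux/blkdev.h (an excerpt, with 2.6.2x as the reference); which one our request carries will be revealed shortly:

        enum rq_cmd_type_bits {
                REQ_TYPE_FS = 1,        /* fs request */
                REQ_TYPE_BLOCK_PC,      /* scsi command */
                REQ_TYPE_SENSE,         /* sense request */
                /* ... remaining types omitted */
        };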

So in our context is_barrier must be 0, and blk_do_ordered returns 1 without further ado. Back in __elv_next_request, that means the if condition at line 703 is satisfied and rq is returned; the elevator_dispatch_fn at line 707 never actually runs in our context. In other words, when we return from __elv_next_request to elv_next_request(), as long as the request queue is not empty, the return value is the request at the head of the queue.

Moving on. Line 732 here is actually the only place in this whole story where REQ_STARTED is set in cmd_flags, so before line 732 has executed the flag is not set, and the if condition at line 718 is therefore satisfied.

blk_sorted_rq is another macro, from include/linux/blkdev.h:

    543 #define blk_sorted_rq(rq)       ((rq)->cmd_flags & REQ_SORTED)

Clearly we never set this flag either, so this is none of our business.

Not that it would matter for noop anyway: even executing the next function would achieve nothing. elv_activate_rq() comes from block/elevator.c:

    272 static void elv_activate_rq(request_queue_t *q, struct request *rq)
    273 {
    274         elevator_t *e = q->elevator;
    275
    276         if (e->ops->elevator_activate_req_fn)
    277                 e->ops->elevator_activate_req_fn(q, rq);
    278 }

And as we know, noop simply does not provide the elevator_activate_req_fn pointer at all, so this function ends up doing nothing.

At this point we set the REQ_STARTED flag.

At the very beginning, in elevator_init(), there was this line:

    230         q->boundary_rq = NULL;

So the condition at line 736 holds and rq_end_sector at line 737 gets executed. It, too, is just a very simple macro:

    172 #define rq_end_sector(rq)       ((rq)->sector + (rq)->nr_sectors)

And line 738 sets boundary_rq back to NULL.

Next, since prep_rq_fn was assigned scsi_prep_fn, we need to look at scsi_prep_fn(), a function from drivers/scsi/scsi_lib.c.

   1176 static int scsi_prep_fn(struct request_queue *q, struct request *req)
   1177 {
   1178         struct scsi_device *sdev = q->queuedata;
   1179         int ret = BLKPREP_OK;
   1180
   1181         /*
   1182          * If the device is not in running state we will reject some
   1183          * or all commands.
   1184          */
   1185         if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
   1186                 switch (sdev->sdev_state) {
   1187                 case SDEV_OFFLINE:
   1188                         /*
   1189                          * If the device is offline we refuse to process any
   1190                          * commands.  The device must be brought online
   1191                          * before trying any recovery commands.
   1192                          */
   1193                         sdev_printk(KERN_ERR, sdev,
   1194                                     "rejecting I/O to offline device\n");
   1195                         ret = BLKPREP_KILL;
   1196                         break;
   1197                 case SDEV_DEL:
   1198                         /*
   1199                          * If the device is fully deleted, we refuse to
   1200                          * process any commands as well.
   1201                          */
   1202                         sdev_printk(KERN_ERR, sdev,
   1203                                     "rejecting I/O to dead device\n");
   1204                         ret = BLKPREP_KILL;
   1205                         break;
   1206                 case SDEV_QUIESCE:
   1207                 case SDEV_BLOCK:
   1208                         /*
   1209                          * If the devices is blocked we defer normal commands.
   1210                          */
   1211                         if (!(req->cmd_flags & REQ_PREEMPT))
   1212                                 ret = BLKPREP_DEFER;
   1213                         break;
   1214                 default:
   1215                         /*
   1216                          * For any other not fully online state we only allow
   1217                          * special commands.  In particular any user initiated
   1218                          * command is not allowed.
   1219                          */
   1220                         if (!(req->cmd_flags & REQ_PREEMPT))
   1221                                 ret = BLKPREP_KILL;
   1222                         break;
   1223                 }
   1224
   1225                 if (ret != BLKPREP_OK)
   1226                         goto out;
   1227         }
   1228
   1229         switch (req->cmd_type) {
   1230         case REQ_TYPE_BLOCK_PC:
   1231                 ret = scsi_setup_blk_pc_cmnd(sdev, req);
   1232                 break;
   1233         case REQ_TYPE_FS:
   1234                 ret = scsi_setup_fs_cmnd(sdev, req);
   1235                 break;
   1236         default:
   1237                 /*
   1238                  * All other command types are not supported.
   1239                  *
   1240                  * Note that these days the SCSI subsystem does not use
   1241                  * REQ_TYPE_SPECIAL requests anymore.  These are only used
   1242                  * (directly or via blk_insert_request) by non-SCSI drivers.
   1243                  */
   1244                 blk_dump_rq_flags(req, "SCSI bad req");
   1245                 ret = BLKPREP_KILL;
   1246                 break;
   1247         }
   1248
   1249  out:
   1250         switch (ret) {
   1251         case BLKPREP_KILL:
   1252                 req->errors = DID_NO_CONNECT << 16;
   1253                 break;
   1254         case BLKPREP_DEFER:
   1255                 /*
   1256                  * If we defer, the elv_next_request() returns NULL, but the
   1257                  * queue must be restarted, so we plug here if no returning
   1258                  * command will automatically do that.
   1259                  */
   1260                 if (sdev->device_busy == 0)
   1261                         blk_plug_device(q);
   1262                 break;
   1263         default:
   1264                 req->cmd_flags |= REQ_DONTPREP;
   1265         }
   1266
   1267         return ret;
   1268 }

Going down the normal path, we arrive at the switch statement at line 1229 and, depending on the type of the scsi command, execute one of two functions: scsi_setup_blk_pc_cmnd or scsi_setup_fs_cmnd. So what exactly is our cmd_type? Looking back at those hazy bygone days, I still remember this line in scsi_execute():

    199         req->cmd_type = REQ_TYPE_BLOCK_PC;

So there is nothing to argue about: we execute scsi_setup_blk_pc_cmnd, from drivers/scsi/scsi_lib.c:

   1090 static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
   1091 {
   1092         struct scsi_cmnd *cmd;
   1093
   1094         cmd = scsi_get_cmd_from_req(sdev, req);
   1095         if (unlikely(!cmd))
   1096                 return BLKPREP_DEFER;
   1097
   1098         /*
   1099          * BLOCK_PC requests may transfer data, in which case they must
   1100          * a bio attached to them.  Or they might contain a SCSI command
   1101          * that does not transfer data, in which case they may optionally
   1102          * submit a request without an attached bio.
   1103          */
   1104         if (req->bio) {
   1105                 int ret;
   1106
   1107                 BUG_ON(!req->nr_phys_segments);
   1108
   1109                 ret = scsi_init_io(cmd);
   1110                 if (unlikely(ret))
   1111                         return ret;
   1112         } else {
   1113                 BUG_ON(req->data_len);
   1114                 BUG_ON(req->data);
   1115
   1116                 cmd->request_bufflen = 0;
   1117                 cmd->request_buffer = NULL;
   1118                 cmd->use_sg = 0;
   1119                 req->buffer = NULL;
   1120         }
   1121
   1122         BUILD_BUG_ON(sizeof(req->cmd) > sizeof(cmd->cmnd));
   1123         memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
   1124         cmd->cmd_len = req->cmd_len;
   1125         if (!req->data_len)
   1126                 cmd->sc_data_direction = DMA_NONE;
   1127         else if (rq_data_dir(req) == WRITE)
   1128                 cmd->sc_data_direction = DMA_TO_DEVICE;
   1129         else
   1130                 cmd->sc_data_direction = DMA_FROM_DEVICE;
   1131
   1132         cmd->transfersize = req->data_len;
   1133         cmd->allowed = req->retries;
   1134         cmd->timeout_per_command = req->timeout;
   1135         cmd->done = scsi_blk_pc_done;
   1136         return BLKPREP_OK;
   1137 }

If you ever had doubts about how a scsi cmd takes shape, this moment should settle them. In particular, if you saw sc_data_direction being checked in the usb-storage story and did not understand where that value was set, the code laid bare in front of you now should have unraveled that mystery.
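Incidentally, the possible values of sc_data_direction come from the dma_data_direction enum in include/linux/dma-mapping.h (with 2.6.2x as the reference):

        enum dma_data_direction {
                DMA_BIDIRECTIONAL = 0,
                DMA_TO_DEVICE = 1,
                DMA_FROM_DEVICE = 2,
                DMA_NONE = 3,
        };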

In the end, if everything is normal, the function returns BLKPREP_OK. prep means prepare, and BLKPREP_OK says the preparation is done, ready to go. scsi_prep_fn() will return this same value, and before returning it also sets REQ_DONTPREP in cmd_flags. (Note that this is exactly the flag tested at line 741 of elv_next_request().)
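This family of prep return values is defined in include/linux/blkdev.h (with 2.6.2x as the reference):

        /* return values for prep_rq_fn's */
        #define BLKPREP_OK      0       /* serve it */
        #define BLKPREP_KILL    1       /* fatal error, kill request */
        #define BLKPREP_DEFER   2       /* leave on queue, retry later */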

Back in elv_next_request(), since the return value is BLKPREP_OK, we break at line 746. In other words, we have obtained a request and prepared a scsi command for it; the next step is to execute that command, so there is no need to linger in elv_next_request() any longer. At last we return to scsi_request_fn(). Tang Wei once confessed that shooting those bed scenes felt like a walk through hell, and is reading this code any different? Only she said that after the hell came heaven, while we remain trapped in this code with no end in sight. Sure enough, having finished with elv_next_request, we have to look at the next thing, and not just one but two of them, at line 1467: a macro plus a function. The macro is blk_queue_tagged, from include/linux/blkdev.h:

    524 #define blk_queue_tagged(q)     test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)

And the function is blk_queue_start_tag, from block/ll_rw_blk.c:

   1104 /**
   1105  * blk_queue_start_tag - find a free tag and assign it
   1106  * @q:  the request queue for the device
   1107  * @rq:  the block request that needs tagging
   1108  *
   1109  *  Description:
   1110  *    This can either be used as a stand-alone helper, or possibly be
   1111  *    assigned as the queue &prep_rq_fn (in which case &struct request
   1112  *    automagically gets a tag assigned). Note that this function
   1113  *    assumes that any type of request can be queued! if this is not
   1114  *    true for your device, you must check the request type before
   1115  *    calling this function.  The request will also be removed from
   1116  *    the request queue, so it's the drivers responsibility to readd
   1117  *    it if it should need to be restarted for some reason.
   1118  *
   1119  *  Notes:
   1120  *   queue lock must be held.
   1121  **/
   1122 int blk_queue_start_tag(request_queue_t *q, struct request *rq)
   1123 {
   1124         struct blk_queue_tag *bqt = q->queue_tags;
   1125         int tag;
   1126
   1127         if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
   1128                 printk(KERN_ERR
   1129                        "%s: request %p for device [%s] already tagged %d",
   1130                        __FUNCTION__, rq,
   1131                        rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
   1132                 BUG();
   1133         }
   1134
   1135         /*
   1136          * Protect against shared tag maps, as we may not have exclusive
   1137          * access to the tag map.
   1138          */
   1139         do {
   1140                 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
   1141                 if (tag >= bqt->max_depth)
   1142                         return 1;
   1143
   1144         } while (test_and_set_bit(tag, bqt->tag_map));
   1145
   1146         rq->cmd_flags |= REQ_QUEUED;
   1147         rq->tag = tag;
   1148         bqt->tag_index[tag] = rq;
   1149         blkdev_dequeue_request(rq);
   1150         list_add(&rq->queuelist, &bqt->busy_list);
   1151         bqt->busy++;
   1152         return 0;
   1153 }

For most of us, the queue never enabled tagged queuing, so blk_queue_tagged(q) evaluates to 0 and the whole condition at line 1467 comes out true.

And so the next function, blkdev_dequeue_request(), gets executed. From include/linux/blkdev.h:

    725 static inline void blkdev_dequeue_request(struct request *req)
    726 {
    727         elv_dequeue_request(req->q, req);
    728 }

elv_dequeue_request comes from block/elevator.c:

    778 void elv_dequeue_request(request_queue_t *q, struct request *rq)
    779 {
    780         BUG_ON(list_empty(&rq->queuelist));
    781         BUG_ON(ELV_ON_HASH(rq));
    782
    783         list_del_init(&rq->queuelist);
    784
    785         /*
    786          * the time frame between a request being removed from the lists
    787          * and to it is freed is accounted as io that is in progress at
    788          * the driver side.
    789          */
    790         if (blk_account_rq(rq))
    791                 q->in_flight++;
    792 }

Society these days runs on using and being used. Now that this request has outlived its usefulness, now that we have already extracted the scsi command we wanted from it, we may as well tear down the bridge after crossing it: list_del_init at line 783 removes the request from the request queue's list.
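list_del_init comes from include/linux/list.h. It unlinks the node and reinitializes it at the same time, so rq->queuelist remains usable afterwards (with 2.6.2x as the reference):

        static inline void list_del_init(struct list_head *entry)
        {
                __list_del(entry->prev, entry->next);
                INIT_LIST_HEAD(entry);
        }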

And the blk_account_rq below is another macro from include/linux/blkdev.h:

    536 #define blk_account_rq(rq)      (blk_rq_started(rq) && blk_fs_request(rq))

Clearly, at least the second condition fails for us, since ours is not a REQ_TYPE_FS request. So, without further ado, that is the end of elv_dequeue_request.

Now it is finally time to execute the scsi command, so scsi_dispatch_cmd() is called.

 
