Linux那些事儿之我是Block层 (8): The Past and Present Lives of a SCSI Command (II)

Once this partnership in crime is established, the request can finally be executed. Let's look at blk_execute_rq(), from block/ll_rw_blk.c:

2605 /**

2606 * blk_execute_rq - insert a request into queue for execution

2607 * @q: queue to insert the request in

2608 * @bd_disk: matching gendisk

2609 * @rq: request to insert

2610 * @at_head: insert request at head or tail of queue

2611 *

2612 * Description:

2613 * Insert a fully prepared request at the back of the io scheduler queue

2614 * for execution and wait for completion.

2615 */

2616 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,

2617 struct request *rq, int at_head)

2618 {

2619 DECLARE_COMPLETION_ONSTACK(wait);

2620 char sense[SCSI_SENSE_BUFFERSIZE];

2621 int err = 0;

2622

2623 /*

2624 * we need an extra reference to the request, so we can look at

2625 * it after io completion

2626 */

2627 rq->ref_count++;

2628

2629 if (!rq->sense) {

2630 memset(sense, 0, sizeof(sense));

2631 rq->sense = sense;

2632 rq->sense_len = 0;

2633 }

2634

2635 rq->end_io_data = &wait;

2636 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);

2637 wait_for_completion(&wait);

2638

2639 if (rq->errors)

2640 err = -EIO;

2641

2642 return err;

2643 }
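
Before dissecting it, note where the synchronous behavior comes from: the submitter declares a completion on its own stack, parks a pointer to it in rq->end_io_data, and goes to sleep; at I/O completion, blk_end_sync_rq() completes it and the submitter wakes up. A minimal sketch of that handshake, with hypothetical names (my_end_io and my_execute are not kernel functions):

#include <linux/blkdev.h>
#include <linux/completion.h>

static void my_end_io(struct request *rq, int error)
{
        struct completion *waiting = rq->end_io_data;

        rq->end_io_data = NULL;
        complete(waiting);              /* wake the sleeping submitter */
}

static int my_execute(request_queue_t *q, struct gendisk *disk,
                      struct request *rq)
{
        DECLARE_COMPLETION_ONSTACK(wait);

        rq->end_io_data = &wait;
        blk_execute_rq_nowait(q, disk, rq, 1, my_end_io);
        wait_for_completion(&wait);     /* sleep until my_end_io() runs */
        return rq->errors ? -EIO : 0;
}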

Stripping away the error-handling code, this function really has only two meaningful lines: blk_execute_rq_nowait() and wait_for_completion(). Let's look at the former first, from block/ll_rw_blk.c:

2576 /**

2577 * blk_execute_rq_nowait - insert a request into queue for execution

2578 * @q: queue to insert the request in

2579 * @bd_disk: matching gendisk

2580 * @rq: request to insert

2581 * @at_head: insert request at head or tail of queue

2582 * @done: I/O completion handler

2583 *

2584 * Description:

2585 * Insert a fully prepared request at the back of the io scheduler queue

2586 * for execution. Don't wait for completion.

2587 */

2588 void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,

2589 struct request *rq, int at_head,

2590 rq_end_io_fn *done)

2591 {

2592 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;

2593

2594 rq->rq_disk = bd_disk;

2595 rq->cmd_flags |= REQ_NOMERGE;

2596 rq->end_io = done;

2597 WARN_ON(irqs_disabled());

2598 spin_lock_irq(q->queue_lock);

2599 __elv_add_request(q, rq, where, 1);

2600 __generic_unplug_device(q);

2601 spin_unlock_irq(q->queue_lock);

2602 }

First of all, at_head indicates where to insert. (... surely there isn't yet another parameter indicating what position to insert in.)

where records the value of at_head. In our context, at_head was passed down when scsi_execute() called blk_execute_rq, and back then we set it to 1, so where becomes ELEVATOR_INSERT_FRONT. These macros come from include/linux/elevator.h:

155 /*

156 * Insertion selection

157 */

158 #define ELEVATOR_INSERT_FRONT 1

159 #define ELEVATOR_INSERT_BACK 2

160 #define ELEVATOR_INSERT_SORT 3

161 #define ELEVATOR_INSERT_REQUEUE 4

Clearly this tells us to insert from the front — not too perverted, as these things go. So, carrying this where, we enter the next function, __elv_add_request(), from block/elevator.c:

646 void __elv_add_request(request_queue_t *q, struct request *rq, int where,

647 int plug)

648 {

649 if (q->ordcolor)

650 rq->cmd_flags |= REQ_ORDERED_COLOR;

651

652 if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {

653 /*

654 * toggle ordered color

655 */

656 if (blk_barrier_rq(rq))

657 q->ordcolor ^= 1;

658

659 /*

660 * barriers implicitly indicate back insertion

661 */

662 if (where == ELEVATOR_INSERT_SORT)

663 where = ELEVATOR_INSERT_BACK;

664

665 /*

666 * this request is scheduling boundary, update

667 * end_sector

668 */

669 if (blk_fs_request(rq)) {

670 q->end_sector = rq_end_sector(rq);

671 q->boundary_rq = rq;

672 }

673 } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)

674 where = ELEVATOR_INSERT_BACK;

675

676 if (plug)

677 blk_plug_device(q);

678

679 elv_insert(q, rq, where);

680 }

The parameter plug comes in as 1, so blk_plug_device() will be executed. We won't dissect that function just yet.
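
Still, a rough sketch of what plugging amounts to in this era of the kernel helps later (simplified from block/ll_rw_blk.c, not a verbatim quote):

void blk_plug_device(request_queue_t *q)
{
        WARN_ON(!irqs_disabled());

        if (blk_queue_stopped(q))
                return;

        /* Mark the queue plugged and arm a timer, so the queue gets
         * unplugged eventually even if nobody does it explicitly. */
        if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
                mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
}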

Clearly none of what comes before concerns us, so jump straight to the last line, elv_insert():

548 void elv_insert(request_queue_t *q, struct request *rq, int where)

549 {

550 struct list_head *pos;

551 unsigned ordseq;

552 int unplug_it = 1;

553

554 blk_add_trace_rq(q, rq, BLK_TA_INSERT);

555

556 rq->q = q;

557

558 switch (where) {

559 case ELEVATOR_INSERT_FRONT:

560 rq->cmd_flags |= REQ_SOFTBARRIER;

561

562 list_add(&rq->queuelist, &q->queue_head);

563 break;

564

565 case ELEVATOR_INSERT_BACK:

566 rq->cmd_flags |= REQ_SOFTBARRIER;

567 elv_drain_elevator(q);

568 list_add_tail(&rq->queuelist, &q->queue_head);

569 /*

570 * We kick the queue here for the following reasons.

571 * - The elevator might have returned NULL previously

572 * to delay requests and returned them now. As the

573 * queue wasn't empty before this request, ll_rw_blk

574 * won't run the queue on return, resulting in hang.

575 * - Usually, back inserted requests won't be merged

576 * with anything. There's no point in delaying queue

577 * processing.

578 */

579 blk_remove_plug(q);

580 q->request_fn(q);

581 break;

582

583 case ELEVATOR_INSERT_SORT:

584 BUG_ON(!blk_fs_request(rq));

585 rq->cmd_flags |= REQ_SORTED;

586 q->nr_sorted++;

587 if (rq_mergeable(rq)) {

588 elv_rqhash_add(q, rq);

589 if (!q->last_merge)

590 q->last_merge = rq;

591 }

592

593 /*

594 * Some ioscheds (cfq) run q->request_fn directly, so

595 * rq cannot be accessed after calling

596 * elevator_add_req_fn.

597 */

598 q->elevator->ops->elevator_add_req_fn(q, rq);

599 break;

600

601 case ELEVATOR_INSERT_REQUEUE:

602 /*

603 * If ordered flush isn't in progress, we do front

604 * insertion; otherwise, requests should be requeued

605 * in ordseq order.

606 */

607 rq->cmd_flags |= REQ_SOFTBARRIER;

608

609 /*

610 * Most requeues happen because of a busy condition,

611 * don't force unplug of the queue for that case.

612 */

613 unplug_it = 0;

614

615 if (q->ordseq == 0) {

616 list_add(&rq->queuelist, &q->queue_head);

617 break;

618 }

619

620 ordseq = blk_ordered_req_seq(rq);

621

622 list_for_each(pos, &q->queue_head) {

623 struct request *pos_rq = list_entry_rq(pos);

624 if (ordseq <= blk_ordered_req_seq(pos_rq))

625 break;

626 }

627

628 list_add_tail(&rq->queuelist, pos);

629 break;

630

631 default:

632 printk(KERN_ERR "%s: bad insertion point %d\n",

633 __FUNCTION__, where);

634 BUG();

635 }

636

637 if (unplug_it && blk_queue_plugged(q)) {

638 int nrq = q->rq.count[READ] + q->rq.count[WRITE]

639 - q->in_flight;

640

641 if (nrq >= q->unplug_thresh)

642 __generic_unplug_device(q);

643 }

644 }

Since we insert from the front, line 562's list_add is what runs. struct request has a member struct list_head queuelist, and struct request_queue has a member struct list_head queue_head, so we link the former into the queue represented by the latter. And then we return.
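
In other words, the request queue is just an intrusive doubly linked list. A condensed sketch (fields excerpted in the comments):

/* struct request       { struct list_head queuelist;  ... };  */
/* struct request_queue { struct list_head queue_head; ... };  */

/* ELEVATOR_INSERT_FRONT, as at line 562: */
list_add(&rq->queuelist, &q->queue_head);       /* insert at the front */

/* ELEVATOR_INSERT_BACK would instead do, as at line 568: */
list_add_tail(&rq->queuelist, &q->queue_head);  /* insert at the tail */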

Back in blk_execute_rq_nowait(), the next function to be called is __generic_unplug_device(), still from block/ll_rw_blk.c:

1586 /*

1587 * remove the plug and let it rip..

1588 */

1589 void __generic_unplug_device(request_queue_t *q)

1590 {

1591 if (unlikely(blk_queue_stopped(q)))

1592 return;

1593

1594 if (!blk_remove_plug(q))

1595 return;

1596

1597 q->request_fn(q);

1598 }

What's really worth watching is line 1597, the call to request_fn. request_fn_proc *request_fn is a member of struct request_queue, and request_fn_proc is yet another little typedef trick, from include/linux/blkdev.h:

334 typedef void (request_fn_proc) (request_queue_t *q);
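
So every block driver hands the block layer a strategy routine of exactly this shape when it creates its queue. A hypothetical (non-SCSI) registration, just to show the pattern — my_request_fn, my_lock and my_init are made-up names:

#include <linux/blkdev.h>
#include <linux/init.h>

static DEFINE_SPINLOCK(my_lock);        /* the queue lock we hand over */

static void my_request_fn(request_queue_t *q)
{
        struct request *rq;

        /* drain the queue, handing each request to the hardware */
        while ((rq = elv_next_request(q)) != NULL) {
                blkdev_dequeue_request(rq);
                /* ... program the device with rq, complete it later ... */
        }
}

static int __init my_init(void)
{
        request_queue_t *q = blk_init_queue(my_request_fn, &my_lock);

        return q ? 0 : -ENOMEM;
}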

So what exactly is this request_fn? Remember the function in the scsi subsystem that allocated the queue? Right — __scsi_alloc_queue(), which is called by scsi_alloc_queue(), and the caller passed in precisely this parameter:

1590 struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)

1591 {

1592 struct request_queue *q;

1593

1594 q = __scsi_alloc_queue(sdev->host, scsi_request_fn);

1595 if (!q)

1596 return NULL;

1597

1598 blk_queue_prep_rq(q, scsi_prep_fn);

1599 blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);

1600 blk_queue_softirq_done(q, scsi_softirq_done);

1601 return q;

1602 }

Yes, it is scsi_request_fn(). This function pointer is passed along a few times and is ultimately assigned to q->request_fn in blk_init_queue_node(). So what we really need to care about is scsi_request_fn().

Before looking at scsi_request_fn(), note that lines 1598 through 1600 here also assign three function pointers:

132 /**

133 * blk_queue_prep_rq - set a prepare_request function for queue

134 * @q: queue

135 * @pfn: prepare_request function

136 *

137 * It's possible for a queue to register a prepare_request callback which

138 * is invoked before the request is handed to the request_fn. The goal of

139 * the function is to prepare a request for I/O, it can be used to build a

140 * cdb from the request data for instance.

141 *

142 */

143 void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)

144 {

145 q->prep_rq_fn = pfn;

146 }

303 /**

304 * blk_queue_issue_flush_fn - set function for issuing a flush

305 * @q: the request queue

306 * @iff: the function to be called issuing the flush

307 *

308 * Description:

309 * If a driver supports issuing a flush command, the support is notified

310 * to the block layer by defining it through this call.

311 *

312 **/

313 void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)

314 {

315 q->issue_flush_fn = iff;

316 }

173 void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)

174 {

175 q->softirq_done_fn = fn;

176 }

They assign scsi_prep_fn to q->prep_rq_fn, scsi_issue_flush_fn to q->issue_flush_fn, and scsi_softirq_done to q->softirq_done_fn. scsi_prep_fn in particular we will need very soon.
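
To see the contract these hooks follow, here is a hypothetical prep_rq_fn skeleton (my_prep_fn and my_resources_available are invented names); scsi_prep_fn, which we meet below, plays by the same BLKPREP_* rules:

static int my_resources_available(void)
{
        return 1;                       /* hypothetical stub */
}

static int my_prep_fn(request_queue_t *q, struct request *rq)
{
        if (rq->cmd_type != REQ_TYPE_FS)
                return BLKPREP_KILL;    /* refuse: fail the request */

        if (!my_resources_available())
                return BLKPREP_DEFER;   /* not now: leave rq at the queue head */

        /* ... build the device command from rq here ... */
        rq->cmd_flags |= REQ_DONTPREP;  /* don't prepare twice; scsi_prep_fn
                                         * does the same on success */
        return BLKPREP_OK;              /* ready for the request_fn */
}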

Now, let's pick up the earlier thread and look at scsi_request_fn().

1411 /*

1412 * Function: scsi_request_fn()

1413 *

1414 * Purpose: Main strategy routine for SCSI.

1415 *

1416 * Arguments: q - Pointer to actual queue.

1417 *

1418 * Returns: Nothing

1419 *

1420 * Lock status: IO request lock assumed to be held when called.

1421 */

1422 static void scsi_request_fn(struct request_queue *q)

1423 {

1424 struct scsi_device *sdev = q->queuedata;

1425 struct Scsi_Host *shost;

1426 struct scsi_cmnd *cmd;

1427 struct request *req;

1428

1429 if (!sdev) {

1430 printk("scsi: killing requests for dead queue\n");

1431 while ((req = elv_next_request(q)) != NULL)

1432 scsi_kill_request(req, q);

1433 return;

1434 }

1435

1436 if(!get_device(&sdev->sdev_gendev))

1437 /* We must be tearing the block queue down already */

1438 return;

1439

1440 /*

1441 * To start with, we keep looping until the queue is empty, or until

1442 * the host is no longer able to accept any more requests.

1443 */

1444 shost = sdev->host;

1445 while (!blk_queue_plugged(q)) {

1446 int rtn;

1447 /*

1448 * get next queueable request. We do this early to make sure

1449 * that the request is fully prepared even if we cannot

1450 * accept it.

1451 */

1452 req = elv_next_request(q);

1453 if (!req || !scsi_dev_queue_ready(q, sdev))

1454 break;

1455

1456 if (unlikely(!scsi_device_online(sdev))) {

1457 sdev_printk(KERN_ERR, sdev,

1458 "rejecting I/O to offline device/n");

1459 scsi_kill_request(req, q);

1460 continue;

1461 }

1462

1463

1464 /*

1465 * Remove the request from the request list.

1466 */

1467 if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))

1468 blkdev_dequeue_request(req);

1469 sdev->device_busy++;

1470

1471 spin_unlock(q->queue_lock);

1472 cmd = req->special;

1473 if (unlikely(cmd == NULL)) {

1474 printk(KERN_CRIT "impossible request in %s.\n"

1475 "please mail a stack trace to "

1476 "[email protected]/n",

1477 __FUNCTION__);

1478 blk_dump_rq_flags(req, "foo");

1479 BUG();

1480 }

1481 spin_lock(shost->host_lock);

1482

1483 if (!scsi_host_queue_ready(q, shost, sdev))

1484 goto not_ready;

1485 if (sdev->single_lun) {

1486 if (scsi_target(sdev)->starget_sdev_user &&

1487 scsi_target(sdev)->starget_sdev_user != sdev)

1488 goto not_ready;

1489 scsi_target(sdev)->starget_sdev_user = sdev;

1490 }

1491 shost->host_busy++;

1492

1493 /*

1494 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will

1495 * take the lock again.

1496 */

1497 spin_unlock_irq(shost->host_lock);

1498

1499 /*

1500 * Finally, initialize any error handling parameters, and set up

1501 * the timers for timeouts.

1502 */

1503 scsi_init_cmd_errh(cmd);

1504

1505 /*

1506 * Dispatch the command to the low-level driver.

1507 */

1508 rtn = scsi_dispatch_cmd(cmd);

1509 spin_lock_irq(q->queue_lock);

1510 if(rtn) {

1511 /* we're refusing the command; because of

1512 * the way locks get dropped, we need to

1513 * check here if plugging is required */

1514 if(sdev->device_busy == 0)

1515 blk_plug_device(q);

1516

1517 break;

1518 }

1519 }

1520

1521 goto out;

1522

1523 not_ready:

1524 spin_unlock_irq(shost->host_lock);

1525

1526 /*

1527 * lock q, handle tag, requeue req, and decrement device_busy. We

1528 * must return with queue_lock held.

1529 *

1530 * Decrementing device_busy without checking it is OK, as all such

1531 * cases (host limits or settings) should run the queue at some

1532 * later time.

1533 */

1534 spin_lock_irq(q->queue_lock);

1535 blk_requeue_request(q, req);

1536 sdev->device_busy--;

1537 if(sdev->device_busy == 0)

1538 blk_plug_device(q);

1539 out:

1540 /* must be careful here...if we trigger the ->remove() function

1541 * we cannot be holding the q lock */

1542 spin_unlock_irq(q->queue_lock);

1543 put_device(&sdev->sdev_gendev);

1544 spin_lock_irq(q->queue_lock);

1545 }

First, let's focus on elv_next_request(), from block/elevator.c:

712 struct request *elv_next_request(request_queue_t *q)

713 {

714 struct request *rq;

715 int ret;

716

717 while ((rq = __elv_next_request(q)) != NULL) {

718 if (!(rq->cmd_flags & REQ_STARTED)) {

719 /*

720 * This is the first time the device driver

721 * sees this request (possibly after

722 * requeueing). Notify IO scheduler.

723 */

724 if (blk_sorted_rq(rq))

725 elv_activate_rq(q, rq);

726

727 /*

728 * just mark as started even if we don't start

729 * it, a request that has been delayed should

730 * not be passed by new incoming requests

731 */

732 rq->cmd_flags |= REQ_STARTED;

733 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);

734 }

735

736 if (!q->boundary_rq || q->boundary_rq == rq) {

737 q->end_sector = rq_end_sector(rq);

738 q->boundary_rq = NULL;

739 }

740

741 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)

742 break;

743

744 ret = q->prep_rq_fn(q, rq);

745 if (ret == BLKPREP_OK) {

746 break;

747 } else if (ret == BLKPREP_DEFER) {

748 /*

749 * the request may have been (partially) prepped.

750 * we need to keep this request in the front to

751 * avoid resource deadlock. REQ_STARTED will

752 * prevent other fs requests from passing this one.

753 */

754 rq = NULL;

755 break;

756 } else if (ret == BLKPREP_KILL) {

757 int nr_bytes = rq->hard_nr_sectors << 9;

758

759 if (!nr_bytes)

760 nr_bytes = rq->data_len;

761

762 blkdev_dequeue_request(rq);

763 rq->cmd_flags |= REQ_QUIET;

764 end_that_request_chunk(rq, 0, nr_bytes);

765 end_that_request_last(rq, 0);

766 } else {

767 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,

768 ret);

769 break;

770 }

771 }

772

773 return rq;

774 }

The __elv_next_request() it calls is likewise from block/elevator.c:

696 static inline struct request *__elv_next_request(request_queue_t *q)

697 {

698 struct request *rq;

699

700 while (1) {

701 while (!list_empty(&q->queue_head)) {

702 rq = list_entry_rq(q->queue_head.next);

703 if (blk_do_ordered(q, &rq))

704 return rq;

705 }

706

707 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))

708 return NULL;

709 }

710 }

Thanks to our splendid insertion move just now, q->queue_head cannot possibly be empty here, so a request is taken from it.
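
list_entry_rq() is container_of() in a thin disguise. From include/linux/blkdev.h:

#define list_entry_rq(ptr)      list_entry((ptr), struct request, queuelist)

/* so for the head of the queue it expands to roughly: */
struct request *rq = (struct request *)((char *)q->queue_head.next -
                        offsetof(struct request, queuelist));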

First comes blk_do_ordered(), from block/ll_rw_blk.c:

478 int blk_do_ordered(request_queue_t *q, struct request **rqp)

479 {

480 struct request *rq = *rqp;

481 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

482

483 if (!q->ordseq) {

484 if (!is_barrier)

485 return 1;

486

487 if (q->next_ordered != QUEUE_ORDERED_NONE) {

488 *rqp = start_ordered(q, rq);

489 return 1;

490 } else {

491 /*

492 * This can happen when the queue switches to

493 * ORDERED_NONE while this request is on it.

494 */

495 blkdev_dequeue_request(rq);

496 end_that_request_first(rq, -EOPNOTSUPP,

497 rq->hard_nr_sectors);

498 end_that_request_last(rq, -EOPNOTSUPP);

499 *rqp = NULL;

500 return 0;

501 }

502 }

503

504 /*

505 * Ordered sequence in progress

506 */

507

508 /* Special requests are not subject to ordering rules. */

509 if (!blk_fs_request(rq) &&

510 rq != &q->pre_flush_rq && rq != &q->post_flush_rq)

511 return 1;

512

513 if (q->ordered & QUEUE_ORDERED_TAG) {

514 /* Ordered by tag. Blocking the next barrier is enough. */

515 if (is_barrier && rq != &q->bar_rq)

516 *rqp = NULL;

517 } else {

518 /* Ordered by draining. Wait for turn. */

519 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));

520 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))

521 *rqp = NULL;

522 }

523

524 return 1;

525 }

First, take a look at blk_fs_request:

528 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)

Clearly our case is not this one — our request's cmd_type is REQ_TYPE_BLOCK_PC, not REQ_TYPE_FS.

So in our context is_barrier must be 0, and blk_do_ordered() returns 1 without any fuss. Back in __elv_next_request(), the if condition at line 703 is then satisfied, and rq is returned; the elevator_dispatch_fn below it never actually runs in our context. Put another way: by the time we return from __elv_next_request() into elv_next_request(), as long as the request queue is not empty, the return value is the request at the head of the queue.

Pressing on: the only place in this whole story that sets REQ_STARTED in cmd_flags is right here, line 732. So before line 732 executes, that flag is not set, and the if condition is satisfied.

blk_sorted_rq is yet another macro, from include/linux/blkdev.h:

543 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)

Clearly we never set this flag either, so this part has nothing to do with us.

And in any case, for noop even executing the next function would achieve nothing, because elv_activate_rq(), from block/elevator.c, is:

272 static void elv_activate_rq(request_queue_t *q, struct request *rq)

273 {

274 elevator_t *e = q->elevator;

275

276 if (e->ops->elevator_activate_req_fn)

277 e->ops->elevator_activate_req_fn(q, rq);

278 }

And as we know, noop provides no such pointer at all, so nothing happens here — and we're not allowed to be unhappy about it.

At this point we set the REQ_STARTED flag.

Way back in elevator_init() there was this line:

230 q->boundary_rq = NULL;

So the condition at line 736 holds and rq_end_sector gets executed — itself just a very simple macro:

172 #define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors)

And boundary_rq is once again set to NULL.
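
A quick worked example of the macro, with made-up numbers:

/* a request covering sectors [4096, 4104): */
rq->sector     = 4096;          /* first sector of the request */
rq->nr_sectors = 8;             /* eight 512-byte sectors long */

/* rq_end_sector(rq) == 4096 + 8 == 4104: the first sector *after*
 * the request, i.e. where a sequential successor would start. */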

Next, since prep_rq_fn was assigned scsi_prep_fn, we need to look at scsi_prep_fn(), a function from drivers/scsi/scsi_lib.c.

1176 static int scsi_prep_fn(struct request_queue *q, struct request *req)

1177 {

1178 struct scsi_device *sdev = q->queuedata;

1179 int ret = BLKPREP_OK;

1180

1181 /*

1182 * If the device is not in running state we will reject some

1183 * or all commands.

1184 */

1185 if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {

1186 switch (sdev->sdev_state) {

1187 case SDEV_OFFLINE:

1188 /*

1189 * If the device is offline we refuse to process any

1190 * commands. The device must be brought online

1191 * before trying any recovery commands.

1192 */

1193 sdev_printk(KERN_ERR, sdev,

1194 "rejecting I/O to offline device/n");

1195 ret = BLKPREP_KILL;

1196 break;

1197 case SDEV_DEL:

1198 /*

1199 * If the device is fully deleted, we refuse to

1200 * process any commands as well.

1201 */

1202 sdev_printk(KERN_ERR, sdev,

1203 "rejecting I/O to dead device/n");

1204 ret = BLKPREP_KILL;

1205 break;

1206 case SDEV_QUIESCE:

1207 case SDEV_BLOCK:

1208 /*

1209 * If the devices is blocked we defer normal commands.

1210 */

1211 if (!(req->cmd_flags & REQ_PREEMPT))

1212 ret = BLKPREP_DEFER;

1213 break;

1214 default:

1215 /*

1216 * For any other not fully online state we only allow

1217 * special commands. In particular any user initiated

1218 * command is not allowed.

1219 */

1220 if (!(req->cmd_flags & REQ_PREEMPT))

1221 ret = BLKPREP_KILL;

1222 break;

1223 }

1224

1225 if (ret != BLKPREP_OK)

1226 goto out;

1227 }

1228

1229 switch (req->cmd_type) {

1230 case REQ_TYPE_BLOCK_PC:

1231 ret = scsi_setup_blk_pc_cmnd(sdev, req);

1232 break;

1233 case REQ_TYPE_FS:

1234 ret = scsi_setup_fs_cmnd(sdev, req);

1235 break;

1236 default:

1237 /*

1238 * All other command types are not supported.

1239 *

1240 * Note that these days the SCSI subsystem does not use

1241 * REQ_TYPE_SPECIAL requests anymore. These are only used

1242 * (directly or via blk_insert_request) by non-SCSI drivers.

1243 */

1244 blk_dump_rq_flags(req, "SCSI bad req");

1245 ret = BLKPREP_KILL;

1246 break;

1247 }

1248

1249 out:

1250 switch (ret) {

1251 case BLKPREP_KILL:

1252 req->errors = DID_NO_CONNECT << 16;

1253 break;

1254 case BLKPREP_DEFER:

1255 /*

1256 * If we defer, the elv_next_request() returns NULL, but the

1257 * queue must be restarted, so we plug here if no returning

1258 * command will automatically do that.

1259 */

1260 if (sdev->device_busy == 0)

1261 blk_plug_device(q);

1262 break;

1263 default:

1264 req->cmd_flags |= REQ_DONTPREP;

1265 }

1266

1267 return ret;

1268 }

On the normal path we reach the switch at line 1229 and, depending on the type of the scsi command, execute either scsi_setup_blk_pc_cmnd or scsi_setup_fs_cmnd. So what is our cmd_type? Looking back through the mists of time, recall that scsi_execute() contained this line:

199 req->cmd_type = REQ_TYPE_BLOCK_PC;

So there is nothing to discuss: we execute scsi_setup_blk_pc_cmnd, from drivers/scsi/scsi_lib.c:

1090 static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)

1091 {

1092 struct scsi_cmnd *cmd;

1093

1094 cmd = scsi_get_cmd_from_req(sdev, req);

1095 if (unlikely(!cmd))

1096 return BLKPREP_DEFER;

1097

1098 /*

1099 * BLOCK_PC requests may transfer data, in which case they must

1100 * a bio attached to them. Or they might contain a SCSI command

1101 * that does not transfer data, in which case they may optionally

1102 * submit a request without an attached bio.

1103 */

1104 if (req->bio) {

1105 int ret;

1106

1107 BUG_ON(!req->nr_phys_segments);

1108

1109 ret = scsi_init_io(cmd);

1110 if (unlikely(ret))

1111 return ret;

1112 } else {

1113 BUG_ON(req->data_len);

1114 BUG_ON(req->data);

1115

1116 cmd->request_bufflen = 0;

1117 cmd->request_buffer = NULL;

1118 cmd->use_sg = 0;

1119 req->buffer = NULL;

1120 }

1121

1122 BUILD_BUG_ON(sizeof(req->cmd) > sizeof(cmd->cmnd));

1123 memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));

1124 cmd->cmd_len = req->cmd_len;

1125 if (!req->data_len)

1126 cmd->sc_data_direction = DMA_NONE;

1127 else if (rq_data_dir(req) == WRITE)

1128 cmd->sc_data_direction = DMA_TO_DEVICE;

1129 else

1130 cmd->sc_data_direction = DMA_FROM_DEVICE;

1131

1132 cmd->transfersize = req->data_len;

1133 cmd->allowed = req->retries;

1134 cmd->timeout_per_command = req->timeout;

1135 cmd->done = scsi_blk_pc_done;

1136 return BLKPREP_OK;

1137 }

If you ever puzzled over how a scsi cmd actually takes shape — say, back in the usb-storage story, when you saw sc_data_direction being tested and wondered how that value was ever set — then this code, alive in front of you now, should have dispelled that mystery.

Finally, if all is well, the function returns BLKPREP_OK. prep means prepare, so BLKPREP_OK says the preparation is done — ready to go. scsi_prep_fn() will return this value as well, and before returning it also sets REQ_DONTPREP in cmd_flags. (Note that this is exactly the flag line 741 of elv_next_request() tests.)

Back in elv_next_request(): since the return value is BLKPREP_OK, we break at line 746. In other words, we have taken a request and prepared a scsi command for it; the next step is to execute that command, so there is no reason to linger in elv_next_request() any longer. We finally return to scsi_request_fn(). Tang Wei once confessed that shooting those scenes felt like a walk through hell, and is reading this code any different? She also said that after the hell came heaven, whereas we remain trapped in this code with no end in sight. Sure enough, no sooner is elv_next_request() finished than the next thing appears — two things, actually, at line 1467: a macro plus a function. The macro is blk_queue_tagged, from include/linux/blkdev.h:

524 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)

And the function is blk_queue_start_tag, from block/ll_rw_blk.c:

1104 /**

1105 * blk_queue_start_tag - find a free tag and assign it

1106 * @q: the request queue for the device

1107 * @rq: the block request that needs tagging

1108 *

1109 * Description:

1110 * This can either be used as a stand-alone helper, or possibly be

1111 * assigned as the queue &prep_rq_fn (in which case &struct request

1112 * automagically gets a tag assigned). Note that this function

1113 * assumes that any type of request can be queued! if this is not

1114 * true for your device, you must check the request type before

1115 * calling this function. The request will also be removed from

1116 * the request queue, so it's the drivers responsibility to readd

1117 * it if it should need to be restarted for some reason.

1118 *

1119 * Notes:

1120 * queue lock must be held.

1121 **/

1122 int blk_queue_start_tag(request_queue_t *q, struct request *rq)

1123 {

1124 struct blk_queue_tag *bqt = q->queue_tags;

1125 int tag;

1126

1127 if (unlikely((rq->cmd_flags & REQ_QUEUED))) {

1128 printk(KERN_ERR

1129 "%s: request %p for device [%s] already tagged %d",

1130 __FUNCTION__, rq,

1131 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);

1132 BUG();

1133 }

1134

1135 /*

1136 * Protect against shared tag maps, as we may not have exclusive

1137 * access to the tag map.

1138 */

1139 do {

1140 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);

1141 if (tag >= bqt->max_depth)

1142 return 1;

1143

1144 } while (test_and_set_bit(tag, bqt->tag_map));

1145

1146 rq->cmd_flags |= REQ_QUEUED;

1147 rq->tag = tag;

1148 bqt->tag_index[tag] = rq;

1149 blkdev_dequeue_request(rq);

1150 list_add(&rq->queuelist, &bqt->busy_list);

1151 bqt->busy++;

1152 return 0;

1153 }

For most of us the queue is untagged, so blk_queue_tagged(q) evaluates to 0 — blk_queue_start_tag() is then never even called — and the condition at line 1467 holds.

Consequently the next function, blkdev_dequeue_request(), gets executed. From include/linux/blkdev.h:

725 static inline void blkdev_dequeue_request(struct request *req)

726 {

727 elv_dequeue_request(req->q, req);

728 }

elv_dequeue_request comes from block/elevator.c:

778 void elv_dequeue_request(request_queue_t *q, struct request *rq)

779 {

780 BUG_ON(list_empty(&rq->queuelist));

781 BUG_ON(ELV_ON_HASH(rq));

782

783 list_del_init(&rq->queuelist);

784

785 /*

786 * the time frame between a request being removed from the lists

787 * and to it is freed is accounted as io that is in progress at

788 * the driver side.

789 */

790 if (blk_account_rq(rq))

791 q->in_flight++;

792 }

Society these days is all about using and being used. This request has outlived its usefulness — we have already extracted the scsi command we wanted from it — so we may as well tear down the bridge after crossing the river and slaughter the donkey now that the milling is done. list_del_init deletes this request from the request queue.
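
Note that it is list_del_init(), not plain list_del(): the node is unlinked and then re-initialized to point at itself, so later emptiness checks behave sanely. A sketch:

list_del_init(&rq->queuelist);
/* now list_empty(&rq->queuelist) is true, which is exactly what the
 * BUG_ON at line 780 guards against on a second dequeue attempt. */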

The blk_account_rq below it is another macro from include/linux/blkdev.h:

536 #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq))

Clearly at least the second condition fails for us. So, without further ado, elv_dequeue_request is done.
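
Concretely, the evaluation for our request (blk_rq_started is another blkdev.h macro, quoted here for reference):

#define blk_rq_started(rq)      ((rq)->cmd_flags & REQ_STARTED)

/* blk_rq_started(rq):  true  - REQ_STARTED was set in elv_next_request() */
/* blk_fs_request(rq):  false - cmd_type is REQ_TYPE_BLOCK_PC, not FS     */
/* => blk_account_rq(rq) == 0, so q->in_flight is not incremented.        */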

Now it is finally time to execute the scsi command, so scsi_dispatch_cmd() is called.
