Ide_driver的注册(二)
还记得很久很久以前,当我们还在讲述ide_generic的那段青涩往事的时候,讲过一个 static int ide_init_queue(ide_drive_t *drive) 函数,他里面调用了一个q = blk_init_queue_node(do_ide_request, NULL, hwif_to_node(hwif));。对没错就是他do_ide_request,当时我们还单独把它提出来说迟早有一天他会翻身做人的。没想到到了今天,他总算是快解放了。好了,闲话少叙。blk_execute_rq我们可以不去研究但是他最终不管通过何种方式迟早会调用到do_ide_request。那么接下来的日子我们就把目光投向这个天使般的精灵….
[ide-io.c]
433 /*
434 * Issue a new request to a device.
435 */
436 void do_ide_request(struct request_queue *q)
437 {
438 ide_drive_t *drive = q->queuedata;
439 ide_hwif_t *hwif = drive->hwif;
440 struct ide_host *host = hwif->host;
441 struct request *rq = NULL;
442 ide_startstop_t startstop;
443
444 /*
445 * drive is doing pre-flush, ordered write, post-flush sequence. even
446 * though that is 3 requests, it must be seen as a single transaction.
447 * we must not preempt this drive until that is complete
448 */
449 if (blk_queue_flushing(q))
450 /*
451 * small race where queue could get replugged during
452 * the 3-request flush cycle, just yank the plug since
453 * we want it to finish asap
454 */
455 blk_remove_plug(q);
456
457 spin_unlock_irq(q->queue_lock);
458
459 /* HLD do_request() callback might sleep, make sure it's okay */
460 might_sleep();
461
462 if (ide_lock_host(host, hwif))
463 goto plug_device_2;
464
465 spin_lock_irq(&hwif->lock);
466
467 if (!ide_lock_port(hwif)) {
468 ide_hwif_t *prev_port;
469
470 WARN_ON_ONCE(hwif->rq);
471 repeat:
472 prev_port = hwif->host->cur_port;
473 if (drive->dev_flags & IDE_DFLAG_SLEEPING &&
474 time_after(drive->sleep, jiffies)) {
475 ide_unlock_port(hwif);
476 goto plug_device;
477 }
478
479 if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
480 hwif != prev_port) {
481 ide_drive_t *cur_dev =
482 prev_port ? prev_port->cur_dev : NULL;
483
484 /*
485 * set nIEN for previous port, drives in the
486 * quirk list may not like intr setups/cleanups
487 */
488 if (cur_dev &&
489 (cur_dev->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
490 prev_port->tp_ops->write_devctl(prev_port,
491 ATA_NIEN |
492 ATA_DEVCTL_OBS);
493
494 hwif->host->cur_port = hwif;
495 }
496 hwif->cur_dev = drive;
497 drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
498
499 spin_unlock_irq(&hwif->lock);
500 spin_lock_irq(q->queue_lock);
501 /*
502 * we know that the queue isn't empty, but this can happen
503 * if the q->prep_rq_fn() decides to kill a request
504 */
505 if (!rq)
506 rq = blk_fetch_request(drive->queue);
507
508 spin_unlock_irq(q->queue_lock);
509 spin_lock_irq(&hwif->lock);
510
511 if (!rq) {
512 ide_unlock_port(hwif);
513 goto out;
514 }
515
516 /*
517 * Sanity: don't accept a request that isn't a PM request
518 * if we are currently power managed. This is very important as
519 * blk_stop_queue() doesn't prevent the blk_fetch_request()
520 * above to return us whatever is in the queue. Since we call
521 * ide_do_request() ourselves, we end up taking requests while
522 * the queue is blocked...
523 *
524 * We let requests forced at head of queue with ide-preempt
525 * though. I hope that doesn't happen too much, hopefully not
526 * unless the subdriver triggers such a thing in its own PM
527 * state machine.
528 */
529 if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
530 blk_pm_request(rq) == 0 &&
531 (rq->cmd_flags & REQ_PREEMPT) == 0) {
532 /* there should be no pending command at this point */
533 ide_unlock_port(hwif);
534 goto plug_device;
535 }
536
537 hwif->rq = rq;
538
539 spin_unlock_irq(&hwif->lock);
540 startstop = start_request(drive, rq);
541 spin_lock_irq(&hwif->lock);
542
543 if (startstop == ide_stopped) {
544 rq = hwif->rq;
545 hwif->rq = NULL;
546 goto repeat;
547 }
548 } else
549 goto plug_device;
550 out:
551 spin_unlock_irq(&hwif->lock);
552 if (rq == NULL)
553 ide_unlock_host(host);
554 spin_lock_irq(q->queue_lock);
555 return;
556
557 plug_device:
558 spin_unlock_irq(&hwif->lock);
559 ide_unlock_host(host);
560 plug_device_2:
561 spin_lock_irq(q->queue_lock);
562
563 if (rq)
564 blk_requeue_request(q, rq);
565 if (!elv_queue_empty(q))
566 blk_plug_device(q);
567 }
故事发展到今天,已经很难让她和块设备层之间不发生“龌龊”的关系。但是作为明理的我们就应该始终保持一颗清醒的头脑,无论块设备那边有多大的诱惑和神秘感,我们都要对ide驱动从一而终,对块设备保持高度的警惕。有多少的好奇等我们看完ide再说。保持这个态度来看代码:
449-455行这里是块设备相关的,我们只需要了解:如果请求队列正处于flush序列当中,就通过blk_remove_plug取消当前请求队列的阻塞(plug)标志位。
460行表示这个do_ide_request可能会睡眠。
462行调用ide_lock_host(其定义从410行开始)。现在IDE驱动的host要干活了,所以在host要求串行化(IDE_HFLAG_SERIALIZE)时设置一个busy标志,然后对应的接口也就相应地lock上。代码如下:
[ide-io.c]
410 static inline int ide_lock_host(struct ide_host *host, ide_hwif_t *hwif)
411 {
412 int rc = 0;
413
414 if (host->host_flags & IDE_HFLAG_SERIALIZE) {
415 rc = test_and_set_bit_lock(IDE_HOST_BUSY, &host->host_busy);
416 if (rc == 0) {
417 if (host->get_lock)
418 host->get_lock(ide_intr, hwif);
419 }
420 }
421 return rc;
422 }
472-477行如果设备要睡眠,这算是吃了个闭门羹,自找没趣就直接返回吧。
479-495行这里比较简单就不多说了,其中有一段是关于NIEN_QUIRK的,这些怪癖的驱动器前面也罗列出来了。
505-506行道理很简单,有了队列当然最终我们希望的是request,那么这个函数就从队列中取出了。
511-514行检查一下有效性。
529-535行这是一段关于电源管理的内容,飘过…..
540行start_request,故事的高潮就要上演了,在迎接这一令人兴奋的时刻之前,我们先来解决do_ide_request剩下来的几行代码。
557-567行是请求暂时无法下发时的处理(例如设备正在休眠、端口被占用或者设备被电源管理阻塞),这不是我们希望的情况:如果已经取出了request就把它重新放回队列(blk_requeue_request),并在队列非空时重新plug设备,留待以后再次请求。正常的出口是在550行的out标号。
基于start_request显赫的江湖地位,接下来的时间我们就来细说….
[ide-io.c]
297 /**
298 * start_request - start of I/O and command issuing for IDE
299 *
300 * start_request() initiates handling of a new I/O request. It
301 * accepts commands and I/O (read/write) requests.
302 *
303 * FIXME: this function needs a rename
304 */
305
306 static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
307 {
308 ide_startstop_t startstop;
309
310 BUG_ON(!blk_rq_started(rq));
311
312 #ifdef DEBUG
313 printk("%s: start_request: current=0x%08lx\n",
314 drive->hwif->name, (unsigned long) rq);
315 #endif
316
317 /* bail early if we've exceeded max_failures */
318 if (drive->max_failures && (drive->failures > drive->max_failures)) {
319 rq->cmd_flags |= REQ_FAILED;
320 goto kill_rq;
321 }
322
323 if (blk_pm_request(rq))
324 ide_check_pm_state(drive, rq);
325
326 drive->hwif->tp_ops->dev_select(drive);
327 if (ide_wait_stat(&startstop, drive, drive->ready_stat,
328 ATA_BUSY | ATA_DRQ, WAIT_READY)) {
329 printk(KERN_ERR "%s: drive not ready for command\n", drive->name);
330 return startstop;
331 }
332
333 if (drive->special_flags == 0) {
334 struct ide_driver *drv;
335
336 /*
337 * We reset the drive so we need to issue a SETFEATURES.
338 * Do it _after_ do_special() restored device parameters.
339 */
340 if (drive->current_speed == 0xff)
341 ide_config_drive_speed(drive, drive->desired_speed);
342
343 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
344 return execute_drive_cmd(drive, rq);
345 else if (blk_pm_request(rq)) {
346 struct request_pm_state *pm = rq->special;
347 #ifdef DEBUG_PM
348 printk("%s: start_power_step(step: %d)\n",
349 drive->name, pm->pm_step);
350 #endif
351 startstop = ide_start_power_step(drive, rq);
352 if (startstop == ide_stopped &&
353 pm->pm_step == IDE_PM_COMPLETED)
354 ide_complete_pm_rq(drive, rq);
355 return startstop;
356 } else if (!rq->rq_disk && blk_special_request(rq))
357 /*
358 * TODO: Once all ULDs have been modified to
359 * check for specific op codes rather than
360 * blindly accepting any special request, the
361 * check for ->rq_disk above may be replaced
362 * by a more suitable mechanism or even
363 * dropped entirely.
364 */
365 return ide_special_rq(drive, rq);
366
367 drv = *(struct ide_driver **)rq->rq_disk->private_data;
368
369 return drv->do_request(drive, rq, blk_rq_pos(rq));
370 }
371 return do_special(drive);
372 kill_rq:
373 ide_kill_rq(drive, rq);
374 return ide_stopped;
375 }
318-321行前面我们说过,对于失败了的request我们采取的措施是重新提交。那么这里就是允许失败的次数了,如果失败次数超过了max_failures,就直接给request标上REQ_FAILED(无可救药),结束该请求后返回。
323-324行还是关于电源管理的,略过。
326行选择驱动器,我们知道一个IDE接口上面可以挂接两个IDE设备,访问主设备还是从设备由选择驱动器命令完成。
327行等待设备准备好,调用ide_wait_stat,源码如下:
[ide-iops.c]
159 /*
160 * In case of error returns error value after doing "*startstop = ide_error()".
161 * The caller should return the updated value of "startstop" in this case,
162 * "startstop" is unchanged when the function returns 0.
163 */
164 int ide_wait_stat(ide_startstop_t *startstop, ide_drive_t *drive, u8 good,
165 u8 bad, unsigned long timeout)
166 {
167 int err;
168 u8 stat;
169
170 /* bail early if we've exceeded max_failures */
171 if (drive->max_failures && (drive->failures > drive->max_failures)) {
172 *startstop = ide_stopped;
173 return 1;
174 }
175
176 err = __ide_wait_stat(drive, good, bad, timeout, &stat);
177
178 if (err) {
179 char *s = (err == -EBUSY) ? "status timeout" : "status error";
180 *startstop = ide_error(drive, s, stat);
181 }
182
183 return err;
184 }
这里最关键的是176行,这个函数我们之前遇到过但是没有分析过。这里为了表示对她的歉意,现在补上这一课。看代码:
[ide-iops.c]
94 /*
95 * This routine busy-waits for the drive status to be not "busy".
96 * It then checks the status for all of the "good" bits and none
97 * of the "bad" bits, and if all is okay it returns 0. All other
98 * cases return error -- caller may then invoke ide_error().
99 *
100 * This routine should get fixed to not hog the cpu during extra long waits..
101 * That could be done by busy-waiting for the first jiffy or two, and then
102 * setting a timer to wake up at half second intervals thereafter,
103 * until timeout is achieved, before timing out.
104 */
105 int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad,
106 unsigned long timeout, u8 *rstat)
107 {
108 ide_hwif_t *hwif = drive->hwif;
109 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
110 unsigned long flags;
111 int i;
112 u8 stat;
113
114 udelay(1); /* spec allows drive 400ns to assert "BUSY" */
115 stat = tp_ops->read_status(hwif);
116
117 if (stat & ATA_BUSY) {
118 local_save_flags(flags);
119 local_irq_enable_in_hardirq();
120 timeout += jiffies;
121 while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) {
122 if (time_after(jiffies, timeout)) {
123 /*
124 * One last read after the timeout in case
125 * heavy interrupt load made us not make any
126 * progress during the timeout..
127 */
128 stat = tp_ops->read_status(hwif);
129 if ((stat & ATA_BUSY) == 0)
130 break;
131
132 local_irq_restore(flags);
133 *rstat = stat;
134 return -EBUSY;
135 }
136 }
137 local_irq_restore(flags);
138 }
139 /*
140 * Allow status to settle, then read it again.
141 * A few rare drives vastly violate the 400ns spec here,
142 * so we'll wait up to 10usec for a "good" status
143 * rather than expensively fail things immediately.
144 * This fix courtesy of Matthew Faupel & Niccolo Rigacci.
145 */
146 for (i = 0; i < 10; i++) {
147 udelay(1);
148 stat = tp_ops->read_status(hwif);
149
150 if (OK_STAT(stat, good, bad)) {
151 *rstat = stat;
152 return 0;
153 }
154 }
155 *rstat = stat;
156 return -EFAULT;
157 }
119行这里把这个提出来说是因为这里使能了中断,也就是说后面在查询这个设备状态的过程中,其他的任务也是在执行的。
121 行读取设备状态判断是否超时。
128-129行由于在查询设备状态的过程中系统负载可能比较繁重,也就是说读取设备状态这个过程可能已经很久没有得到执行,为了避免这种悲剧的发生,超时之后有必要再判断一次。
146-154行这些是为了防止时序上的原因可能引发的超时,如果这些都没能将设备带入一个良好的状态,我们就认为是设备出现了错误。然后带着错误的状态就返回了。
回到ide_wait_stat的178-181行,按理说这是一段出错处理的代码,前面我们对出错的情况都是没有分析过的。但是这里我们单独把这个错误提出来分析一下,因为这对调试ide驱动是很有意义的。
180行ide_error,看函数源码:
[ide-eh.c]
100 /**
101 * ide_error - handle an error on the IDE
102 * @drive: drive the error occurred on
103 * @msg: message to report
104 * @stat: status bits
105 *
106 * ide_error() takes action based on the error returned by the drive.
107 * For normal I/O that may well include retries. We deal with
108 * both new-style (taskfile) and old style command handling here.
109 * In the case of taskfile command handling there is work left to
110 * do
111 */
112
113 ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
114 {
115 struct request *rq;
116 u8 err;
117
118 err = ide_dump_status(drive, msg, stat);
119
120 rq = drive->hwif->rq;
121 if (rq == NULL)
122 return ide_stopped;
123
124 /* retry only "normal" I/O: */
125 if (!blk_fs_request(rq)) {
126 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
127 struct ide_cmd *cmd = rq->special;
128
129 if (cmd)
130 ide_complete_cmd(drive, cmd, stat, err);
131 } else if (blk_pm_request(rq)) {
132 rq->errors = 1;
133 ide_complete_pm_rq(drive, rq);
134 return ide_stopped;
135 }
136 rq->errors = err;
137 ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
138 return ide_stopped;
139 }
140
141 return __ide_error(drive, rq, stat, err);
142 }
118行这里,如果是状态错误,会将状态数据翻译成详细的出错信息打印出来。
125-139行处理的是非普通IO的请求(taskfile命令、电源管理请求等):这类请求不会重试,而是记录错误后直接结束该请求并返回ide_stopped。只有正常的IO请求才会继续往下走并有机会重试。
141行会对正常IO请求的出错进行详细处理(其中可能包含重试)。这部分的内容可以在设备出现错误时提供错误定位信息,具体的程序代码比较简单就不详细分析了。继续回到start_request中来。
344行是我们关注的重点,进去看看:
[ide-io.c]
241 /**
242 * execute_drive_command - issue special drive command
243 * @drive: the drive to issue the command on
244 * @rq: the request structure holding the command
245 *
246 * execute_drive_cmd() issues a special drive command, usually
247 * initiated by ioctl() from the external hdparm program. The
248 * command can be a drive command, drive task or taskfile
249 * operation. Weirdly you can call it with NULL to wait for
250 * all commands to finish. Don't do this as that is due to change
251 */
252
253 static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
254 struct request *rq)
255 {
256 struct ide_cmd *cmd = rq->special;
257
258 if (cmd) {
259 if (cmd->protocol == ATA_PROT_PIO) {
260 ide_init_sg_cmd(cmd, blk_rq_sectors(rq) << 9);
261 ide_map_sg(drive, cmd);
262 }
263
264 return do_rw_taskfile(drive, cmd);
265 }
266
267 /*
268 * NULL is actually a valid way of waiting for
269 * all current requests to be flushed from the queue.
270 */
271 #ifdef DEBUG
272 printk("%s: DRIVE_CMD (null)\n", drive->name);
273 #endif
274 rq->errors = 0;
275 ide_complete_rq(drive, 0, blk_rq_bytes(rq));
276
277 return ide_stopped;
278 }
264行成了这个函数的核心,对应了cmd的处理。
[ide-taskfile.c]
77 ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd)
78 {
79 ide_hwif_t *hwif = drive->hwif;
80 struct ide_cmd *cmd = &hwif->cmd;
81 struct ide_taskfile *tf = &cmd->tf;
82 ide_handler_t *handler = NULL;
83 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
84 const struct ide_dma_ops *dma_ops = hwif->dma_ops;
85
86 if (orig_cmd->protocol == ATA_PROT_PIO &&
87 (orig_cmd->tf_flags & IDE_TFLAG_MULTI_PIO) &&
88 drive->mult_count == 0) {
89 pr_err("%s: multimode not set!\n", drive->name);
90 return ide_stopped;
91 }
92
93 if (orig_cmd->ftf_flags & IDE_FTFLAG_FLAGGED)
94 orig_cmd->ftf_flags |= IDE_FTFLAG_SET_IN_FLAGS;
95
96 memcpy(cmd, orig_cmd, sizeof(*cmd));
97
98 if ((cmd->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) {
99 ide_tf_dump(drive->name, cmd);
100 tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
101
102 if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) {
103 u8 data[2] = { cmd->tf.data, cmd->hob.data };
104
105 tp_ops->output_data(drive, cmd, data, 2);
106 }
107
108 if (cmd->valid.out.tf & IDE_VALID_DEVICE) {
109 u8 HIHI = (cmd->tf_flags & IDE_TFLAG_LBA48) ?
110 0xE0 : 0xEF;
111
112 if (!(cmd->ftf_flags & IDE_FTFLAG_FLAGGED))
113 cmd->tf.device &= HIHI;
114 cmd->tf.device |= drive->select;
115 }
116
117 tp_ops->tf_load(drive, &cmd->hob, cmd->valid.out.hob);
118 tp_ops->tf_load(drive, &cmd->tf, cmd->valid.out.tf);
119 }
120
121 switch (cmd->protocol) {
122 case ATA_PROT_PIO:
123 if (cmd->tf_flags & IDE_TFLAG_WRITE) {
124 tp_ops->exec_command(hwif, tf->command);
125 ndelay(400); /* FIXME */
126 return pre_task_out_intr(drive, cmd);
127 }
128 handler = task_pio_intr;
129 /* fall-through */
130 case ATA_PROT_NODATA:
131 if (handler == NULL)
132 handler = task_no_data_intr;
133 ide_execute_command(drive, cmd, handler, WAIT_WORSTCASE);
134 return ide_started;
135 case ATA_PROT_DMA:
136 if (ide_dma_prepare(drive, cmd))
137 return ide_stopped;
138 hwif->expiry = dma_ops->dma_timer_expiry;
139 ide_execute_command(drive, cmd, ide_dma_intr, 2 * WAIT_CMD);
140 dma_ops->dma_start(drive);
141 default:
142 return ide_started;
143 }
144 }
一路走来根据我们分析的情景来看,这里我们只需要关心130-134行的代码,因为我们的协议对应为ATA_PROT_NODATA。
先看133行再来解释132行,源码如下:
[ide-iops.c]
468 /**
469 * ide_execute_command - execute an IDE command
470 * @drive: IDE drive to issue the command against
471 * @cmd: command
472 * @handler: handler for next phase
473 * @timeout: timeout for command
474 *
475 * Helper function to issue an IDE command. This handles the
476 * atomicity requirements, command timing and ensures that the
477 * handler and IRQ setup do not race. All IDE command kick off
478 * should go via this function or do equivalent locking.
479 */
480
481 void ide_execute_command(ide_drive_t *drive, struct ide_cmd *cmd,
482 ide_handler_t *handler, unsigned timeout)
483 {
484 ide_hwif_t *hwif = drive->hwif;
485 unsigned long flags;
486
487 spin_lock_irqsave(&hwif->lock, flags);
488 if ((cmd->protocol != ATAPI_PROT_DMA &&
489 cmd->protocol != ATAPI_PROT_PIO) ||
490 (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT))
491 __ide_set_handler(drive, handler, timeout);
492 hwif->tp_ops->exec_command(hwif, cmd->tf.command);
493 /*
494 * Drive takes 400nS to respond, we must avoid the IRQ being
495 * serviced before that.
496 *
497 * FIXME: we could skip this delay with care on non shared devices
498 */
499 ndelay(400);
500 spin_unlock_irqrestore(&hwif->lock, flags);
501 }
明眼人一下就看到了492行,的确就是这儿我们的命令作用到了硬件上。关于tp_ops中的exec_command我们之前已经说过了。是否我们写入了一个命令就结束了我们这段艰难的旅程呢?其实事情远不是我们想的那么简单,内核也不是我们想象的那么不负责任。写入了命令是否硬盘就执行成功了呢?这个工作就交给491行这位使者来做了。
[ide-iops.c]
435 /*
436 * This should get invoked any time we exit the driver to
437 * wait for an interrupt response from a drive. handler() points
438 * at the appropriate code to handle the next interrupt, and a
439 * timer is started to prevent us from waiting forever in case
440 * something goes wrong (see the ide_timer_expiry() handler later on).
441 *
442 * See also ide_execute_command
443 */
444 void __ide_set_handler(ide_drive_t *drive, ide_handler_t *handler,
445 unsigned int timeout)
446 {
447 ide_hwif_t *hwif = drive->hwif;
448
449 BUG_ON(hwif->handler);
450 hwif->handler = handler;
451 hwif->timer.expires = jiffies + timeout;
452 hwif->req_gen_timer = hwif->req_gen;
453 add_timer(&hwif->timer);
454 }
这段代码可以说没有一个我们不熟悉的C语言语法,也许人家是大智若愚。下面我们就从两个方面来说说他的智体现在哪些方面。
按照先后顺序我们来先说450行,也就是
智慧一:
Linux内核代码之所以如此精辟,我觉得很主要的一个原因是容易让人产生健忘,这样前面说的后面联系不上,猛然回头一看的时候,很容易让人产生扑朔迷离的感觉,从而衬托出他的一种境界。当然,要说清楚这里的这个故事就得回到N年以前我们讲到的一个传说。早在ide-generic中add-host的时候我们通过层层调用最终通过request_irq申请了一个ide中断,当说到对应的中断向量是ide_intr的时候我们妥协了,说以后有用到的时候再来分析。今天总算到了他开花结果的时候了,进去开开眼界: