微视linux scsi驱动错误中断处理

SCSI 驱动常见的故障有两类:
一类是控制器(卡)自身出现故障,主动发出错误中断,或者在正常中断中携带错误信息;
另一类是控制器没有响应、中断丢失,最终由定时器超时触发错误处理。

对于第一种故障模型:
硬件中断处理结束后会触发块层软中断,流程如下(代码中以 => 开头的行表示在调用处展开被调用函数的实现):

/*
 * Annotated call trace (illustrative, not compilable C): the block-layer
 * completion softirq handler. Lines starting with "=>" expand the body of
 * the function called on the preceding line.
 *
 * Flow shown here: take over the per-CPU blk_cpu_done list, call each
 * request's softirq_done_fn (scsi_softirq_done in this trace), and dispatch
 * on the disposition returned by scsi_decide_disposition(): finish, requeue,
 * or hand the command to the error handler via scsi_eh_scmd_add().
 */
void blk_done_softirq(struct softirq_action *h)
{
	struct list_head *cpu_list, local_list;

	/* Move the per-CPU completion list onto local_list with local IRQs off. */
	local_irq_disable();
	cpu_list = &__get_cpu_var(blk_cpu_done);
	list_replace_init(cpu_list, &local_list);
	local_irq_enable();

	while (!list_empty(&local_list)) {// walk the list and invoke each request's completion hook
		struct request *rq;

		rq = list_entry(local_list.next, struct request, csd.list);
		list_del_init(&rq->csd.list);
		rq->q->softirq_done_fn(rq);
		=>void scsi_softirq_done(struct request *rq)
		{
			/*
			 * Interpret the result reported by the low-level controller's
			 * interrupt handling; for a USB controller that result is
			 * produced by usb_stor_invoke_transport.
			 * Errors are normally retried through the NEEDS_RETRY branch,
			 * at most 5 times; per the original note, once that limit is
			 * exceeded the default branch is taken.
			 * NOTE(review): in mainline scsi_decide_disposition an exhausted
			 * retry count appears to return SUCCESS rather than reach the
			 * default branch - confirm against the kernel version in use.
			 */
			disposition = scsi_decide_disposition(cmd);
			
			switch (disposition) {
				case SUCCESS:
					scsi_finish_command(cmd);
					break;
				case NEEDS_RETRY:
					scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);
					break;
				case ADD_TO_MLQUEUE:
					scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
					break;
				default:
					/*
					 * ret is nonzero when scsi_eh_scmd_add() returned 0, i.e.
					 * the command was NOT queued for error handling; it is
					 * then completed directly below.
					 */
					ret = !scsi_eh_scmd_add(cmd, 0);
					=>int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
					{
						struct Scsi_Host *shost = scmd->device->host;
						unsigned long flags;
						int ret = 0;

						/* Host has no ehandler set: report "not queued" (0). */
						if (!shost->ehandler)
							return 0;

						spin_lock_irqsave(shost->host_lock, flags);
						/*
						 * Try SHOST_RECOVERY, then SHOST_CANCEL_RECOVERY; a
						 * nonzero return is treated as failure here, and if
						 * both fail we leave with ret == 0.
						 */
						if (scsi_host_set_state(shost, SHOST_RECOVERY))
							if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
								goto out_unlock;

						/* Queue the command on the host's eh_cmd_q and count it as failed. */
						ret = 1;
						scmd->eh_eflags |= eh_flag;
						list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
						shost->host_failed++;
						scsi_eh_wakeup(shost);// wake the error-handling thread
						void scsi_eh_wakeup(struct Scsi_Host *shost)// too long to expand here; see the error-handler thread flow further below
						
					 out_unlock:
						spin_unlock_irqrestore(shost->host_lock, flags);
						return ret;
					}

					
					if (ret)
						scsi_finish_command(cmd);
			}
		}
	}
		
}

错误处理线程(shost->ehandler)由 scsi_eh_wakeup 唤醒,其流程如下:

/*
 * Annotated call trace (illustrative, not compilable C): waking the SCSI
 * error-handler thread and what that thread then does. Lines starting with
 * "=>" expand the body of the function referenced on the preceding line.
 *
 * Flow shown here: scsi_eh_wakeup() wakes shost->ehandler, whose thread
 * function scsi_error_handler() runs either the transport's
 * eh_strategy_handler or the default scsi_unjam_host(). The default path
 * tries sense retrieval, command abort (command_abort for USB storage, which
 * unlinks the current URB), then the escalating resets in
 * scsi_eh_ready_devs(), and finally flushes the done queue.
 */
void scsi_eh_wakeup(struct Scsi_Host *shost)
{
	/* Wake the handler only when the host_busy and host_failed counters are equal. */
	if (shost->host_busy == shost->host_failed) 
		wake_up_process(shost->ehandler);
		=>int scsi_error_handler(void *data)
		{
			struct Scsi_Host *shost = data;

			/*
			 * We use TASK_INTERRUPTIBLE so that the thread is not
			 * counted against the load average as a running process.
			 * We never actually get interrupted because kthread_run
			 * disables signal delivery for the created thread.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			while (!kthread_should_stop()) {
				/*
				 * Sleep again unless there is work (failed commands or a
				 * scheduled EH run) and host_failed equals host_busy.
				 */
				if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
					shost->host_failed != shost->host_busy) {
					schedule();
					set_current_state(TASK_INTERRUPTIBLE);
					continue;
				}

				__set_current_state(TASK_RUNNING);

				/*
				 * We have a host that is failing for some reason.  Figure out
				 * what we need to do to get it up and online again (if we can).
				 * If we fail, we end up taking the thing offline.
				 */
				if (shost->transportt->eh_strategy_handler)// use the transport's custom strategy hook if one is provided
					shost->transportt->eh_strategy_handler(shost);
				else
					scsi_unjam_host(shost);// default recovery routine
					=>void scsi_unjam_host(struct Scsi_Host *shost)
					{
						unsigned long flags;
						LIST_HEAD(eh_work_q);
						LIST_HEAD(eh_done_q);

						/* Move the host's queued failed commands onto a private work list under the host lock. */
						spin_lock_irqsave(shost->host_lock, flags);
						list_splice_init(&shost->eh_cmd_q, &eh_work_q);
						spin_unlock_irqrestore(shost->host_lock, flags);

						SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));

						/* Each step runs only if the previous one returned zero. */
						if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
							if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
								/* For USB storage the abort handler is command_abort: scsiglue.c (drivers/usb/storage) sets .eh_abort_handler = command_abort */
								=>int command_abort(struct scsi_cmnd *srb)
								{
									/*
									 * Original note: "timer wakeup".
									 * NOTE(review): the flag name suggests it marks the
									 * command as timed out - confirm.
									 */
									set_bit(US_FLIDX_TIMED_OUT, &us->dflags);
									if (!test_bit(US_FLIDX_RESETTING, &us->dflags)) {
										set_bit(US_FLIDX_ABORTING, &us->dflags);
										usb_stor_stop_transport(us);
										/* Stop the current URB transfer */
										=>void usb_stor_stop_transport(struct us_data *us)
										{
											if (test_and_clear_bit(US_FLIDX_URB_ACTIVE, &us->dflags)) {
												US_DEBUGP("-- cancelling URB\n");
												usb_unlink_urb(us->current_urb);
												=>int usb_unlink_urb(struct urb *urb)
												{
													return usb_hcd_unlink_urb(urb, -ECONNRESET);
													=>int usb_hcd_unlink_urb (struct urb *urb, int status)
													{
														retval = unlink1(hcd, urb, status);
														=>int unlink1(struct usb_hcd *hcd, struct urb *urb, int status)
														{
															/*
															 * NOTE(review): only the root-hub dequeue path is
															 * shown in this trace - confirm which path applies
															 * to the device in question.
															 */
															value = usb_rh_urb_dequeue(hcd, urb, status);
															=>int usb_rh_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
															{
																usb_hcd_giveback_urb(hcd, urb, status);
																=>void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)
																{
																	/* Record the unlink status, then run the URB's completion callback. */
																	urb->status = status;
																	urb->complete (urb);
																}
															}
														}
													}
												}
											}
										}
									}
								}
							
								scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
								=>void scsi_eh_ready_devs(struct Scsi_Host *shost,
											struct list_head *work_q,
											struct list_head *done_q)
								{
									if (!scsi_eh_stu(shost, work_q, done_q))// escalate step by step, from the lightest reset to the heaviest
										if (!scsi_eh_bus_device_reset(shost, work_q, done_q))
											if (!scsi_eh_target_reset(shost, work_q, done_q))
												if (!scsi_eh_bus_reset(shost, work_q, done_q))
													if (!scsi_eh_host_reset(work_q, done_q))
														/* if nothing worked, take the devices offline */
														scsi_eh_offline_sdevs(work_q, done_q);
								}

						scsi_eh_flush_done_q(&eh_done_q);
					}

				/*
				 * Note - if the above fails completely, the action is to take
				 * individual devices offline and flush the queue of any
				 * outstanding requests that may have been pending.  When we
				 * restart, we restart any I/O to any other devices on the bus
				 * which are still online.
				 */
				scsi_restart_operations(shost);
				set_current_state(TASK_INTERRUPTIBLE);
			}
			__set_current_state(TASK_RUNNING);

			/* Thread is exiting: clear the host's handler pointer. */
			shost->ehandler = NULL;
			return 0;
		}
}

 

你可能感兴趣的:(linux内核)